aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--MAINTAINERS5
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/include/asm/futex.h29
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/alpha/kernel/sys_titan.c1
-rw-r--r--arch/arm/include/asm/futex.h29
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/frv/include/asm/futex.h5
-rw-r--r--arch/frv/kernel/futex.c14
-rw-r--r--arch/ia64/include/asm/futex.h15
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/include/asm/xen/hypercall.h2
-rw-r--r--arch/ia64/xen/suspend.c9
-rw-r--r--arch/microblaze/include/asm/futex.h31
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/mips/alchemy/mtx-1/board_setup.c4
-rw-r--r--arch/mips/alchemy/mtx-1/platform.c9
-rw-r--r--arch/mips/alchemy/xxs1500/board_setup.c4
-rw-r--r--arch/mips/include/asm/futex.h39
-rw-r--r--arch/mips/include/asm/perf_event.h12
-rw-r--r--arch/mips/kernel/ftrace.c179
-rw-r--r--arch/mips/kernel/perf_event.c345
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mips/kernel/smp.c31
-rw-r--r--arch/mips/kernel/syscall.c5
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/loongson/Kconfig5
-rw-r--r--arch/mips/loongson/common/cmdline.c5
-rw-r--r--arch/mips/loongson/common/machtype.c3
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/ops-pmcmsp.c4
-rw-r--r--arch/mips/pmc-sierra/Kconfig4
-rw-r--r--arch/mips/pmc-sierra/msp71xx/msp_time.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h2
-rw-r--r--arch/mn10300/include/asm/uaccess.h5
-rw-r--r--arch/mn10300/mm/cache-inv-icache.c4
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/parisc/include/asm/futex.h24
-rw-r--r--arch/powerpc/include/asm/futex.h27
-rw-r--r--arch/powerpc/include/asm/lppaca.h16
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/powerpc/platforms/iseries/dt.c6
-rw-r--r--arch/powerpc/platforms/iseries/setup.c1
-rw-r--r--arch/s390/include/asm/futex.h12
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/lib/uaccess.h8
-rw-r--r--arch/s390/lib/uaccess_pt.c17
-rw-r--r--arch/s390/lib/uaccess_std.c8
-rw-r--r--arch/sh/include/asm/futex-irq.h24
-rw-r--r--arch/sh/include/asm/futex.h11
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sparc/include/asm/futex_64.h20
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/tile/include/asm/futex.h27
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/futex.h22
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/unistd_32.h4
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/numa_64.c6
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/pci/ce4100.c7
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/xen/Kconfig8
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c82
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--block/blk-lib.c19
-rw-r--r--drivers/block/xen-blkfront.c87
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c8
-rw-r--r--drivers/gpio/ml_ioh_gpio.c1
-rw-r--r--drivers/gpio/pch_gpio.c1
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h10
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c36
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/r100.c22
-rw-r--r--drivers/gpu/drm/radeon/r600.c3
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c14
-rw-r--r--drivers/gpu/drm/radeon/rs600.c1
-rw-r--r--drivers/gpu/drm/radeon/rs690.c1
-rw-r--r--drivers/gpu/drm/radeon/rv770.c3
-rw-r--r--drivers/hwmon/f71882fg.c4
-rw-r--r--drivers/i2c/busses/i2c-eg20t.c1
-rw-r--r--drivers/i2c/busses/i2c-ocores.c2
-rw-r--r--drivers/i2c/busses/i2c-omap.c4
-rw-r--r--drivers/media/common/tuners/tda8290.c14
-rw-r--r--drivers/media/dvb/dvb-usb/dib0700_devices.c21
-rw-r--r--drivers/media/dvb/dvb-usb/lmedm04.c6
-rw-r--r--drivers/media/dvb/frontends/dib7000m.c19
-rw-r--r--drivers/media/dvb/frontends/dib7000m.h15
-rw-r--r--drivers/media/dvb/mantis/mantis_pci.c1
-rw-r--r--drivers/media/rc/ir-raw.c3
-rw-r--r--drivers/media/rc/mceusb.c27
-rw-r--r--drivers/media/rc/nuvoton-cir.c5
-rw-r--r--drivers/media/rc/nuvoton-cir.h7
-rw-r--r--drivers/media/rc/rc-main.c2
-rw-r--r--drivers/media/video/au0828/au0828-video.c28
-rw-r--r--drivers/media/video/cx18/cx18-cards.c50
-rw-r--r--drivers/media/video/cx18/cx18-driver.c25
-rw-r--r--drivers/media/video/cx18/cx18-driver.h3
-rw-r--r--drivers/media/video/cx18/cx18-dvb.c38
-rw-r--r--drivers/media/video/cx23885/cx23885-i2c.c10
-rw-r--r--drivers/media/video/cx25840/cx25840-core.c3
-rw-r--r--drivers/media/video/ivtv/ivtv-irq.c58
-rw-r--r--drivers/media/video/mem2mem_testdev.c1
-rw-r--r--drivers/media/video/s2255drv.c10
-rw-r--r--drivers/mmc/core/core.c2
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c43
-rw-r--r--drivers/mtd/chips/jedec_probe.c35
-rw-r--r--drivers/mtd/maps/amd76xrom.c1
-rw-r--r--drivers/mtd/mtd_blkdevs.c1
-rw-r--r--drivers/mtd/nand/omap2.c2
-rw-r--r--drivers/mtd/onenand/generic.c2
-rw-r--r--drivers/mtd/onenand/omap2.c2
-rw-r--r--drivers/net/ariadne.c5
-rw-r--r--drivers/net/bnx2x/bnx2x.h5
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.c22
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.h9
-rw-r--r--drivers/net/bnx2x/bnx2x_ethtool.c18
-rw-r--r--drivers/net/bnx2x/bnx2x_main.c19
-rw-r--r--drivers/net/bonding/bond_3ad.c32
-rw-r--r--drivers/net/bonding/bond_3ad.h3
-rw-r--r--drivers/net/macvtap.c3
-rw-r--r--drivers/net/r6040.c115
-rw-r--r--drivers/net/smsc911x.c5
-rw-r--r--drivers/pci/xen-pcifront.c31
-rw-r--r--drivers/target/target_core_tmr.c5
-rw-r--r--drivers/target/target_core_transport.c8
-rw-r--r--drivers/watchdog/cpwd.c2
-rw-r--r--drivers/watchdog/hpwdt.c4
-rw-r--r--drivers/watchdog/sbc_fitpc2_wdt.c7
-rw-r--r--drivers/watchdog/sch311x_wdt.c2
-rw-r--r--drivers/watchdog/w83697ug_wdt.c2
-rw-r--r--drivers/xen/balloon.c16
-rw-r--r--drivers/xen/events.c342
-rw-r--r--drivers/xen/manage.c153
-rw-r--r--drivers/xen/platform-pci.c3
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c55
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/internal.h13
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/namei.c1490
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c91
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/open.c134
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--include/asm-generic/fcntl.h4
-rw-r--r--include/asm-generic/futex.h7
-rw-r--r--include/asm-generic/unistd.h6
-rw-r--r--include/linux/debugobjects.h5
-rw-r--r--include/linux/exportfs.h9
-rw-r--r--include/linux/fcntl.h1
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/fs.h19
-rw-r--r--include/linux/interrupt.h3
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/namei.h7
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/nfs_fs_sb.h10
-rw-r--r--include/linux/plist.h47
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--include/linux/sunrpc/sched.h1
-rw-r--r--include/linux/syscalls.h8
-rw-r--r--include/linux/sysctl.h14
-rw-r--r--include/target/target_core_transport.h2
-rw-r--r--include/xen/events.h8
-rw-r--r--include/xen/interface/io/blkif.h37
-rw-r--r--include/xen/interface/xen.h4
-rw-r--r--include/xen/xen-ops.h6
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/audit_watch.c85
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/futex.c147
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/irq/manage.c11
-rw-r--r--kernel/irq/pm.c3
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex-tester.c40
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched.c1
-rw-r--r--kernel/sched_rt.c14
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl.c15
-rw-r--r--kernel/sysctl_binary.c19
-rw-r--r--kernel/timer.c29
-rw-r--r--kernel/workqueue.c6
-rw-r--r--lib/debugobjects.c9
-rw-r--r--lib/plist.c135
-rw-r--r--lib/rwsem.c10
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/rmap.c54
-rw-r--r--mm/shmem.c4
-rw-r--r--net/Makefile4
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/core/dev.c12
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/scm.c2
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/loop.c11
-rw-r--r--net/sunrpc/sched.c75
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/unix/garbage.c2
-rw-r--r--scripts/basic/fixdep.c19
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--scripts/mod/sumversion.c19
-rw-r--r--scripts/rt-tester/rt-tester.py2
-rw-r--r--scripts/rt-tester/t2-l1-2rt-sameprio.tst5
-rw-r--r--scripts/rt-tester/t2-l1-pi.tst5
-rw-r--r--scripts/rt-tester/t2-l1-signal.tst5
-rw-r--r--scripts/rt-tester/t2-l2-2rt-deadlock.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-1rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-2rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-3rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-signal.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-steal.tst5
-rw-r--r--scripts/rt-tester/t3-l2-pi.tst5
-rw-r--r--scripts/rt-tester/t4-l2-pi-deboost.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost.tst5
-rw-r--r--sound/soc/codecs/wm8978.c14
-rw-r--r--sound/soc/codecs/wm8994.c10
-rw-r--r--sound/soc/omap/am3517evm.c2
-rw-r--r--sound/soc/soc-dapm.c2
-rw-r--r--tools/perf/util/header.c11
326 files changed, 4802 insertions, 3765 deletions
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907d..2e3c64b1a6a5 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
86 86
87The routines look the same as above: 87The routines look the same as above:
88 88
89 rwlock_t xxx_lock = RW_LOCK_UNLOCKED; 89 rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
90 90
91 unsigned long flags; 91 unsigned long flags;
92 92
@@ -196,25 +196,3 @@ appropriate:
196 196
197For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or 197For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
198__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate. 198__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
199
200SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
201with lockdep state tracking.
202
203Most of the time, you can simply turn:
204 static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
205into:
206 static DEFINE_SPINLOCK(xxx_lock);
207
208Static structure member variables go from:
209
210 struct foo bar {
211 .lock = SPIN_LOCK_UNLOCKED;
212 };
213
214to:
215
216 struct foo bar {
217 .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
218 };
219
220Declaration of static rw_locks undergo a similar transformation.
diff --git a/MAINTAINERS b/MAINTAINERS
index 560ecce38ff5..f1bc3dc6b369 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4292,10 +4292,7 @@ S: Maintained
4292F: net/sched/sch_netem.c 4292F: net/sched/sch_netem.c
4293 4293
4294NETERION 10GbE DRIVERS (s2io/vxge) 4294NETERION 10GbE DRIVERS (s2io/vxge)
4295M: Ramkrishna Vepa <ramkrishna.vepa@exar.com> 4295M: Jon Mason <jdmason@kudzu.us>
4296M: Sivakumar Subramani <sivakumar.subramani@exar.com>
4297M: Sreenivasa Honnur <sreenivasa.honnur@exar.com>
4298M: Jon Mason <jon.mason@exar.com>
4299L: netdev@vger.kernel.org 4296L: netdev@vger.kernel.org
4300W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous 4297W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
4301W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous 4298W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
diff --git a/Makefile b/Makefile
index 504f788773e5..d6592b63c8cb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 38 3SUBLEVEL = 38
4EXTRAVERSION = -rc8 4EXTRAVERSION =
5NAME = Flesh-Eating Bats with Fangs 5NAME = Flesh-Eating Bats with Fangs
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index 945de222ab91..e8a761aee088 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -29,7 +29,7 @@
29 : "r" (uaddr), "r"(oparg) \ 29 : "r" (uaddr), "r"(oparg) \
30 : "memory") 30 : "memory")
31 31
32static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 32static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7; 34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15; 35 int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg; 40 oparg = 1 << oparg;
41 41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT; 43 return -EFAULT;
44 44
45 pagefault_disable(); 45 pagefault_disable();
@@ -81,21 +81,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
81} 81}
82 82
83static inline int 83static inline int
84futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 84futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
85 u32 oldval, u32 newval)
85{ 86{
86 int prev, cmp; 87 int ret = 0, cmp;
88 u32 prev;
87 89
88 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 90 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
89 return -EFAULT; 91 return -EFAULT;
90 92
91 __asm__ __volatile__ ( 93 __asm__ __volatile__ (
92 __ASM_SMP_MB 94 __ASM_SMP_MB
93 "1: ldl_l %0,0(%2)\n" 95 "1: ldl_l %1,0(%3)\n"
94 " cmpeq %0,%3,%1\n" 96 " cmpeq %1,%4,%2\n"
95 " beq %1,3f\n" 97 " beq %2,3f\n"
96 " mov %4,%1\n" 98 " mov %5,%2\n"
97 "2: stl_c %1,0(%2)\n" 99 "2: stl_c %2,0(%3)\n"
98 " beq %1,4f\n" 100 " beq %2,4f\n"
99 "3: .subsection 2\n" 101 "3: .subsection 2\n"
100 "4: br 1b\n" 102 "4: br 1b\n"
101 " .previous\n" 103 " .previous\n"
@@ -105,11 +107,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
105 " .long 2b-.\n" 107 " .long 2b-.\n"
106 " lda $31,3b-2b(%0)\n" 108 " lda $31,3b-2b(%0)\n"
107 " .previous\n" 109 " .previous\n"
108 : "=&r"(prev), "=&r"(cmp) 110 : "+r"(ret), "=&r"(prev), "=&r"(cmp)
109 : "r"(uaddr), "r"((long)oldval), "r"(newval) 111 : "r"(uaddr), "r"((long)oldval), "r"(newval)
110 : "memory"); 112 : "memory");
111 113
112 return prev; 114 *uval = prev;
115 return ret;
113} 116}
114 117
115#endif /* __KERNEL__ */ 118#endif /* __KERNEL__ */
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14 14
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/list.h>
17#include <linux/spinlock.h>
18 16
19struct rwsem_waiter;
20
21extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
22extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
23extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
24extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
25
26/*
27 * the semaphore definition
28 */
29struct rw_semaphore {
30 long count;
31#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L 17#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
32#define RWSEM_ACTIVE_BIAS 0x0000000000000001L 18#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
33#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL 19#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
34#define RWSEM_WAITING_BIAS (-0x0000000100000000L) 20#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
35#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 21#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
36#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 22#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
37 spinlock_t wait_lock;
38 struct list_head wait_list;
39};
40
41#define __RWSEM_INITIALIZER(name) \
42 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
43 LIST_HEAD_INIT((name).wait_list) }
44
45#define DECLARE_RWSEM(name) \
46 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
47
48static inline void init_rwsem(struct rw_semaphore *sem)
49{
50 sem->count = RWSEM_UNLOCKED_VALUE;
51 spin_lock_init(&sem->wait_lock);
52 INIT_LIST_HEAD(&sem->wait_list);
53}
54 23
55static inline void __down_read(struct rw_semaphore *sem) 24static inline void __down_read(struct rw_semaphore *sem)
56{ 25{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
250#endif 219#endif
251} 220}
252 221
253static inline int rwsem_is_locked(struct rw_semaphore *sem)
254{
255 return (sem->count != 0);
256}
257
258#endif /* __KERNEL__ */ 222#endif /* __KERNEL__ */
259#endif /* _ALPHA_RWSEM_H */ 223#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5045e9..376f22130791 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; 230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
231} 231}
232 232
233static int 233SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
234do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, 234 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
235 unsigned long bufsiz)
236{ 235{
237 struct kstatfs linux_stat; 236 struct kstatfs linux_stat;
238 int error = vfs_statfs(path, &linux_stat); 237 int error = user_statfs(pathname, &linux_stat);
239 if (!error) 238 if (!error)
240 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); 239 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
241 return error; 240 return error;
242} 241}
243 242
244SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
245 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
246{
247 struct path path;
248 int retval;
249
250 retval = user_path(pathname, &path);
251 if (!retval) {
252 retval = do_osf_statfs(&path, buffer, bufsiz);
253 path_put(&path);
254 }
255 return retval;
256}
257
258SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, 243SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
259 struct osf_statfs __user *, buffer, unsigned long, bufsiz) 244 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
260{ 245{
261 struct file *file; 246 struct kstatfs linux_stat;
262 int retval; 247 int error = fd_statfs(fd, &linux_stat);
263 248 if (!error)
264 retval = -EBADF; 249 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
265 file = fget(fd); 250 return error;
266 if (file) {
267 retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
268 fput(file);
269 }
270 return retval;
271} 251}
272 252
273/* 253/*
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index f6c108a3d673..8c13a0c77830 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -149,6 +149,7 @@ static int
149titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, 149titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
150 bool force) 150 bool force)
151{ 151{
152 unsigned int irq = d->irq;
152 spin_lock(&titan_irq_lock); 153 spin_lock(&titan_irq_lock);
153 titan_cpu_set_irq_affinity(irq - 16, *affinity); 154 titan_cpu_set_irq_affinity(irq - 16, *affinity);
154 titan_update_irq_hw(titan_cached_irq_mask); 155 titan_update_irq_hw(titan_cached_irq_mask);
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index b33fe7065b38..199a6b6de7f4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -35,7 +35,7 @@
35 : "cc", "memory") 35 : "cc", "memory")
36 36
37static inline int 37static inline int
38futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 38futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
39{ 39{
40 int op = (encoded_op >> 28) & 7; 40 int op = (encoded_op >> 28) & 7;
41 int cmp = (encoded_op >> 24) & 15; 41 int cmp = (encoded_op >> 24) & 15;
@@ -46,7 +46,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
46 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 46 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
47 oparg = 1 << oparg; 47 oparg = 1 << oparg;
48 48
49 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 49 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
50 return -EFAULT; 50 return -EFAULT;
51 51
52 pagefault_disable(); /* implies preempt_disable() */ 52 pagefault_disable(); /* implies preempt_disable() */
@@ -88,36 +88,35 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
88} 88}
89 89
90static inline int 90static inline int
91futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 91futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
92 u32 oldval, u32 newval)
92{ 93{
93 int val; 94 int ret = 0;
95 u32 val;
94 96
95 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 97 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
96 return -EFAULT; 98 return -EFAULT;
97 99
98 pagefault_disable(); /* implies preempt_disable() */
99
100 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" 100 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
101 "1: " T(ldr) " %0, [%3]\n" 101 "1: " T(ldr) " %1, [%4]\n"
102 " teq %0, %1\n" 102 " teq %1, %2\n"
103 " it eq @ explicit IT needed for the 2b label\n" 103 " it eq @ explicit IT needed for the 2b label\n"
104 "2: " T(streq) " %2, [%3]\n" 104 "2: " T(streq) " %3, [%4]\n"
105 "3:\n" 105 "3:\n"
106 " .pushsection __ex_table,\"a\"\n" 106 " .pushsection __ex_table,\"a\"\n"
107 " .align 3\n" 107 " .align 3\n"
108 " .long 1b, 4f, 2b, 4f\n" 108 " .long 1b, 4f, 2b, 4f\n"
109 " .popsection\n" 109 " .popsection\n"
110 " .pushsection .fixup,\"ax\"\n" 110 " .pushsection .fixup,\"ax\"\n"
111 "4: mov %0, %4\n" 111 "4: mov %0, %5\n"
112 " b 3b\n" 112 " b 3b\n"
113 " .popsection" 113 " .popsection"
114 : "=&r" (val) 114 : "+r" (ret), "=&r" (val)
115 : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) 115 : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT)
116 : "cc", "memory"); 116 : "cc", "memory");
117 117
118 pagefault_enable(); /* subsumes preempt_enable() */ 118 *uval = val;
119 119 return ret;
120 return val;
121} 120}
122 121
123#endif /* !SMP */ 122#endif /* !SMP */
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 84fed3b4b079..4c9e3e1ba5d1 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -26,7 +26,9 @@
26#define FLUSH_ALL (void*)0xffffffff 26#define FLUSH_ALL (void*)0xffffffff
27 27
28/* Vector of locks used for various atomic operations */ 28/* Vector of locks used for various atomic operations */
29spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; 29spinlock_t cris_atomic_locks[] = {
30 [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
31};
30 32
31/* CPU masks */ 33/* CPU masks */
32cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 34cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 08b3d1da3583..4bea27f50a7a 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,10 +7,11 @@
7#include <asm/errno.h> 7#include <asm/errno.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr); 10extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
11 11
12static inline int 12static inline int
13futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 13futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
14 u32 oldval, u32 newval)
14{ 15{
15 return -ENOSYS; 16 return -ENOSYS;
16} 17}
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index 14f64b054c7e..d155ca9e5098 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -18,7 +18,7 @@
18 * the various futex operations; MMU fault checking is ignored under no-MMU 18 * the various futex operations; MMU fault checking is ignored under no-MMU
19 * conditions 19 * conditions
20 */ 20 */
21static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_oldval) 21static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *_oldval)
22{ 22{
23 int oldval, ret; 23 int oldval, ret;
24 24
@@ -50,7 +50,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_o
50 return ret; 50 return ret;
51} 51}
52 52
53static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_oldval) 53static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *_oldval)
54{ 54{
55 int oldval, ret; 55 int oldval, ret;
56 56
@@ -83,7 +83,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_o
83 return ret; 83 return ret;
84} 84}
85 85
86static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_oldval) 86static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *_oldval)
87{ 87{
88 int oldval, ret; 88 int oldval, ret;
89 89
@@ -116,7 +116,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_ol
116 return ret; 116 return ret;
117} 117}
118 118
119static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_oldval) 119static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *_oldval)
120{ 120{
121 int oldval, ret; 121 int oldval, ret;
122 122
@@ -149,7 +149,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_o
149 return ret; 149 return ret;
150} 150}
151 151
152static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_oldval) 152static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_oldval)
153{ 153{
154 int oldval, ret; 154 int oldval, ret;
155 155
@@ -186,7 +186,7 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_o
186/* 186/*
187 * do the futex operations 187 * do the futex operations
188 */ 188 */
189int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 189int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
190{ 190{
191 int op = (encoded_op >> 28) & 7; 191 int op = (encoded_op >> 28) & 7;
192 int cmp = (encoded_op >> 24) & 15; 192 int cmp = (encoded_op >> 24) & 15;
@@ -197,7 +197,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
198 oparg = 1 << oparg; 198 oparg = 1 << oparg;
199 199
200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
201 return -EFAULT; 201 return -EFAULT;
202 202
203 pagefault_disable(); 203 pagefault_disable();
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index c7f0f062239c..8428525ddb22 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -46,7 +46,7 @@ do { \
46} while (0) 46} while (0)
47 47
48static inline int 48static inline int
49futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 49futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
50{ 50{
51 int op = (encoded_op >> 28) & 7; 51 int op = (encoded_op >> 28) & 7;
52 int cmp = (encoded_op >> 24) & 15; 52 int cmp = (encoded_op >> 24) & 15;
@@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
56 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 56 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
57 oparg = 1 << oparg; 57 oparg = 1 << oparg;
58 58
59 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 59 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
60 return -EFAULT; 60 return -EFAULT;
61 61
62 pagefault_disable(); 62 pagefault_disable();
@@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
100} 100}
101 101
102static inline int 102static inline int
103futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 103futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
104 u32 oldval, u32 newval)
104{ 105{
105 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 106 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
106 return -EFAULT; 107 return -EFAULT;
107 108
108 { 109 {
109 register unsigned long r8 __asm ("r8"); 110 register unsigned long r8 __asm ("r8") = 0;
111 unsigned long prev;
110 __asm__ __volatile__( 112 __asm__ __volatile__(
111 " mf;; \n" 113 " mf;; \n"
112 " mov ar.ccv=%3;; \n" 114 " mov ar.ccv=%3;; \n"
113 "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n" 115 "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n"
114 " .xdata4 \"__ex_table\", 1b-., 2f-. \n" 116 " .xdata4 \"__ex_table\", 1b-., 2f-. \n"
115 "[2:]" 117 "[2:]"
116 : "=r" (r8) 118 : "=r" (prev)
117 : "r" (uaddr), "r" (newval), 119 : "r" (uaddr), "r" (newval),
118 "rO" ((long) (unsigned) oldval) 120 "rO" ((long) (unsigned) oldval)
119 : "memory"); 121 : "memory");
122 *uval = prev;
120 return r8; 123 return r8;
121 } 124 }
122} 125}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
25#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." 25#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
26#endif 26#endif
27 27
28#include <linux/list.h>
29#include <linux/spinlock.h>
30
31#include <asm/intrinsics.h> 28#include <asm/intrinsics.h>
32 29
33/*
34 * the semaphore definition
35 */
36struct rw_semaphore {
37 signed long count;
38 spinlock_t wait_lock;
39 struct list_head wait_list;
40};
41
42#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) 30#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
43#define RWSEM_ACTIVE_BIAS (1L) 31#define RWSEM_ACTIVE_BIAS (1L)
44#define RWSEM_ACTIVE_MASK (0xffffffffL) 32#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
46#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 34#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
47#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 35#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
48 36
49#define __RWSEM_INITIALIZER(name) \
50 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
51 LIST_HEAD_INIT((name).wait_list) }
52
53#define DECLARE_RWSEM(name) \
54 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
55
56extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
57extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
58extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
59extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
60
61static inline void
62init_rwsem (struct rw_semaphore *sem)
63{
64 sem->count = RWSEM_UNLOCKED_VALUE;
65 spin_lock_init(&sem->wait_lock);
66 INIT_LIST_HEAD(&sem->wait_list);
67}
68
69/* 37/*
70 * lock for reading 38 * lock for reading
71 */ 39 */
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
174#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) 142#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
175#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) 143#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
176 144
177static inline int rwsem_is_locked(struct rw_semaphore *sem)
178{
179 return (sem->count != 0);
180}
181
182#endif /* _ASM_IA64_RWSEM_H */ 145#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc62366aa4..ed28bcd5bb85 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
107static inline int 107static inline int
108xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) 108xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
109{ 109{
110 return _hypercall2(int, sched_op_new, cmd, arg); 110 return _hypercall2(int, sched_op, cmd, arg);
111} 111}
112 112
113static inline long 113static inline long
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b048c6fa..419c8620945a 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
37 /* nothing */ 37 /* nothing */
38} 38}
39 39
40void xen_pre_device_suspend(void)
41{
42 /* nothing */
43}
44
45void 40void
46xen_pre_suspend() 41xen_arch_pre_suspend()
47{ 42{
48 /* nothing */ 43 /* nothing */
49} 44}
50 45
51void 46void
52xen_post_suspend(int suspend_cancelled) 47xen_arch_post_suspend(int suspend_cancelled)
53{ 48{
54 if (suspend_cancelled) 49 if (suspend_cancelled)
55 return; 50 return;
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index ad3fd61b2fe7..b0526d2716fa 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,7 +29,7 @@
29}) 29})
30 30
31static inline int 31static inline int
32futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 32futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7; 34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15; 35 int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg; 40 oparg = 1 << oparg;
41 41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT; 43 return -EFAULT;
44 44
45 pagefault_disable(); 45 pagefault_disable();
@@ -94,31 +94,34 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
94} 94}
95 95
96static inline int 96static inline int
97futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 97futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
98 u32 oldval, u32 newval)
98{ 99{
99 int prev, cmp; 100 int ret = 0, cmp;
101 u32 prev;
100 102
101 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 103 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
102 return -EFAULT; 104 return -EFAULT;
103 105
104 __asm__ __volatile__ ("1: lwx %0, %2, r0; \ 106 __asm__ __volatile__ ("1: lwx %1, %3, r0; \
105 cmp %1, %0, %3; \ 107 cmp %2, %1, %4; \
106 beqi %1, 3f; \ 108 beqi %2, 3f; \
107 2: swx %4, %2, r0; \ 109 2: swx %5, %3, r0; \
108 addic %1, r0, 0; \ 110 addic %2, r0, 0; \
109 bnei %1, 1b; \ 111 bnei %2, 1b; \
110 3: \ 112 3: \
111 .section .fixup,\"ax\"; \ 113 .section .fixup,\"ax\"; \
112 4: brid 3b; \ 114 4: brid 3b; \
113 addik %0, r0, %5; \ 115 addik %0, r0, %6; \
114 .previous; \ 116 .previous; \
115 .section __ex_table,\"a\"; \ 117 .section __ex_table,\"a\"; \
116 .word 1b,4b,2b,4b; \ 118 .word 1b,4b,2b,4b; \
117 .previous;" \ 119 .previous;" \
118 : "=&r" (prev), "=&r"(cmp) \ 120 : "+r" (ret), "=&r" (prev), "=&r"(cmp) \
119 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)); 121 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT));
120 122
121 return prev; 123 *uval = prev;
124 return ret;
122} 125}
123 126
124#endif /* __KERNEL__ */ 127#endif /* __KERNEL__ */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc0566bc2..d88983516e26 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
4 select HAVE_GENERIC_DMA_COHERENT 4 select HAVE_GENERIC_DMA_COHERENT
5 select HAVE_IDE 5 select HAVE_IDE
6 select HAVE_OPROFILE 6 select HAVE_OPROFILE
7 select HAVE_IRQ_WORK
7 select HAVE_PERF_EVENTS 8 select HAVE_PERF_EVENTS
8 select PERF_USE_VMALLOC 9 select PERF_USE_VMALLOC
9 select HAVE_ARCH_KGDB 10 select HAVE_ARCH_KGDB
@@ -208,6 +209,7 @@ config MACH_JZ4740
208 select ARCH_REQUIRE_GPIOLIB 209 select ARCH_REQUIRE_GPIOLIB
209 select SYS_HAS_EARLY_PRINTK 210 select SYS_HAS_EARLY_PRINTK
210 select HAVE_PWM 211 select HAVE_PWM
212 select HAVE_CLK
211 213
212config LASAT 214config LASAT
213 bool "LASAT Networks platforms" 215 bool "LASAT Networks platforms"
@@ -333,6 +335,8 @@ config PNX8550_STB810
333config PMC_MSP 335config PMC_MSP
334 bool "PMC-Sierra MSP chipsets" 336 bool "PMC-Sierra MSP chipsets"
335 depends on EXPERIMENTAL 337 depends on EXPERIMENTAL
338 select CEVT_R4K
339 select CSRC_R4K
336 select DMA_NONCOHERENT 340 select DMA_NONCOHERENT
337 select SWAP_IO_SPACE 341 select SWAP_IO_SPACE
338 select NO_EXCEPT_FILL 342 select NO_EXCEPT_FILL
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index 6398fa95905c..40b84b991191 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert);
54 54
55static void mtx1_reset(char *c) 55static void mtx1_reset(char *c)
56{ 56{
57 /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ 57 /* Jump to the reset vector */
58 au_writel(0x00000000, 0xAE00001C); 58 __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
59} 59}
60 60
61static void mtx1_power_off(void) 61static void mtx1_power_off(void)
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index e30e42add697..956f946218c5 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -28,6 +28,8 @@
28#include <linux/mtd/physmap.h> 28#include <linux/mtd/physmap.h>
29#include <mtd/mtd-abi.h> 29#include <mtd/mtd-abi.h>
30 30
31#include <asm/mach-au1x00/au1xxx_eth.h>
32
31static struct gpio_keys_button mtx1_gpio_button[] = { 33static struct gpio_keys_button mtx1_gpio_button[] = {
32 { 34 {
33 .gpio = 207, 35 .gpio = 207,
@@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = {
140 &mtx1_mtd, 142 &mtx1_mtd,
141}; 143};
142 144
145static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = {
146 .phy_search_highest_addr = 1,
147 .phy1_search_mac0 = 1,
148};
149
143static int __init mtx1_register_devices(void) 150static int __init mtx1_register_devices(void)
144{ 151{
145 int rc; 152 int rc;
146 153
154 au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata);
155
147 rc = gpio_request(mtx1_gpio_button[0].gpio, 156 rc = gpio_request(mtx1_gpio_button[0].gpio,
148 mtx1_gpio_button[0].desc); 157 mtx1_gpio_button[0].desc);
149 if (rc < 0) { 158 if (rc < 0) {
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index b43c918925d3..80c521e5290d 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -36,8 +36,8 @@
36 36
37static void xxs1500_reset(char *c) 37static void xxs1500_reset(char *c)
38{ 38{
39 /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ 39 /* Jump to the reset vector */
40 au_writel(0x00000000, 0xAE00001C); 40 __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
41} 41}
42 42
43static void xxs1500_power_off(void) 43static void xxs1500_power_off(void)
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b9cce90346cf..6ebf1734b411 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -75,7 +75,7 @@
75} 75}
76 76
77static inline int 77static inline int
78futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 78futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
79{ 79{
80 int op = (encoded_op >> 28) & 7; 80 int op = (encoded_op >> 28) & 7;
81 int cmp = (encoded_op >> 24) & 15; 81 int cmp = (encoded_op >> 24) & 15;
@@ -85,7 +85,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
85 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 85 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
86 oparg = 1 << oparg; 86 oparg = 1 << oparg;
87 87
88 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 88 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
89 return -EFAULT; 89 return -EFAULT;
90 90
91 pagefault_disable(); 91 pagefault_disable();
@@ -132,11 +132,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
132} 132}
133 133
134static inline int 134static inline int
135futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 135futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
136 u32 oldval, u32 newval)
136{ 137{
137 int retval; 138 int ret = 0;
139 u32 val;
138 140
139 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 141 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
140 return -EFAULT; 142 return -EFAULT;
141 143
142 if (cpu_has_llsc && R10000_LLSC_WAR) { 144 if (cpu_has_llsc && R10000_LLSC_WAR) {
@@ -145,25 +147,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
145 " .set push \n" 147 " .set push \n"
146 " .set noat \n" 148 " .set noat \n"
147 " .set mips3 \n" 149 " .set mips3 \n"
148 "1: ll %0, %2 \n" 150 "1: ll %1, %3 \n"
149 " bne %0, %z3, 3f \n" 151 " bne %1, %z4, 3f \n"
150 " .set mips0 \n" 152 " .set mips0 \n"
151 " move $1, %z4 \n" 153 " move $1, %z5 \n"
152 " .set mips3 \n" 154 " .set mips3 \n"
153 "2: sc $1, %1 \n" 155 "2: sc $1, %2 \n"
154 " beqzl $1, 1b \n" 156 " beqzl $1, 1b \n"
155 __WEAK_LLSC_MB 157 __WEAK_LLSC_MB
156 "3: \n" 158 "3: \n"
157 " .set pop \n" 159 " .set pop \n"
158 " .section .fixup,\"ax\" \n" 160 " .section .fixup,\"ax\" \n"
159 "4: li %0, %5 \n" 161 "4: li %0, %6 \n"
160 " j 3b \n" 162 " j 3b \n"
161 " .previous \n" 163 " .previous \n"
162 " .section __ex_table,\"a\" \n" 164 " .section __ex_table,\"a\" \n"
163 " "__UA_ADDR "\t1b, 4b \n" 165 " "__UA_ADDR "\t1b, 4b \n"
164 " "__UA_ADDR "\t2b, 4b \n" 166 " "__UA_ADDR "\t2b, 4b \n"
165 " .previous \n" 167 " .previous \n"
166 : "=&r" (retval), "=R" (*uaddr) 168 : "+r" (ret), "=&r" (val), "=R" (*uaddr)
167 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) 169 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
168 : "memory"); 170 : "memory");
169 } else if (cpu_has_llsc) { 171 } else if (cpu_has_llsc) {
@@ -172,31 +174,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
172 " .set push \n" 174 " .set push \n"
173 " .set noat \n" 175 " .set noat \n"
174 " .set mips3 \n" 176 " .set mips3 \n"
175 "1: ll %0, %2 \n" 177 "1: ll %1, %3 \n"
176 " bne %0, %z3, 3f \n" 178 " bne %1, %z4, 3f \n"
177 " .set mips0 \n" 179 " .set mips0 \n"
178 " move $1, %z4 \n" 180 " move $1, %z5 \n"
179 " .set mips3 \n" 181 " .set mips3 \n"
180 "2: sc $1, %1 \n" 182 "2: sc $1, %2 \n"
181 " beqz $1, 1b \n" 183 " beqz $1, 1b \n"
182 __WEAK_LLSC_MB 184 __WEAK_LLSC_MB
183 "3: \n" 185 "3: \n"
184 " .set pop \n" 186 " .set pop \n"
185 " .section .fixup,\"ax\" \n" 187 " .section .fixup,\"ax\" \n"
186 "4: li %0, %5 \n" 188 "4: li %0, %6 \n"
187 " j 3b \n" 189 " j 3b \n"
188 " .previous \n" 190 " .previous \n"
189 " .section __ex_table,\"a\" \n" 191 " .section __ex_table,\"a\" \n"
190 " "__UA_ADDR "\t1b, 4b \n" 192 " "__UA_ADDR "\t1b, 4b \n"
191 " "__UA_ADDR "\t2b, 4b \n" 193 " "__UA_ADDR "\t2b, 4b \n"
192 " .previous \n" 194 " .previous \n"
193 : "=&r" (retval), "=R" (*uaddr) 195 : "+r" (ret), "=&r" (val), "=R" (*uaddr)
194 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) 196 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
195 : "memory"); 197 : "memory");
196 } else 198 } else
197 return -ENOSYS; 199 return -ENOSYS;
198 200
199 return retval; 201 *uval = val;
202 return ret;
200} 203}
201 204
202#endif 205#endif
diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h
index e00007cf8162..d0c77496c728 100644
--- a/arch/mips/include/asm/perf_event.h
+++ b/arch/mips/include/asm/perf_event.h
@@ -11,15 +11,5 @@
11 11
12#ifndef __MIPS_PERF_EVENT_H__ 12#ifndef __MIPS_PERF_EVENT_H__
13#define __MIPS_PERF_EVENT_H__ 13#define __MIPS_PERF_EVENT_H__
14 14/* Leave it empty here. The file is required by linux/perf_event.h */
15/*
16 * MIPS performance counters do not raise NMI upon overflow, a regular
17 * interrupt will be signaled. Hence we can do the pending perf event
18 * work at the tail of the irq handler.
19 */
20static inline void
21set_perf_event_pending(void)
22{
23}
24
25#endif /* __MIPS_PERF_EVENT_H__ */ 15#endif /* __MIPS_PERF_EVENT_H__ */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 5a84a1f11231..94ca2b018af7 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -17,29 +17,13 @@
17#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
18#include <asm/uasm.h> 18#include <asm/uasm.h>
19 19
20/* 20#include <asm-generic/sections.h>
21 * If the Instruction Pointer is in module space (0xc0000000), return true;
22 * otherwise, it is in kernel space (0x80000000), return false.
23 *
24 * FIXME: This will not work when the kernel space and module space are the
25 * same. If they are the same, we need to modify scripts/recordmcount.pl,
26 * ftrace_make_nop/call() and the other related parts to ensure the
27 * enabling/disabling of the calling site to _mcount is right for both kernel
28 * and module.
29 */
30
31static inline int in_module(unsigned long ip)
32{
33 return ip & 0x40000000;
34}
35 21
36#ifdef CONFIG_DYNAMIC_FTRACE 22#ifdef CONFIG_DYNAMIC_FTRACE
37 23
38#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ 24#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
39#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ 25#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
40 26
41#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */
42#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */
43#define INSN_NOP 0x00000000 /* nop */ 27#define INSN_NOP 0x00000000 /* nop */
44#define INSN_JAL(addr) \ 28#define INSN_JAL(addr) \
45 ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK))) 29 ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK)))
@@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void)
69#endif 53#endif
70} 54}
71 55
56/*
57 * Check if the address is in kernel space
58 *
59 * Clone core_kernel_text() from kernel/extable.c, but doesn't call
60 * init_kernel_text() for Ftrace doesn't trace functions in init sections.
61 */
62static inline int in_kernel_space(unsigned long ip)
63{
64 if (ip >= (unsigned long)_stext &&
65 ip <= (unsigned long)_etext)
66 return 1;
67 return 0;
68}
69
72static int ftrace_modify_code(unsigned long ip, unsigned int new_code) 70static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
73{ 71{
74 int faulted; 72 int faulted;
@@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
84 return 0; 82 return 0;
85} 83}
86 84
85/*
86 * The details about the calling site of mcount on MIPS
87 *
88 * 1. For kernel:
89 *
90 * move at, ra
91 * jal _mcount --> nop
92 *
93 * 2. For modules:
94 *
95 * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT
96 *
97 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
98 * addiu v1, v1, low_16bit_of_mcount
99 * move at, ra
100 * move $12, ra_address
101 * jalr v1
102 * sub sp, sp, 8
103 * 1: offset = 5 instructions
104 * 2.2 For the Other situations
105 *
106 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
107 * addiu v1, v1, low_16bit_of_mcount
108 * move at, ra
109 * jalr v1
110 * nop | move $12, ra_address | sub sp, sp, 8
111 * 1: offset = 4 instructions
112 */
113
114#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
115#define MCOUNT_OFFSET_INSNS 5
116#else
117#define MCOUNT_OFFSET_INSNS 4
118#endif
119#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
120
87int ftrace_make_nop(struct module *mod, 121int ftrace_make_nop(struct module *mod,
88 struct dyn_ftrace *rec, unsigned long addr) 122 struct dyn_ftrace *rec, unsigned long addr)
89{ 123{
@@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod,
91 unsigned long ip = rec->ip; 125 unsigned long ip = rec->ip;
92 126
93 /* 127 /*
94 * We have compiled module with -mlong-calls, but compiled the kernel 128 * If ip is in kernel space, no long call, otherwise, long call is
95 * without it, we need to cope with them respectively. 129 * needed.
96 */ 130 */
97 if (in_module(ip)) { 131 new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
98#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) 132
99 /*
100 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
101 * addiu v1, v1, low_16bit_of_mcount
102 * move at, ra
103 * move $12, ra_address
104 * jalr v1
105 * sub sp, sp, 8
106 * 1: offset = 5 instructions
107 */
108 new = INSN_B_1F_5;
109#else
110 /*
111 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
112 * addiu v1, v1, low_16bit_of_mcount
113 * move at, ra
114 * jalr v1
115 * nop | move $12, ra_address | sub sp, sp, 8
116 * 1: offset = 4 instructions
117 */
118 new = INSN_B_1F_4;
119#endif
120 } else {
121 /*
122 * move at, ra
123 * jal _mcount --> nop
124 */
125 new = INSN_NOP;
126 }
127 return ftrace_modify_code(ip, new); 133 return ftrace_modify_code(ip, new);
128} 134}
129 135
@@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
132 unsigned int new; 138 unsigned int new;
133 unsigned long ip = rec->ip; 139 unsigned long ip = rec->ip;
134 140
135 /* ip, module: 0xc0000000, kernel: 0x80000000 */ 141 new = in_kernel_space(ip) ? insn_jal_ftrace_caller :
136 new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller; 142 insn_lui_v1_hi16_mcount;
137 143
138 return ftrace_modify_code(ip, new); 144 return ftrace_modify_code(ip, new);
139} 145}
@@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void)
190#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ 196#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */
191#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ 197#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */
192 198
193unsigned long ftrace_get_parent_addr(unsigned long self_addr, 199unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
194 unsigned long parent, 200 old_parent_ra, unsigned long parent_ra_addr, unsigned long fp)
195 unsigned long parent_addr,
196 unsigned long fp)
197{ 201{
198 unsigned long sp, ip, ra; 202 unsigned long sp, ip, tmp;
199 unsigned int code; 203 unsigned int code;
200 int faulted; 204 int faulted;
201 205
202 /* 206 /*
203 * For module, move the ip from calling site of mcount to the 207 * For module, move the ip from the return address after the
204 * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for 208 * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
205 * kernel, move to the instruction "move ra, at"(offset is 12) 209 * kernel, move after the instruction "move ra, at"(offset is 16)
206 */ 210 */
207 ip = self_addr - (in_module(self_addr) ? 20 : 12); 211 ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
208 212
209 /* 213 /*
210 * search the text until finding the non-store instruction or "s{d,w} 214 * search the text until finding the non-store instruction or "s{d,w}
211 * ra, offset(sp)" instruction 215 * ra, offset(sp)" instruction
212 */ 216 */
213 do { 217 do {
214 ip -= 4;
215
216 /* get the code at "ip": code = *(unsigned int *)ip; */ 218 /* get the code at "ip": code = *(unsigned int *)ip; */
217 safe_load_code(code, ip, faulted); 219 safe_load_code(code, ip, faulted);
218 220
@@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
224 * store the ra on the stack 226 * store the ra on the stack
225 */ 227 */
226 if ((code & S_R_SP) != S_R_SP) 228 if ((code & S_R_SP) != S_R_SP)
227 return parent_addr; 229 return parent_ra_addr;
228 230
229 } while (((code & S_RA_SP) != S_RA_SP)); 231 /* Move to the next instruction */
232 ip -= 4;
233 } while ((code & S_RA_SP) != S_RA_SP);
230 234
231 sp = fp + (code & OFFSET_MASK); 235 sp = fp + (code & OFFSET_MASK);
232 236
233 /* ra = *(unsigned long *)sp; */ 237 /* tmp = *(unsigned long *)sp; */
234 safe_load_stack(ra, sp, faulted); 238 safe_load_stack(tmp, sp, faulted);
235 if (unlikely(faulted)) 239 if (unlikely(faulted))
236 return 0; 240 return 0;
237 241
238 if (ra == parent) 242 if (tmp == old_parent_ra)
239 return sp; 243 return sp;
240 return 0; 244 return 0;
241} 245}
@@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
246 * Hook the return address and push it in the stack of return addrs 250 * Hook the return address and push it in the stack of return addrs
247 * in current thread info. 251 * in current thread info.
248 */ 252 */
249void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, 253void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
250 unsigned long fp) 254 unsigned long fp)
251{ 255{
252 unsigned long old; 256 unsigned long old_parent_ra;
253 struct ftrace_graph_ent trace; 257 struct ftrace_graph_ent trace;
254 unsigned long return_hooker = (unsigned long) 258 unsigned long return_hooker = (unsigned long)
255 &return_to_handler; 259 &return_to_handler;
256 int faulted; 260 int faulted, insns;
257 261
258 if (unlikely(atomic_read(&current->tracing_graph_pause))) 262 if (unlikely(atomic_read(&current->tracing_graph_pause)))
259 return; 263 return;
260 264
261 /* 265 /*
262 * "parent" is the stack address saved the return address of the caller 266 * "parent_ra_addr" is the stack address saved the return address of
263 * of _mcount. 267 * the caller of _mcount.
264 * 268 *
265 * if the gcc < 4.5, a leaf function does not save the return address 269 * if the gcc < 4.5, a leaf function does not save the return address
266 * in the stack address, so, we "emulate" one in _mcount's stack space, 270 * in the stack address, so, we "emulate" one in _mcount's stack space,
@@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
275 * do it in ftrace_graph_caller of mcount.S. 279 * do it in ftrace_graph_caller of mcount.S.
276 */ 280 */
277 281
278 /* old = *parent; */ 282 /* old_parent_ra = *parent_ra_addr; */
279 safe_load_stack(old, parent, faulted); 283 safe_load_stack(old_parent_ra, parent_ra_addr, faulted);
280 if (unlikely(faulted)) 284 if (unlikely(faulted))
281 goto out; 285 goto out;
282#ifndef KBUILD_MCOUNT_RA_ADDRESS 286#ifndef KBUILD_MCOUNT_RA_ADDRESS
283 parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old, 287 parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra,
284 (unsigned long)parent, fp); 288 old_parent_ra, (unsigned long)parent_ra_addr, fp);
285 /* 289 /*
286 * If fails when getting the stack address of the non-leaf function's 290 * If fails when getting the stack address of the non-leaf function's
287 * ra, stop function graph tracer and return 291 * ra, stop function graph tracer and return
288 */ 292 */
289 if (parent == 0) 293 if (parent_ra_addr == 0)
290 goto out; 294 goto out;
291#endif 295#endif
292 /* *parent = return_hooker; */ 296 /* *parent_ra_addr = return_hooker; */
293 safe_store_stack(return_hooker, parent, faulted); 297 safe_store_stack(return_hooker, parent_ra_addr, faulted);
294 if (unlikely(faulted)) 298 if (unlikely(faulted))
295 goto out; 299 goto out;
296 300
297 if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) == 301 if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
298 -EBUSY) { 302 == -EBUSY) {
299 *parent = old; 303 *parent_ra_addr = old_parent_ra;
300 return; 304 return;
301 } 305 }
302 306
303 trace.func = self_addr; 307 /*
308 * Get the recorded ip of the current mcount calling site in the
309 * __mcount_loc section, which will be used to filter the function
310 * entries configured through the tracing/set_graph_function interface.
311 */
312
313 insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
314 trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
304 315
305 /* Only trace if the calling function expects to */ 316 /* Only trace if the calling function expects to */
306 if (!ftrace_graph_entry(&trace)) { 317 if (!ftrace_graph_entry(&trace)) {
307 current->curr_ret_stack--; 318 current->curr_ret_stack--;
308 *parent = old; 319 *parent_ra_addr = old_parent_ra;
309 } 320 }
310 return; 321 return;
311out: 322out:
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index 2b7f3f703b83..a8244854d3dc 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event,
161 return ret; 161 return ret;
162} 162}
163 163
164static int mipspmu_enable(struct perf_event *event)
165{
166 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
167 struct hw_perf_event *hwc = &event->hw;
168 int idx;
169 int err = 0;
170
171 /* To look for a free counter for this event. */
172 idx = mipspmu->alloc_counter(cpuc, hwc);
173 if (idx < 0) {
174 err = idx;
175 goto out;
176 }
177
178 /*
179 * If there is an event in the counter we are going to use then
180 * make sure it is disabled.
181 */
182 event->hw.idx = idx;
183 mipspmu->disable_event(idx);
184 cpuc->events[idx] = event;
185
186 /* Set the period for the event. */
187 mipspmu_event_set_period(event, hwc, idx);
188
189 /* Enable the event. */
190 mipspmu->enable_event(hwc, idx);
191
192 /* Propagate our changes to the userspace mapping. */
193 perf_event_update_userpage(event);
194
195out:
196 return err;
197}
198
199static void mipspmu_event_update(struct perf_event *event, 164static void mipspmu_event_update(struct perf_event *event,
200 struct hw_perf_event *hwc, 165 struct hw_perf_event *hwc,
201 int idx) 166 int idx)
@@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event,
204 unsigned long flags; 169 unsigned long flags;
205 int shift = 64 - TOTAL_BITS; 170 int shift = 64 - TOTAL_BITS;
206 s64 prev_raw_count, new_raw_count; 171 s64 prev_raw_count, new_raw_count;
207 s64 delta; 172 u64 delta;
208 173
209again: 174again:
210 prev_raw_count = local64_read(&hwc->prev_count); 175 prev_raw_count = local64_read(&hwc->prev_count);
@@ -231,32 +196,90 @@ again:
231 return; 196 return;
232} 197}
233 198
234static void mipspmu_disable(struct perf_event *event) 199static void mipspmu_start(struct perf_event *event, int flags)
200{
201 struct hw_perf_event *hwc = &event->hw;
202
203 if (!mipspmu)
204 return;
205
206 if (flags & PERF_EF_RELOAD)
207 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
208
209 hwc->state = 0;
210
211 /* Set the period for the event. */
212 mipspmu_event_set_period(event, hwc, hwc->idx);
213
214 /* Enable the event. */
215 mipspmu->enable_event(hwc, hwc->idx);
216}
217
218static void mipspmu_stop(struct perf_event *event, int flags)
219{
220 struct hw_perf_event *hwc = &event->hw;
221
222 if (!mipspmu)
223 return;
224
225 if (!(hwc->state & PERF_HES_STOPPED)) {
226 /* We are working on a local event. */
227 mipspmu->disable_event(hwc->idx);
228 barrier();
229 mipspmu_event_update(event, hwc, hwc->idx);
230 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
231 }
232}
233
234static int mipspmu_add(struct perf_event *event, int flags)
235{ 235{
236 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 236 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
237 struct hw_perf_event *hwc = &event->hw; 237 struct hw_perf_event *hwc = &event->hw;
238 int idx = hwc->idx; 238 int idx;
239 int err = 0;
239 240
241 perf_pmu_disable(event->pmu);
240 242
241 WARN_ON(idx < 0 || idx >= mipspmu->num_counters); 243 /* To look for a free counter for this event. */
244 idx = mipspmu->alloc_counter(cpuc, hwc);
245 if (idx < 0) {
246 err = idx;
247 goto out;
248 }
242 249
243 /* We are working on a local event. */ 250 /*
251 * If there is an event in the counter we are going to use then
252 * make sure it is disabled.
253 */
254 event->hw.idx = idx;
244 mipspmu->disable_event(idx); 255 mipspmu->disable_event(idx);
256 cpuc->events[idx] = event;
245 257
246 barrier(); 258 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
247 259 if (flags & PERF_EF_START)
248 mipspmu_event_update(event, hwc, idx); 260 mipspmu_start(event, PERF_EF_RELOAD);
249 cpuc->events[idx] = NULL;
250 clear_bit(idx, cpuc->used_mask);
251 261
262 /* Propagate our changes to the userspace mapping. */
252 perf_event_update_userpage(event); 263 perf_event_update_userpage(event);
264
265out:
266 perf_pmu_enable(event->pmu);
267 return err;
253} 268}
254 269
255static void mipspmu_unthrottle(struct perf_event *event) 270static void mipspmu_del(struct perf_event *event, int flags)
256{ 271{
272 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
257 struct hw_perf_event *hwc = &event->hw; 273 struct hw_perf_event *hwc = &event->hw;
274 int idx = hwc->idx;
258 275
259 mipspmu->enable_event(hwc, hwc->idx); 276 WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
277
278 mipspmu_stop(event, PERF_EF_UPDATE);
279 cpuc->events[idx] = NULL;
280 clear_bit(idx, cpuc->used_mask);
281
282 perf_event_update_userpage(event);
260} 283}
261 284
262static void mipspmu_read(struct perf_event *event) 285static void mipspmu_read(struct perf_event *event)
@@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event)
270 mipspmu_event_update(event, hwc, hwc->idx); 293 mipspmu_event_update(event, hwc, hwc->idx);
271} 294}
272 295
273static struct pmu pmu = { 296static void mipspmu_enable(struct pmu *pmu)
274 .enable = mipspmu_enable, 297{
275 .disable = mipspmu_disable, 298 if (mipspmu)
276 .unthrottle = mipspmu_unthrottle, 299 mipspmu->start();
277 .read = mipspmu_read, 300}
278}; 301
302static void mipspmu_disable(struct pmu *pmu)
303{
304 if (mipspmu)
305 mipspmu->stop();
306}
279 307
280static atomic_t active_events = ATOMIC_INIT(0); 308static atomic_t active_events = ATOMIC_INIT(0);
281static DEFINE_MUTEX(pmu_reserve_mutex); 309static DEFINE_MUTEX(pmu_reserve_mutex);
@@ -318,6 +346,82 @@ static void mipspmu_free_irq(void)
318 perf_irq = save_perf_irq; 346 perf_irq = save_perf_irq;
319} 347}
320 348
349/*
350 * mipsxx/rm9000/loongson2 have different performance counters, they have
351 * specific low-level init routines.
352 */
353static void reset_counters(void *arg);
354static int __hw_perf_event_init(struct perf_event *event);
355
356static void hw_perf_event_destroy(struct perf_event *event)
357{
358 if (atomic_dec_and_mutex_lock(&active_events,
359 &pmu_reserve_mutex)) {
360 /*
361 * We must not call the destroy function with interrupts
362 * disabled.
363 */
364 on_each_cpu(reset_counters,
365 (void *)(long)mipspmu->num_counters, 1);
366 mipspmu_free_irq();
367 mutex_unlock(&pmu_reserve_mutex);
368 }
369}
370
371static int mipspmu_event_init(struct perf_event *event)
372{
373 int err = 0;
374
375 switch (event->attr.type) {
376 case PERF_TYPE_RAW:
377 case PERF_TYPE_HARDWARE:
378 case PERF_TYPE_HW_CACHE:
379 break;
380
381 default:
382 return -ENOENT;
383 }
384
385 if (!mipspmu || event->cpu >= nr_cpumask_bits ||
386 (event->cpu >= 0 && !cpu_online(event->cpu)))
387 return -ENODEV;
388
389 if (!atomic_inc_not_zero(&active_events)) {
390 if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
391 atomic_dec(&active_events);
392 return -ENOSPC;
393 }
394
395 mutex_lock(&pmu_reserve_mutex);
396 if (atomic_read(&active_events) == 0)
397 err = mipspmu_get_irq();
398
399 if (!err)
400 atomic_inc(&active_events);
401 mutex_unlock(&pmu_reserve_mutex);
402 }
403
404 if (err)
405 return err;
406
407 err = __hw_perf_event_init(event);
408 if (err)
409 hw_perf_event_destroy(event);
410
411 return err;
412}
413
414static struct pmu pmu = {
415 .pmu_enable = mipspmu_enable,
416 .pmu_disable = mipspmu_disable,
417 .event_init = mipspmu_event_init,
418 .add = mipspmu_add,
419 .del = mipspmu_del,
420 .start = mipspmu_start,
421 .stop = mipspmu_stop,
422 .read = mipspmu_read,
423};
424
321static inline unsigned int 425static inline unsigned int
322mipspmu_perf_event_encode(const struct mips_perf_event *pev) 426mipspmu_perf_event_encode(const struct mips_perf_event *pev)
323{ 427{
@@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc,
382{ 486{
383 struct hw_perf_event fake_hwc = event->hw; 487 struct hw_perf_event fake_hwc = event->hw;
384 488
385 if (event->pmu && event->pmu != &pmu) 489 /* Allow mixed event group. So return 1 to pass validation. */
386 return 0; 490 if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
491 return 1;
387 492
388 return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0; 493 return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
389} 494}
@@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event)
409 return 0; 514 return 0;
410} 515}
411 516
412/*
413 * mipsxx/rm9000/loongson2 have different performance counters, they have
414 * specific low-level init routines.
415 */
416static void reset_counters(void *arg);
417static int __hw_perf_event_init(struct perf_event *event);
418
419static void hw_perf_event_destroy(struct perf_event *event)
420{
421 if (atomic_dec_and_mutex_lock(&active_events,
422 &pmu_reserve_mutex)) {
423 /*
424 * We must not call the destroy function with interrupts
425 * disabled.
426 */
427 on_each_cpu(reset_counters,
428 (void *)(long)mipspmu->num_counters, 1);
429 mipspmu_free_irq();
430 mutex_unlock(&pmu_reserve_mutex);
431 }
432}
433
434const struct pmu *hw_perf_event_init(struct perf_event *event)
435{
436 int err = 0;
437
438 if (!mipspmu || event->cpu >= nr_cpumask_bits ||
439 (event->cpu >= 0 && !cpu_online(event->cpu)))
440 return ERR_PTR(-ENODEV);
441
442 if (!atomic_inc_not_zero(&active_events)) {
443 if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
444 atomic_dec(&active_events);
445 return ERR_PTR(-ENOSPC);
446 }
447
448 mutex_lock(&pmu_reserve_mutex);
449 if (atomic_read(&active_events) == 0)
450 err = mipspmu_get_irq();
451
452 if (!err)
453 atomic_inc(&active_events);
454 mutex_unlock(&pmu_reserve_mutex);
455 }
456
457 if (err)
458 return ERR_PTR(err);
459
460 err = __hw_perf_event_init(event);
461 if (err)
462 hw_perf_event_destroy(event);
463
464 return err ? ERR_PTR(err) : &pmu;
465}
466
467void hw_perf_enable(void)
468{
469 if (mipspmu)
470 mipspmu->start();
471}
472
473void hw_perf_disable(void)
474{
475 if (mipspmu)
476 mipspmu->stop();
477}
478
479/* This is needed by specific irq handlers in perf_event_*.c */ 517/* This is needed by specific irq handlers in perf_event_*.c */
480static void 518static void
481handle_associated_event(struct cpu_hw_events *cpuc, 519handle_associated_event(struct cpu_hw_events *cpuc,
@@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc,
496#include "perf_event_mipsxx.c" 534#include "perf_event_mipsxx.c"
497 535
498/* Callchain handling code. */ 536/* Callchain handling code. */
499static inline void
500callchain_store(struct perf_callchain_entry *entry,
501 u64 ip)
502{
503 if (entry->nr < PERF_MAX_STACK_DEPTH)
504 entry->ip[entry->nr++] = ip;
505}
506 537
507/* 538/*
508 * Leave userspace callchain empty for now. When we find a way to trace 539 * Leave userspace callchain empty for now. When we find a way to trace
509 * the user stack callchains, we add here. 540 * the user stack callchains, we add here.
510 */ 541 */
511static void 542void perf_callchain_user(struct perf_callchain_entry *entry,
512perf_callchain_user(struct pt_regs *regs, 543 struct pt_regs *regs)
513 struct perf_callchain_entry *entry)
514{ 544{
515} 545}
516 546
@@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
523 while (!kstack_end(sp)) { 553 while (!kstack_end(sp)) {
524 addr = *sp++; 554 addr = *sp++;
525 if (__kernel_text_address(addr)) { 555 if (__kernel_text_address(addr)) {
526 callchain_store(entry, addr); 556 perf_callchain_store(entry, addr);
527 if (entry->nr >= PERF_MAX_STACK_DEPTH) 557 if (entry->nr >= PERF_MAX_STACK_DEPTH)
528 break; 558 break;
529 } 559 }
530 } 560 }
531} 561}
532 562
533static void 563void perf_callchain_kernel(struct perf_callchain_entry *entry,
534perf_callchain_kernel(struct pt_regs *regs, 564 struct pt_regs *regs)
535 struct perf_callchain_entry *entry)
536{ 565{
537 unsigned long sp = regs->regs[29]; 566 unsigned long sp = regs->regs[29];
538#ifdef CONFIG_KALLSYMS 567#ifdef CONFIG_KALLSYMS
539 unsigned long ra = regs->regs[31]; 568 unsigned long ra = regs->regs[31];
540 unsigned long pc = regs->cp0_epc; 569 unsigned long pc = regs->cp0_epc;
541 570
542 callchain_store(entry, PERF_CONTEXT_KERNEL);
543 if (raw_show_trace || !__kernel_text_address(pc)) { 571 if (raw_show_trace || !__kernel_text_address(pc)) {
544 unsigned long stack_page = 572 unsigned long stack_page =
545 (unsigned long)task_stack_page(current); 573 (unsigned long)task_stack_page(current);
@@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs,
549 return; 577 return;
550 } 578 }
551 do { 579 do {
552 callchain_store(entry, pc); 580 perf_callchain_store(entry, pc);
553 if (entry->nr >= PERF_MAX_STACK_DEPTH) 581 if (entry->nr >= PERF_MAX_STACK_DEPTH)
554 break; 582 break;
555 pc = unwind_stack(current, &sp, pc, &ra); 583 pc = unwind_stack(current, &sp, pc, &ra);
556 } while (pc); 584 } while (pc);
557#else 585#else
558 callchain_store(entry, PERF_CONTEXT_KERNEL);
559 save_raw_perf_callchain(entry, sp); 586 save_raw_perf_callchain(entry, sp);
560#endif 587#endif
561} 588}
562
563static void
564perf_do_callchain(struct pt_regs *regs,
565 struct perf_callchain_entry *entry)
566{
567 int is_user;
568
569 if (!regs)
570 return;
571
572 is_user = user_mode(regs);
573
574 if (!current || !current->pid)
575 return;
576
577 if (is_user && current->state != TASK_RUNNING)
578 return;
579
580 if (!is_user) {
581 perf_callchain_kernel(regs, entry);
582 if (current->mm)
583 regs = task_pt_regs(current);
584 else
585 regs = NULL;
586 }
587 if (regs)
588 perf_callchain_user(regs, entry);
589}
590
591static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
592
593struct perf_callchain_entry *
594perf_callchain(struct pt_regs *regs)
595{
596 struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
597
598 entry->nr = 0;
599 perf_do_callchain(regs, entry);
600 return entry;
601}
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 183e0d226669..d9a7db78ed62 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
696 * interrupt, not NMI. 696 * interrupt, not NMI.
697 */ 697 */
698 if (handled == IRQ_HANDLED) 698 if (handled == IRQ_HANDLED)
699 perf_event_do_pending(); 699 irq_work_run();
700 700
701#ifdef CONFIG_MIPS_MT_SMP 701#ifdef CONFIG_MIPS_MT_SMP
702 read_unlock(&pmuint_rwlock); 702 read_unlock(&pmuint_rwlock);
@@ -1045,6 +1045,8 @@ init_hw_perf_events(void)
1045 "CPU, irq %d%s\n", mipspmu->name, counters, irq, 1045 "CPU, irq %d%s\n", mipspmu->name, counters, irq,
1046 irq < 0 ? " (share with timer interrupt)" : ""); 1046 irq < 0 ? " (share with timer interrupt)" : "");
1047 1047
1048 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1049
1048 return 0; 1050 return 0;
1049} 1051}
1050early_initcall(init_hw_perf_events); 1052early_initcall(init_hw_perf_events);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5922342bca39..dbbe0ce48d89 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
84 84
85static int protected_restore_fp_context(struct sigcontext __user *sc) 85static int protected_restore_fp_context(struct sigcontext __user *sc)
86{ 86{
87 int err, tmp; 87 int err, tmp __maybe_unused;
88 while (1) { 88 while (1) {
89 lock_fpu_owner(); 89 lock_fpu_owner();
90 own_fpu_inatomic(0); 90 own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index a0ed0e052b2e..aae986613795 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
115 115
116static int protected_restore_fp_context32(struct sigcontext32 __user *sc) 116static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
117{ 117{
118 int err, tmp; 118 int err, tmp __maybe_unused;
119 while (1) { 119 while (1) {
120 lock_fpu_owner(); 120 lock_fpu_owner();
121 own_fpu_inatomic(0); 121 own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb95cb49..32a256101082 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void)
193 */ 193 */
194static struct task_struct *cpu_idle_thread[NR_CPUS]; 194static struct task_struct *cpu_idle_thread[NR_CPUS];
195 195
196struct create_idle {
197 struct work_struct work;
198 struct task_struct *idle;
199 struct completion done;
200 int cpu;
201};
202
203static void __cpuinit do_fork_idle(struct work_struct *work)
204{
205 struct create_idle *c_idle =
206 container_of(work, struct create_idle, work);
207
208 c_idle->idle = fork_idle(c_idle->cpu);
209 complete(&c_idle->done);
210}
211
196int __cpuinit __cpu_up(unsigned int cpu) 212int __cpuinit __cpu_up(unsigned int cpu)
197{ 213{
198 struct task_struct *idle; 214 struct task_struct *idle;
@@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu)
203 * Linux can schedule processes on this slave. 219 * Linux can schedule processes on this slave.
204 */ 220 */
205 if (!cpu_idle_thread[cpu]) { 221 if (!cpu_idle_thread[cpu]) {
206 idle = fork_idle(cpu); 222 /*
207 cpu_idle_thread[cpu] = idle; 223 * Schedule work item to avoid forking user task
224 * Ported from arch/x86/kernel/smpboot.c
225 */
226 struct create_idle c_idle = {
227 .cpu = cpu,
228 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
229 };
230
231 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
232 schedule_work(&c_idle.work);
233 wait_for_completion(&c_idle.done);
234 idle = cpu_idle_thread[cpu] = c_idle.idle;
208 235
209 if (IS_ERR(idle)) 236 if (IS_ERR(idle))
210 panic(KERN_ERR "Fork failed for CPU %d", cpu); 237 panic(KERN_ERR "Fork failed for CPU %d", cpu);
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edff45e0..58beabf50b3c 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -383,12 +383,11 @@ save_static_function(sys_sysmips);
383static int __used noinline 383static int __used noinline
384_sys_sysmips(nabi_no_regargs struct pt_regs regs) 384_sys_sysmips(nabi_no_regargs struct pt_regs regs)
385{ 385{
386 long cmd, arg1, arg2, arg3; 386 long cmd, arg1, arg2;
387 387
388 cmd = regs.regs[4]; 388 cmd = regs.regs[4];
389 arg1 = regs.regs[5]; 389 arg1 = regs.regs[5];
390 arg2 = regs.regs[6]; 390 arg2 = regs.regs[6];
391 arg3 = regs.regs[7];
392 391
393 switch (cmd) { 392 switch (cmd) {
394 case MIPS_ATOMIC_SET: 393 case MIPS_ATOMIC_SET:
@@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
405 if (arg1 & 2) 404 if (arg1 & 2)
406 set_thread_flag(TIF_LOGADE); 405 set_thread_flag(TIF_LOGADE);
407 else 406 else
408 clear_thread_flag(TIF_FIXADE); 407 clear_thread_flag(TIF_LOGADE);
409 408
410 return 0; 409 return 0;
411 410
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
148 spinlock_t tc_list_lock; 148 spinlock_t tc_list_lock;
149 struct list_head tc_list; /* Thread contexts */ 149 struct list_head tc_list; /* Thread contexts */
150} vpecontrol = { 150} vpecontrol = {
151 .vpe_list_lock = SPIN_LOCK_UNLOCKED, 151 .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
152 .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list), 152 .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
153 .tc_list_lock = SPIN_LOCK_UNLOCKED, 153 .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
154 .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list) 154 .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
155}; 155};
156 156
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 6e1b77fec7ea..aca93eed8779 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig
@@ -1,6 +1,7 @@
1if MACH_LOONGSON
2
1choice 3choice
2 prompt "Machine Type" 4 prompt "Machine Type"
3 depends on MACH_LOONGSON
4 5
5config LEMOTE_FULOONG2E 6config LEMOTE_FULOONG2E
6 bool "Lemote Fuloong(2e) mini-PC" 7 bool "Lemote Fuloong(2e) mini-PC"
@@ -87,3 +88,5 @@ config LOONGSON_UART_BASE
87config LOONGSON_MC146818 88config LOONGSON_MC146818
88 bool 89 bool
89 default n 90 default n
91
92endif # MACH_LOONGSON
diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c
index 1a06defc4f7f..353e1d2e41a5 100644
--- a/arch/mips/loongson/common/cmdline.c
+++ b/arch/mips/loongson/common/cmdline.c
@@ -44,10 +44,5 @@ void __init prom_init_cmdline(void)
44 strcat(arcs_cmdline, " "); 44 strcat(arcs_cmdline, " ");
45 } 45 }
46 46
47 if ((strstr(arcs_cmdline, "console=")) == NULL)
48 strcat(arcs_cmdline, " console=ttyS0,115200");
49 if ((strstr(arcs_cmdline, "root=")) == NULL)
50 strcat(arcs_cmdline, " root=/dev/hda1");
51
52 prom_init_machtype(); 47 prom_init_machtype();
53} 48}
diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c
index 81fbe6b73f91..2efd5d9dee27 100644
--- a/arch/mips/loongson/common/machtype.c
+++ b/arch/mips/loongson/common/machtype.c
@@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void)
41 41
42void __init prom_init_machtype(void) 42void __init prom_init_machtype(void)
43{ 43{
44 char *p, str[MACHTYPE_LEN]; 44 char *p, str[MACHTYPE_LEN + 1];
45 int machtype = MACH_LEMOTE_FL2E; 45 int machtype = MACH_LEMOTE_FL2E;
46 46
47 mips_machtype = LOONGSON_MACHTYPE; 47 mips_machtype = LOONGSON_MACHTYPE;
@@ -53,6 +53,7 @@ void __init prom_init_machtype(void)
53 } 53 }
54 p += strlen("machtype="); 54 p += strlen("machtype=");
55 strncpy(str, p, MACHTYPE_LEN); 55 strncpy(str, p, MACHTYPE_LEN);
56 str[MACHTYPE_LEN] = '\0';
56 p = strstr(str, " "); 57 p = strstr(str, " ");
57 if (p) 58 if (p)
58 *p = '\0'; 59 *p = '\0';
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 2701d9500959..2a7d43f4f161 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -70,7 +70,7 @@
70 70
71 71
72#define COMPXSP \ 72#define COMPXSP \
73 unsigned xm; int xe; int xs; int xc 73 unsigned xm; int xe; int xs __maybe_unused; int xc
74 74
75#define COMPYSP \ 75#define COMPYSP \
76 unsigned ym; int ye; int ys; int yc 76 unsigned ym; int ye; int ys; int yc
@@ -104,7 +104,7 @@
104 104
105 105
106#define COMPXDP \ 106#define COMPXDP \
107u64 xm; int xe; int xs; int xc 107u64 xm; int xe; int xs __maybe_unused; int xc
108 108
109#define COMPYDP \ 109#define COMPYDP \
110u64 ym; int ye; int ys; int yc 110u64 ym; int ye; int ys; int yc
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd24c82f..279599e9a779 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr)
324void __init paging_init(void) 324void __init paging_init(void)
325{ 325{
326 unsigned long max_zone_pfns[MAX_NR_ZONES]; 326 unsigned long max_zone_pfns[MAX_NR_ZONES];
327 unsigned long lastpfn; 327 unsigned long lastpfn __maybe_unused;
328 328
329 pagetable_init(); 329 pagetable_init();
330 330
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 083d3412d0bc..04f9e17db9d0 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -109,6 +109,8 @@ static bool scratchpad_available(void)
109static int scratchpad_offset(int i) 109static int scratchpad_offset(int i)
110{ 110{
111 BUG(); 111 BUG();
112 /* Really unreachable, but evidently some GCC want this. */
113 return 0;
112} 114}
113#endif 115#endif
114/* 116/*
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index b7c03d80c88c..68798f869c0f 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -308,7 +308,7 @@ static struct resource pci_mem_resource = {
308 * RETURNS: PCIBIOS_SUCCESSFUL - success 308 * RETURNS: PCIBIOS_SUCCESSFUL - success
309 * 309 *
310 ****************************************************************************/ 310 ****************************************************************************/
311static int bpci_interrupt(int irq, void *dev_id) 311static irqreturn_t bpci_interrupt(int irq, void *dev_id)
312{ 312{
313 struct msp_pci_regs *preg = (void *)PCI_BASE_REG; 313 struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
314 unsigned int stat = preg->if_status; 314 unsigned int stat = preg->if_status;
@@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id)
326 /* write to clear all asserted interrupts */ 326 /* write to clear all asserted interrupts */
327 preg->if_status = stat; 327 preg->if_status = stat;
328 328
329 return PCIBIOS_SUCCESSFUL; 329 return IRQ_HANDLED;
330} 330}
331 331
332/***************************************************************************** 332/*****************************************************************************
diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index c139988bb85d..8d798497c614 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig
@@ -4,15 +4,11 @@ choice
4 4
5config PMC_MSP4200_EVAL 5config PMC_MSP4200_EVAL
6 bool "PMC-Sierra MSP4200 Eval Board" 6 bool "PMC-Sierra MSP4200 Eval Board"
7 select CEVT_R4K
8 select CSRC_R4K
9 select IRQ_MSP_SLP 7 select IRQ_MSP_SLP
10 select HW_HAS_PCI 8 select HW_HAS_PCI
11 9
12config PMC_MSP4200_GW 10config PMC_MSP4200_GW
13 bool "PMC-Sierra MSP4200 VoIP Gateway" 11 bool "PMC-Sierra MSP4200 VoIP Gateway"
14 select CEVT_R4K
15 select CSRC_R4K
16 select IRQ_MSP_SLP 12 select IRQ_MSP_SLP
17 select HW_HAS_PCI 13 select HW_HAS_PCI
18 14
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index cca64e15f57f..01df84ce31e2 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -81,7 +81,7 @@ void __init plat_time_init(void)
81 mips_hpt_frequency = cpu_rate/2; 81 mips_hpt_frequency = cpu_rate/2;
82} 82}
83 83
84unsigned int __init get_c0_compare_int(void) 84unsigned int __cpuinit get_c0_compare_int(void)
85{ 85{
86 return MSP_INT_VPE0_TIMER; 86 return MSP_INT_VPE0_TIMER;
87} 87}
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 92d2f9298e38..9d773a639513 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
139 * Atomically reads the value of @v. Note that the guaranteed 139 * Atomically reads the value of @v. Note that the guaranteed
140 * useful range of an atomic_t is only 24 bits. 140 * useful range of an atomic_t is only 24 bits.
141 */ 141 */
142#define atomic_read(v) ((v)->counter) 142#define atomic_read(v) (ACCESS_ONCE((v)->counter))
143 143
144/** 144/**
145 * atomic_set - set atomic variable 145 * atomic_set - set atomic variable
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 679dee0bbd08..3d6e60dad9d9 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; };
160 160
161#define __get_user_check(x, ptr, size) \ 161#define __get_user_check(x, ptr, size) \
162({ \ 162({ \
163 const __typeof__(ptr) __guc_ptr = (ptr); \
163 int _e; \ 164 int _e; \
164 if (likely(__access_ok((unsigned long) (ptr), (size)))) \ 165 if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \
165 _e = __get_user_nocheck((x), (ptr), (size)); \ 166 _e = __get_user_nocheck((x), __guc_ptr, (size)); \
166 else { \ 167 else { \
167 _e = -EFAULT; \ 168 _e = -EFAULT; \
168 (x) = (__typeof__(x))0; \ 169 (x) = (__typeof__(x))0; \
diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c
index a8933a60b2d4..a6b63dde603d 100644
--- a/arch/mn10300/mm/cache-inv-icache.c
+++ b/arch/mn10300/mm/cache-inv-icache.c
@@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end)
69 69
70 /* invalidate the icache coverage on that region */ 70 /* invalidate the icache coverage on that region */
71 mn10300_local_icache_inv_range2(addr + off, size); 71 mn10300_local_icache_inv_range2(addr + off, size);
72 smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); 72 smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
73} 73}
74 74
75/** 75/**
@@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
101 * directly */ 101 * directly */
102 start_page = (start >= 0x80000000UL) ? start : 0x80000000UL; 102 start_page = (start >= 0x80000000UL) ? start : 0x80000000UL;
103 mn10300_icache_inv_range(start_page, end); 103 mn10300_icache_inv_range(start_page, end);
104 smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); 104 smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
105 if (start_page == start) 105 if (start_page == start)
106 goto done; 106 goto done;
107 end = start_page; 107 end = start_page;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 30394081d9b6..6ab9580b0b00 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
185 int16_t f_pad; 185 int16_t f_pad;
186}; 186};
187 187
188static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) 188static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
189{ 189{
190 struct kstatfs st; 190 struct hpux_statfs buf;
191 int retval; 191 memset(&buf, 0, sizeof(buf));
192 192 buf.f_type = st->f_type;
193 retval = vfs_statfs(path, &st); 193 buf.f_bsize = st->f_bsize;
194 if (retval) 194 buf.f_blocks = st->f_blocks;
195 return retval; 195 buf.f_bfree = st->f_bfree;
196 196 buf.f_bavail = st->f_bavail;
197 memset(buf, 0, sizeof(*buf)); 197 buf.f_files = st->f_files;
198 buf->f_type = st.f_type; 198 buf.f_ffree = st->f_ffree;
199 buf->f_bsize = st.f_bsize; 199 buf.f_fsid[0] = st->f_fsid.val[0];
200 buf->f_blocks = st.f_blocks; 200 buf.f_fsid[1] = st->f_fsid.val[1];
201 buf->f_bfree = st.f_bfree; 201 if (copy_to_user(p, &buf, sizeof(buf)))
202 buf->f_bavail = st.f_bavail; 202 return -EFAULT;
203 buf->f_files = st.f_files;
204 buf->f_ffree = st.f_ffree;
205 buf->f_fsid[0] = st.f_fsid.val[0];
206 buf->f_fsid[1] = st.f_fsid.val[1];
207
208 return 0; 203 return 0;
209} 204}
210 205
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
212asmlinkage long hpux_statfs(const char __user *pathname, 207asmlinkage long hpux_statfs(const char __user *pathname,
213 struct hpux_statfs __user *buf) 208 struct hpux_statfs __user *buf)
214{ 209{
215 struct path path; 210 struct kstatfs st;
216 int error; 211 int error = user_statfs(pathname, &st);
217 212 if (!error)
218 error = user_path(pathname, &path); 213 error = do_statfs_hpux(&st, buf);
219 if (!error) {
220 struct hpux_statfs tmp;
221 error = do_statfs_hpux(&path, &tmp);
222 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
223 error = -EFAULT;
224 path_put(&path);
225 }
226 return error; 214 return error;
227} 215}
228 216
229asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) 217asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
230{ 218{
231 struct file *file; 219 struct kstatfs st;
232 struct hpux_statfs tmp; 220 int error = fd_statfs(fd, &st);
233 int error; 221 if (!error)
234 222 error = do_statfs_hpux(&st, buf);
235 error = -EBADF;
236 file = fget(fd);
237 if (!file)
238 goto out;
239 error = do_statfs_hpux(&file->f_path, &tmp);
240 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
241 error = -EFAULT;
242 fput(file);
243 out:
244 return error; 223 return error;
245} 224}
246 225
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55ef..67a33cc27ef2 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -8,7 +8,7 @@
8#include <asm/errno.h> 8#include <asm/errno.h>
9 9
10static inline int 10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 11futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
12{ 12{
13 int op = (encoded_op >> 28) & 7; 13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15; 14 int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 19 oparg = 1 << oparg;
20 20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
22 return -EFAULT; 22 return -EFAULT;
23 23
24 pagefault_disable(); 24 pagefault_disable();
@@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
51 51
52/* Non-atomic version */ 52/* Non-atomic version */
53static inline int 53static inline int
54futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 54futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
55 u32 oldval, u32 newval)
55{ 56{
56 int err = 0; 57 u32 val;
57 int uval;
58 58
59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is 59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
60 * our gateway page, and causes no end of trouble... 60 * our gateway page, and causes no end of trouble...
@@ -62,15 +62,15 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
62 if (segment_eq(KERNEL_DS, get_fs()) && !uaddr) 62 if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
63 return -EFAULT; 63 return -EFAULT;
64 64
65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
66 return -EFAULT; 66 return -EFAULT;
67 67
68 err = get_user(uval, uaddr); 68 if (get_user(val, uaddr))
69 if (err) return -EFAULT; 69 return -EFAULT;
70 if (uval == oldval) 70 if (val == oldval && put_user(newval, uaddr))
71 err = put_user(newval, uaddr); 71 return -EFAULT;
72 if (err) return -EFAULT; 72 *uval = val;
73 return uval; 73 return 0;
74} 74}
75 75
76#endif /*__KERNEL__*/ 76#endif /*__KERNEL__*/
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 7c589ef81fb0..c94e4a3fe2ef 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -30,7 +30,7 @@
30 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ 30 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
31 : "cr0", "memory") 31 : "cr0", "memory")
32 32
33static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 33static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7; 35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15; 36 int cmp = (encoded_op >> 24) & 15;
@@ -40,7 +40,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
40 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 40 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
41 oparg = 1 << oparg; 41 oparg = 1 << oparg;
42 42
43 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 43 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
44 return -EFAULT; 44 return -EFAULT;
45 45
46 pagefault_disable(); 46 pagefault_disable();
@@ -82,35 +82,38 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
82} 82}
83 83
84static inline int 84static inline int
85futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 85futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
86 u32 oldval, u32 newval)
86{ 87{
87 int prev; 88 int ret = 0;
89 u32 prev;
88 90
89 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 91 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
90 return -EFAULT; 92 return -EFAULT;
91 93
92 __asm__ __volatile__ ( 94 __asm__ __volatile__ (
93 PPC_RELEASE_BARRIER 95 PPC_RELEASE_BARRIER
94"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\ 96"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
95 cmpw 0,%0,%3\n\ 97 cmpw 0,%1,%4\n\
96 bne- 3f\n" 98 bne- 3f\n"
97 PPC405_ERR77(0,%2) 99 PPC405_ERR77(0,%3)
98"2: stwcx. %4,0,%2\n\ 100"2: stwcx. %5,0,%3\n\
99 bne- 1b\n" 101 bne- 1b\n"
100 PPC_ACQUIRE_BARRIER 102 PPC_ACQUIRE_BARRIER
101"3: .section .fixup,\"ax\"\n\ 103"3: .section .fixup,\"ax\"\n\
1024: li %0,%5\n\ 1044: li %0,%6\n\
103 b 3b\n\ 105 b 3b\n\
104 .previous\n\ 106 .previous\n\
105 .section __ex_table,\"a\"\n\ 107 .section __ex_table,\"a\"\n\
106 .align 3\n\ 108 .align 3\n\
107 " PPC_LONG "1b,4b,2b,4b\n\ 109 " PPC_LONG "1b,4b,2b,4b\n\
108 .previous" \ 110 .previous" \
109 : "=&r" (prev), "+m" (*uaddr) 111 : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
110 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) 112 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
111 : "cc", "memory"); 113 : "cc", "memory");
112 114
113 return prev; 115 *uval = prev;
116 return ret;
114} 117}
115 118
116#endif /* __KERNEL__ */ 119#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 380d48bacd16..26b8c807f8f1 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -33,9 +33,25 @@
33// 33//
34//---------------------------------------------------------------------------- 34//----------------------------------------------------------------------------
35#include <linux/cache.h> 35#include <linux/cache.h>
36#include <linux/threads.h>
36#include <asm/types.h> 37#include <asm/types.h>
37#include <asm/mmu.h> 38#include <asm/mmu.h>
38 39
40/*
41 * We only have to have statically allocated lppaca structs on
42 * legacy iSeries, which supports at most 64 cpus.
43 */
44#ifdef CONFIG_PPC_ISERIES
45#if NR_CPUS < 64
46#define NR_LPPACAS NR_CPUS
47#else
48#define NR_LPPACAS 64
49#endif
50#else /* not iSeries */
51#define NR_LPPACAS 1
52#endif
53
54
39/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k 55/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
40 * alignment is sufficient to prevent this */ 56 * alignment is sufficient to prevent this */
41struct lppaca { 57struct lppaca {
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
13 * by Paul Mackerras <paulus@samba.org>. 13 * by Paul Mackerras <paulus@samba.org>.
14 */ 14 */
15 15
16#include <linux/list.h>
17#include <linux/spinlock.h>
18#include <asm/atomic.h>
19#include <asm/system.h>
20
21/* 16/*
22 * the semaphore definition 17 * the semaphore definition
23 */ 18 */
@@ -33,47 +28,6 @@
33#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 28#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
34#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 29#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
35 30
36struct rw_semaphore {
37 long count;
38 spinlock_t wait_lock;
39 struct list_head wait_list;
40#ifdef CONFIG_DEBUG_LOCK_ALLOC
41 struct lockdep_map dep_map;
42#endif
43};
44
45#ifdef CONFIG_DEBUG_LOCK_ALLOC
46# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
47#else
48# define __RWSEM_DEP_MAP_INIT(lockname)
49#endif
50
51#define __RWSEM_INITIALIZER(name) \
52{ \
53 RWSEM_UNLOCKED_VALUE, \
54 __SPIN_LOCK_UNLOCKED((name).wait_lock), \
55 LIST_HEAD_INIT((name).wait_list) \
56 __RWSEM_DEP_MAP_INIT(name) \
57}
58
59#define DECLARE_RWSEM(name) \
60 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
61
62extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
63extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
64extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
65extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
66
67extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
68 struct lock_class_key *key);
69
70#define init_rwsem(sem) \
71 do { \
72 static struct lock_class_key __key; \
73 \
74 __init_rwsem((sem), #sem, &__key); \
75 } while (0)
76
77/* 31/*
78 * lock for reading 32 * lock for reading
79 */ 33 */
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
174 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); 128 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
175} 129}
176 130
177static inline int rwsem_is_locked(struct rw_semaphore *sem)
178{
179 return sem->count != 0;
180}
181
182#endif /* __KERNEL__ */ 131#endif /* __KERNEL__ */
183#endif /* _ASM_POWERPC_RWSEM_H */ 132#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ebf9846f3c3b..f4adf89d7614 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,20 +27,6 @@ extern unsigned long __toc_start;
27#ifdef CONFIG_PPC_BOOK3S 27#ifdef CONFIG_PPC_BOOK3S
28 28
29/* 29/*
30 * We only have to have statically allocated lppaca structs on
31 * legacy iSeries, which supports at most 64 cpus.
32 */
33#ifdef CONFIG_PPC_ISERIES
34#if NR_CPUS < 64
35#define NR_LPPACAS NR_CPUS
36#else
37#define NR_LPPACAS 64
38#endif
39#else /* not iSeries */
40#define NR_LPPACAS 1
41#endif
42
43/*
44 * The structure which the hypervisor knows about - this structure 30 * The structure which the hypervisor knows about - this structure
45 * should not cross a page boundary. The vpa_init/register_vpa call 31 * should not cross a page boundary. The vpa_init/register_vpa call
46 * is now known to fail if the lppaca structure crosses a page 32 * is now known to fail if the lppaca structure crosses a page
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fd4812329570..0dc95c0aa3be 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1516,7 +1516,8 @@ int start_topology_update(void)
1516{ 1516{
1517 int rc = 0; 1517 int rc = 0;
1518 1518
1519 if (firmware_has_feature(FW_FEATURE_VPHN) && 1519 /* Disabled until races with load balancing are fixed */
1520 if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
1520 get_lppaca()->shared_proc) { 1521 get_lppaca()->shared_proc) {
1521 vphn_enabled = 1; 1522 vphn_enabled = 1;
1522 setup_cpu_associativity_change_counters(); 1523 setup_cpu_associativity_change_counters();
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d32f86a..a3d2ce54ea2e 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
70 if (!IS_ERR(tmp)) { 70 if (!IS_ERR(tmp)) {
71 struct nameidata nd; 71 struct nameidata nd;
72 72
73 ret = path_lookup(tmp, LOOKUP_PARENT, &nd); 73 ret = kern_path_parent(tmp, &nd);
74 if (!ret) { 74 if (!ret) {
75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; 75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
76 ret = spufs_create(&nd, flags, mode, neighbor); 76 ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index fdb7384c0c4f..f0491cc28900 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
242 pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */ 242 pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */
243 pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); 243 pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE);
244 244
245 for (i = 0; i < NR_CPUS; i++) { 245 for (i = 0; i < NR_LPPACAS; i++) {
246 if (lppaca_of(i).dyn_proc_status >= 2) 246 if (lppaca[i].dyn_proc_status >= 2)
247 continue; 247 continue;
248 248
249 snprintf(p, 32 - (p - buf), "@%d", i); 249 snprintf(p, 32 - (p - buf), "@%d", i);
@@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
251 251
252 dt_prop_str(dt, "device_type", device_type_cpu); 252 dt_prop_str(dt, "device_type", device_type_cpu);
253 253
254 index = lppaca_of(i).dyn_hv_phys_proc_index; 254 index = lppaca[i].dyn_hv_phys_proc_index;
255 d = &xIoHriProcessorVpd[index]; 255 d = &xIoHriProcessorVpd[index];
256 256
257 dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); 257 dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b0863410517f..2946ae10fbfd 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void)
680 * on but calling this function multiple times is fine. 680 * on but calling this function multiple times is fine.
681 */ 681 */
682 identify_cpu(0, mfspr(SPRN_PVR)); 682 identify_cpu(0, mfspr(SPRN_PVR));
683 initialise_paca(&boot_paca, 0);
683 684
684 powerpc_firmware_features |= FW_FEATURE_ISERIES; 685 powerpc_firmware_features |= FW_FEATURE_ISERIES;
685 powerpc_firmware_features |= FW_FEATURE_LPAR; 686 powerpc_firmware_features |= FW_FEATURE_LPAR;
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5c5d02de49e9..81cf36b691f1 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -7,7 +7,7 @@
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <asm/errno.h> 8#include <asm/errno.h>
9 9
10static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 10static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
11{ 11{
12 int op = (encoded_op >> 28) & 7; 12 int op = (encoded_op >> 28) & 7;
13 int cmp = (encoded_op >> 24) & 15; 13 int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 19 oparg = 1 << oparg;
20 20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
22 return -EFAULT; 22 return -EFAULT;
23 23
24 pagefault_disable(); 24 pagefault_disable();
@@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
39 return ret; 39 return ret;
40} 40}
41 41
42static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, 42static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
43 int oldval, int newval) 43 u32 oldval, u32 newval)
44{ 44{
45 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 45 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
46 return -EFAULT; 46 return -EFAULT;
47 47
48 return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); 48 return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
49} 49}
50 50
51#endif /* __KERNEL__ */ 51#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
43 43
44#ifdef __KERNEL__ 44#ifdef __KERNEL__
45 45
46#include <linux/list.h>
47#include <linux/spinlock.h>
48
49struct rwsem_waiter;
50
51extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
52extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
53extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
54extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
55extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
56
57/*
58 * the semaphore definition
59 */
60struct rw_semaphore {
61 signed long count;
62 spinlock_t wait_lock;
63 struct list_head wait_list;
64#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 struct lockdep_map dep_map;
66#endif
67};
68
69#ifndef __s390x__ 46#ifndef __s390x__
70#define RWSEM_UNLOCKED_VALUE 0x00000000 47#define RWSEM_UNLOCKED_VALUE 0x00000000
71#define RWSEM_ACTIVE_BIAS 0x00000001 48#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
81#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 58#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
82 59
83/* 60/*
84 * initialisation
85 */
86
87#ifdef CONFIG_DEBUG_LOCK_ALLOC
88# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
89#else
90# define __RWSEM_DEP_MAP_INIT(lockname)
91#endif
92
93#define __RWSEM_INITIALIZER(name) \
94 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
95 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
96
97#define DECLARE_RWSEM(name) \
98 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
99
100static inline void init_rwsem(struct rw_semaphore *sem)
101{
102 sem->count = RWSEM_UNLOCKED_VALUE;
103 spin_lock_init(&sem->wait_lock);
104 INIT_LIST_HEAD(&sem->wait_list);
105}
106
107extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
108 struct lock_class_key *key);
109
110#define init_rwsem(sem) \
111do { \
112 static struct lock_class_key __key; \
113 \
114 __init_rwsem((sem), #sem, &__key); \
115} while (0)
116
117
118/*
119 * lock for reading 61 * lock for reading
120 */ 62 */
121static inline void __down_read(struct rw_semaphore *sem) 63static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
377 return new; 319 return new;
378} 320}
379 321
380static inline int rwsem_is_locked(struct rw_semaphore *sem)
381{
382 return (sem->count != 0);
383}
384
385#endif /* __KERNEL__ */ 322#endif /* __KERNEL__ */
386#endif /* _S390_RWSEM_H */ 323#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d6b1ed0ec52b..2d9ea11f919a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,8 +83,8 @@ struct uaccess_ops {
83 size_t (*clear_user)(size_t, void __user *); 83 size_t (*clear_user)(size_t, void __user *);
84 size_t (*strnlen_user)(size_t, const char __user *); 84 size_t (*strnlen_user)(size_t, const char __user *);
85 size_t (*strncpy_from_user)(size_t, const char __user *, char *); 85 size_t (*strncpy_from_user)(size_t, const char __user *, char *);
86 int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); 86 int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
87 int (*futex_atomic_cmpxchg)(int __user *, int old, int new); 87 int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
88}; 88};
89 89
90extern struct uaccess_ops uaccess; 90extern struct uaccess_ops uaccess;
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 126011df14f1..1d2536cb630b 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *);
12extern size_t copy_to_user_std(size_t, void __user *, const void *); 12extern size_t copy_to_user_std(size_t, void __user *, const void *);
13extern size_t strnlen_user_std(size_t, const char __user *); 13extern size_t strnlen_user_std(size_t, const char __user *);
14extern size_t strncpy_from_user_std(size_t, const char __user *, char *); 14extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
15extern int futex_atomic_cmpxchg_std(int __user *, int, int); 15extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
16extern int futex_atomic_op_std(int, int __user *, int, int *); 16extern int futex_atomic_op_std(int, u32 __user *, int, int *);
17 17
18extern size_t copy_from_user_pt(size_t, const void __user *, void *); 18extern size_t copy_from_user_pt(size_t, const void __user *, void *);
19extern size_t copy_to_user_pt(size_t, void __user *, const void *); 19extern size_t copy_to_user_pt(size_t, void __user *, const void *);
20extern int futex_atomic_op_pt(int, int __user *, int, int *); 20extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
21extern int futex_atomic_cmpxchg_pt(int __user *, int, int); 21extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
22 22
23#endif /* __ARCH_S390_LIB_UACCESS_H */ 23#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 404f2de296dc..74833831417f 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,7 +302,7 @@ fault:
302 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 302 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
303 "m" (*uaddr) : "cc" ); 303 "m" (*uaddr) : "cc" );
304 304
305static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) 305static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
306{ 306{
307 int oldval = 0, newval, ret; 307 int oldval = 0, newval, ret;
308 308
@@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
335 return ret; 335 return ret;
336} 336}
337 337
338int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) 338int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
339{ 339{
340 int ret; 340 int ret;
341 341
@@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
354 return ret; 354 return ret;
355} 355}
356 356
357static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) 357static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
358 u32 oldval, u32 newval)
358{ 359{
359 int ret; 360 int ret;
360 361
361 asm volatile("0: cs %1,%4,0(%5)\n" 362 asm volatile("0: cs %1,%4,0(%5)\n"
362 "1: lr %0,%1\n" 363 "1: la %0,0\n"
363 "2:\n" 364 "2:\n"
364 EX_TABLE(0b,2b) EX_TABLE(1b,2b) 365 EX_TABLE(0b,2b) EX_TABLE(1b,2b)
365 : "=d" (ret), "+d" (oldval), "=m" (*uaddr) 366 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
366 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 367 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
367 : "cc", "memory" ); 368 : "cc", "memory" );
369 *uval = oldval;
368 return ret; 370 return ret;
369} 371}
370 372
371int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) 373int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
374 u32 oldval, u32 newval)
372{ 375{
373 int ret; 376 int ret;
374 377
375 if (segment_eq(get_fs(), KERNEL_DS)) 378 if (segment_eq(get_fs(), KERNEL_DS))
376 return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); 379 return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
377 spin_lock(&current->mm->page_table_lock); 380 spin_lock(&current->mm->page_table_lock);
378 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); 381 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
379 if (!uaddr) { 382 if (!uaddr) {
@@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
382 } 385 }
383 get_page(virt_to_page(uaddr)); 386 get_page(virt_to_page(uaddr));
384 spin_unlock(&current->mm->page_table_lock); 387 spin_unlock(&current->mm->page_table_lock);
385 ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); 388 ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
386 put_page(virt_to_page(uaddr)); 389 put_page(virt_to_page(uaddr));
387 return ret; 390 return ret;
388} 391}
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index a6c4f7ed24a4..bb1a7eed42ce 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
255 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 255 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
256 "m" (*uaddr) : "cc"); 256 "m" (*uaddr) : "cc");
257 257
258int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) 258int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
259{ 259{
260 int oldval = 0, newval, ret; 260 int oldval = 0, newval, ret;
261 261
@@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
287 return ret; 287 return ret;
288} 288}
289 289
290int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) 290int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
291 u32 oldval, u32 newval)
291{ 292{
292 int ret; 293 int ret;
293 294
294 asm volatile( 295 asm volatile(
295 " sacf 256\n" 296 " sacf 256\n"
296 "0: cs %1,%4,0(%5)\n" 297 "0: cs %1,%4,0(%5)\n"
297 "1: lr %0,%1\n" 298 "1: la %0,0\n"
298 "2: sacf 0\n" 299 "2: sacf 0\n"
299 EX_TABLE(0b,2b) EX_TABLE(1b,2b) 300 EX_TABLE(0b,2b) EX_TABLE(1b,2b)
300 : "=d" (ret), "+d" (oldval), "=m" (*uaddr) 301 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
301 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 302 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
302 : "cc", "memory" ); 303 : "cc", "memory" );
304 *uval = oldval;
303 return ret; 305 return ret;
304} 306}
305 307
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index a9f16a7f9aea..6cb9f193a95e 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -3,7 +3,7 @@
3 3
4#include <asm/system.h> 4#include <asm/system.h>
5 5
6static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, 6static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
7 int *oldval) 7 int *oldval)
8{ 8{
9 unsigned long flags; 9 unsigned long flags;
@@ -20,7 +20,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
20 return ret; 20 return ret;
21} 21}
22 22
23static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, 23static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
24 int *oldval) 24 int *oldval)
25{ 25{
26 unsigned long flags; 26 unsigned long flags;
@@ -37,7 +37,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
37 return ret; 37 return ret;
38} 38}
39 39
40static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, 40static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
41 int *oldval) 41 int *oldval)
42{ 42{
43 unsigned long flags; 43 unsigned long flags;
@@ -54,7 +54,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, 57static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
58 int *oldval) 58 int *oldval)
59{ 59{
60 unsigned long flags; 60 unsigned long flags;
@@ -71,7 +71,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
71 return ret; 71 return ret;
72} 72}
73 73
74static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, 74static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
75 int *oldval) 75 int *oldval)
76{ 76{
77 unsigned long flags; 77 unsigned long flags;
@@ -88,11 +88,13 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
88 return ret; 88 return ret;
89} 89}
90 90
91static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, 91static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
92 int oldval, int newval) 92 u32 __user *uaddr,
93 u32 oldval, u32 newval)
93{ 94{
94 unsigned long flags; 95 unsigned long flags;
95 int ret, prev = 0; 96 int ret;
97 u32 prev = 0;
96 98
97 local_irq_save(flags); 99 local_irq_save(flags);
98 100
@@ -102,10 +104,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
102 104
103 local_irq_restore(flags); 105 local_irq_restore(flags);
104 106
105 if (ret) 107 *uval = prev;
106 return ret; 108 return ret;
107
108 return prev;
109} 109}
110 110
111#endif /* __ASM_SH_FUTEX_IRQ_H */ 111#endif /* __ASM_SH_FUTEX_IRQ_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 68256ec5fa35..7be39a646fbd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -10,7 +10,7 @@
10/* XXX: UP variants, fix for SH-4A and SMP.. */ 10/* XXX: UP variants, fix for SH-4A and SMP.. */
11#include <asm/futex-irq.h> 11#include <asm/futex-irq.h>
12 12
13static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 13static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
14{ 14{
15 int op = (encoded_op >> 28) & 7; 15 int op = (encoded_op >> 28) & 7;
16 int cmp = (encoded_op >> 24) & 15; 16 int cmp = (encoded_op >> 24) & 15;
@@ -21,7 +21,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
22 oparg = 1 << oparg; 22 oparg = 1 << oparg;
23 23
24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
25 return -EFAULT; 25 return -EFAULT;
26 26
27 pagefault_disable(); 27 pagefault_disable();
@@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
65} 65}
66 66
67static inline int 67static inline int
68futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 68futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
69 u32 oldval, u32 newval)
69{ 70{
70 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 71 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
71 return -EFAULT; 72 return -EFAULT;
72 73
73 return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval); 74 return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
74} 75}
75 76
76#endif /* __KERNEL__ */ 77#endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
11#endif 11#endif
12 12
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18 14
19/*
20 * the semaphore definition
21 */
22struct rw_semaphore {
23 long count;
24#define RWSEM_UNLOCKED_VALUE 0x00000000 15#define RWSEM_UNLOCKED_VALUE 0x00000000
25#define RWSEM_ACTIVE_BIAS 0x00000001 16#define RWSEM_ACTIVE_BIAS 0x00000001
26#define RWSEM_ACTIVE_MASK 0x0000ffff 17#define RWSEM_ACTIVE_MASK 0x0000ffff
27#define RWSEM_WAITING_BIAS (-0x00010000) 18#define RWSEM_WAITING_BIAS (-0x00010000)
28#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 19#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
29#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 20#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
30 spinlock_t wait_lock;
31 struct list_head wait_list;
32#ifdef CONFIG_DEBUG_LOCK_ALLOC
33 struct lockdep_map dep_map;
34#endif
35};
36
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
39#else
40# define __RWSEM_DEP_MAP_INIT(lockname)
41#endif
42
43#define __RWSEM_INITIALIZER(name) \
44 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
45 LIST_HEAD_INIT((name).wait_list) \
46 __RWSEM_DEP_MAP_INIT(name) }
47
48#define DECLARE_RWSEM(name) \
49 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
50
51extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
52extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
53extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
54extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
55
56extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
57 struct lock_class_key *key);
58
59#define init_rwsem(sem) \
60do { \
61 static struct lock_class_key __key; \
62 \
63 __init_rwsem((sem), #sem, &__key); \
64} while (0)
65
66static inline void init_rwsem(struct rw_semaphore *sem)
67{
68 sem->count = RWSEM_UNLOCKED_VALUE;
69 spin_lock_init(&sem->wait_lock);
70 INIT_LIST_HEAD(&sem->wait_list);
71}
72 21
73/* 22/*
74 * lock for reading 23 * lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
179 return atomic_add_return(delta, (atomic_t *)(&sem->count)); 128 return atomic_add_return(delta, (atomic_t *)(&sem->count));
180} 129}
181 130
182static inline int rwsem_is_locked(struct rw_semaphore *sem)
183{
184 return (sem->count != 0);
185}
186
187#endif /* __KERNEL__ */ 131#endif /* __KERNEL__ */
188#endif /* _ASM_SH_RWSEM_H */ 132#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 47f95839dc69..444e7bea23bc 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -30,7 +30,7 @@
30 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ 30 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
31 : "memory") 31 : "memory")
32 32
33static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 33static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7; 35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15; 36 int cmp = (encoded_op >> 24) & 15;
@@ -38,7 +38,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
38 int cmparg = (encoded_op << 20) >> 20; 38 int cmparg = (encoded_op << 20) >> 20;
39 int oldval = 0, ret, tem; 39 int oldval = 0, ret, tem;
40 40
41 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))) 41 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
42 return -EFAULT; 42 return -EFAULT;
43 if (unlikely((((unsigned long) uaddr) & 0x3UL))) 43 if (unlikely((((unsigned long) uaddr) & 0x3UL)))
44 return -EINVAL; 44 return -EINVAL;
@@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
85} 85}
86 86
87static inline int 87static inline int
88futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 88futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
89 u32 oldval, u32 newval)
89{ 90{
91 int ret = 0;
92
90 __asm__ __volatile__( 93 __asm__ __volatile__(
91 "\n1: casa [%3] %%asi, %2, %0\n" 94 "\n1: casa [%4] %%asi, %3, %1\n"
92 "2:\n" 95 "2:\n"
93 " .section .fixup,#alloc,#execinstr\n" 96 " .section .fixup,#alloc,#execinstr\n"
94 " .align 4\n" 97 " .align 4\n"
95 "3: sethi %%hi(2b), %0\n" 98 "3: sethi %%hi(2b), %0\n"
96 " jmpl %0 + %%lo(2b), %%g0\n" 99 " jmpl %0 + %%lo(2b), %%g0\n"
97 " mov %4, %0\n" 100 " mov %5, %0\n"
98 " .previous\n" 101 " .previous\n"
99 " .section __ex_table,\"a\"\n" 102 " .section __ex_table,\"a\"\n"
100 " .align 4\n" 103 " .align 4\n"
101 " .word 1b, 3b\n" 104 " .word 1b, 3b\n"
102 " .previous\n" 105 " .previous\n"
103 : "=r" (newval) 106 : "+r" (ret), "=r" (newval)
104 : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) 107 : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
105 : "memory"); 108 : "memory");
106 109
107 return newval; 110 *uval = newval;
111 return ret;
108} 112}
109 113
110#endif /* !(_SPARC64_FUTEX_H) */ 114#endif /* !(_SPARC64_FUTEX_H) */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
13 13
14#ifdef __KERNEL__ 14#ifdef __KERNEL__
15 15
16#include <linux/list.h>
17#include <linux/spinlock.h>
18
19struct rwsem_waiter;
20
21struct rw_semaphore {
22 signed long count;
23#define RWSEM_UNLOCKED_VALUE 0x00000000L 16#define RWSEM_UNLOCKED_VALUE 0x00000000L
24#define RWSEM_ACTIVE_BIAS 0x00000001L 17#define RWSEM_ACTIVE_BIAS 0x00000001L
25#define RWSEM_ACTIVE_MASK 0xffffffffL 18#define RWSEM_ACTIVE_MASK 0xffffffffL
26#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) 19#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
27#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 20#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 21#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
29 spinlock_t wait_lock;
30 struct list_head wait_list;
31#ifdef CONFIG_DEBUG_LOCK_ALLOC
32 struct lockdep_map dep_map;
33#endif
34};
35
36#ifdef CONFIG_DEBUG_LOCK_ALLOC
37# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
38#else
39# define __RWSEM_DEP_MAP_INIT(lockname)
40#endif
41
42#define __RWSEM_INITIALIZER(name) \
43{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
44 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
45
46#define DECLARE_RWSEM(name) \
47 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
48
49extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
50extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
51extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
52extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
53
54extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
55 struct lock_class_key *key);
56
57#define init_rwsem(sem) \
58do { \
59 static struct lock_class_key __key; \
60 \
61 __init_rwsem((sem), #sem, &__key); \
62} while (0)
63 22
64/* 23/*
65 * lock for reading 24 * lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
160 return atomic64_add_return(delta, (atomic64_t *)(&sem->count)); 119 return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
161} 120}
162 121
163static inline int rwsem_is_locked(struct rw_semaphore *sem)
164{
165 return (sem->count != 0);
166}
167
168#endif /* __KERNEL__ */ 122#endif /* __KERNEL__ */
169 123
170#endif /* _SPARC64_RWSEM_H */ 124#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
16#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)]) 16#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
17 17
18spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = { 18spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
19 [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED 19 [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
20}; 20};
21 21
22#else /* SMP */ 22#else /* SMP */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index fe0d10dcae57..d03ec124a598 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -29,16 +29,16 @@
29#include <linux/uaccess.h> 29#include <linux/uaccess.h>
30#include <linux/errno.h> 30#include <linux/errno.h>
31 31
32extern struct __get_user futex_set(int __user *v, int i); 32extern struct __get_user futex_set(u32 __user *v, int i);
33extern struct __get_user futex_add(int __user *v, int n); 33extern struct __get_user futex_add(u32 __user *v, int n);
34extern struct __get_user futex_or(int __user *v, int n); 34extern struct __get_user futex_or(u32 __user *v, int n);
35extern struct __get_user futex_andn(int __user *v, int n); 35extern struct __get_user futex_andn(u32 __user *v, int n);
36extern struct __get_user futex_cmpxchg(int __user *v, int o, int n); 36extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
37 37
38#ifndef __tilegx__ 38#ifndef __tilegx__
39extern struct __get_user futex_xor(int __user *v, int n); 39extern struct __get_user futex_xor(u32 __user *v, int n);
40#else 40#else
41static inline struct __get_user futex_xor(int __user *uaddr, int n) 41static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
42{ 42{
43 struct __get_user asm_ret = __get_user_4(uaddr); 43 struct __get_user asm_ret = __get_user_4(uaddr);
44 if (!asm_ret.err) { 44 if (!asm_ret.err) {
@@ -53,7 +53,7 @@ static inline struct __get_user futex_xor(int __user *uaddr, int n)
53} 53}
54#endif 54#endif
55 55
56static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 56static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
57{ 57{
58 int op = (encoded_op >> 28) & 7; 58 int op = (encoded_op >> 28) & 7;
59 int cmp = (encoded_op >> 24) & 15; 59 int cmp = (encoded_op >> 24) & 15;
@@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
65 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 65 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
66 oparg = 1 << oparg; 66 oparg = 1 << oparg;
67 67
68 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 68 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
69 return -EFAULT; 69 return -EFAULT;
70 70
71 pagefault_disable(); 71 pagefault_disable();
@@ -119,16 +119,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
119 return ret; 119 return ret;
120} 120}
121 121
122static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, 122static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
123 int newval) 123 u32 oldval, u32 newval)
124{ 124{
125 struct __get_user asm_ret; 125 struct __get_user asm_ret;
126 126
127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
128 return -EFAULT; 128 return -EFAULT;
129 129
130 asm_ret = futex_cmpxchg(uaddr, oldval, newval); 130 asm_ret = futex_cmpxchg(uaddr, oldval, newval);
131 return asm_ret.err ? asm_ret.err : asm_ret.val; 131 *uval = asm_ret.val;
132 return asm_ret.err;
132} 133}
133 134
134#ifndef __tilegx__ 135#ifndef __tilegx__
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b23dcf..c70e047eed72 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
124#if 0 124#if 0
125void mconsole_proc(struct mc_request *req) 125void mconsole_proc(struct mc_request *req)
126{ 126{
127 struct nameidata nd;
128 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 127 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
129 struct file *file; 128 struct file *file;
130 int n, err; 129 int n;
131 char *ptr = req->request.data, *buf; 130 char *ptr = req->request.data, *buf;
132 mm_segment_t old_fs = get_fs(); 131 mm_segment_t old_fs = get_fs();
133 132
134 ptr += strlen("proc"); 133 ptr += strlen("proc");
135 ptr = skip_spaces(ptr); 134 ptr = skip_spaces(ptr);
136 135
137 err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); 136 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
138 if (err) {
139 mconsole_reply(req, "Failed to look up file", 1, 0);
140 goto out;
141 }
142
143 err = may_open(&nd.path, MAY_READ, O_RDONLY);
144 if (result) {
145 mconsole_reply(req, "Failed to open file", 1, 0);
146 path_put(&nd.path);
147 goto out;
148 }
149
150 file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
151 current_cred());
152 err = PTR_ERR(file);
153 if (IS_ERR(file)) { 137 if (IS_ERR(file)) {
154 mconsole_reply(req, "Failed to open file", 1, 0); 138 mconsole_reply(req, "Failed to open file", 1, 0);
155 path_put(&nd.path);
156 goto out; 139 goto out;
157 } 140 }
158 141
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
185 .no_cow = 0, \ 185 .no_cow = 0, \
186 .shared = 0, \ 186 .shared = 0, \
187 .cow = DEFAULT_COW, \ 187 .cow = DEFAULT_COW, \
188 .lock = SPIN_LOCK_UNLOCKED, \ 188 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
189 .request = NULL, \ 189 .request = NULL, \
190 .start_sg = 0, \ 190 .start_sg = 0, \
191 .end_sg = 0, \ 191 .end_sg = 0, \
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5fd..46a823882437 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
62 if (fseek(f, -4L, SEEK_END)) { 62 if (fseek(f, -4L, SEEK_END)) {
63 perror(argv[1]); 63 perror(argv[1]);
64 } 64 }
65 fread(&olen, sizeof olen, 1, f); 65
66 if (fread(&olen, sizeof(olen), 1, f) != 1) {
67 perror(argv[1]);
68 return 1;
69 }
70
66 ilen = ftell(f); 71 ilen = ftell(f);
67 olen = getle32(&olen); 72 olen = getle32(&olen);
68 fclose(f); 73 fclose(f);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index f729b2e9679c..f27b709754bf 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -853,4 +853,6 @@ ia32_sys_call_table:
853 .quad sys_fanotify_init 853 .quad sys_fanotify_init
854 .quad sys32_fanotify_mark 854 .quad sys32_fanotify_mark
855 .quad sys_prlimit64 /* 340 */ 855 .quad sys_prlimit64 /* 340 */
856 .quad sys_name_to_handle_at
857 .quad compat_sys_open_by_handle_at
856ia32_syscall_end: 858ia32_syscall_end:
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 000000000000..e656ad8c0a2e
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_CE4100_H_
2#define _ASM_CE4100_H_
3
4int ce4100_pci_init(void);
5
6#endif
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 1f11ce44e956..d09bb03653f0 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -37,7 +37,7 @@
37 "+m" (*uaddr), "=&r" (tem) \ 37 "+m" (*uaddr), "=&r" (tem) \
38 : "r" (oparg), "i" (-EFAULT), "1" (0)) 38 : "r" (oparg), "i" (-EFAULT), "1" (0))
39 39
40static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 40static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
41{ 41{
42 int op = (encoded_op >> 28) & 7; 42 int op = (encoded_op >> 28) & 7;
43 int cmp = (encoded_op >> 24) & 15; 43 int cmp = (encoded_op >> 24) & 15;
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
48 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 48 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
49 oparg = 1 << oparg; 49 oparg = 1 << oparg;
50 50
51 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 51 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
52 return -EFAULT; 52 return -EFAULT;
53 53
54#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 54#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
109 return ret; 109 return ret;
110} 110}
111 111
112static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, 112static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
113 int newval) 113 u32 oldval, u32 newval)
114{ 114{
115 int ret = 0;
115 116
116#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 117#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
117 /* Real i386 machines have no cmpxchg instruction */ 118 /* Real i386 machines have no cmpxchg instruction */
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
119 return -ENOSYS; 120 return -ENOSYS;
120#endif 121#endif
121 122
122 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 123 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
123 return -EFAULT; 124 return -EFAULT;
124 125
125 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" 126 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
126 "2:\t.section .fixup, \"ax\"\n" 127 "2:\t.section .fixup, \"ax\"\n"
127 "3:\tmov %2, %0\n" 128 "3:\tmov %3, %0\n"
128 "\tjmp 2b\n" 129 "\tjmp 2b\n"
129 "\t.previous\n" 130 "\t.previous\n"
130 _ASM_EXTABLE(1b, 3b) 131 _ASM_EXTABLE(1b, 3b)
131 : "=a" (oldval), "+m" (*uaddr) 132 : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
132 : "i" (-EFAULT), "r" (newval), "0" (oldval) 133 : "i" (-EFAULT), "r" (newval), "1" (oldval)
133 : "memory" 134 : "memory"
134 ); 135 );
135 136
136 return oldval; 137 *uval = oldval;
138 return ret;
137} 139}
138 140
139#endif 141#endif
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
37#endif 37#endif
38 38
39#ifdef __KERNEL__ 39#ifdef __KERNEL__
40
41#include <linux/list.h>
42#include <linux/spinlock.h>
43#include <linux/lockdep.h>
44#include <asm/asm.h> 40#include <asm/asm.h>
45 41
46struct rwsem_waiter;
47
48extern asmregparm struct rw_semaphore *
49 rwsem_down_read_failed(struct rw_semaphore *sem);
50extern asmregparm struct rw_semaphore *
51 rwsem_down_write_failed(struct rw_semaphore *sem);
52extern asmregparm struct rw_semaphore *
53 rwsem_wake(struct rw_semaphore *);
54extern asmregparm struct rw_semaphore *
55 rwsem_downgrade_wake(struct rw_semaphore *sem);
56
57/* 42/*
58 * the semaphore definition
59 *
60 * The bias values and the counter type limits the number of 43 * The bias values and the counter type limits the number of
61 * potential readers/writers to 32767 for 32 bits and 2147483647 44 * potential readers/writers to 32767 for 32 bits and 2147483647
62 * for 64 bits. 45 * for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
74#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 57#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
75#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 58#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
76 59
77typedef signed long rwsem_count_t;
78
79struct rw_semaphore {
80 rwsem_count_t count;
81 spinlock_t wait_lock;
82 struct list_head wait_list;
83#ifdef CONFIG_DEBUG_LOCK_ALLOC
84 struct lockdep_map dep_map;
85#endif
86};
87
88#ifdef CONFIG_DEBUG_LOCK_ALLOC
89# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
90#else
91# define __RWSEM_DEP_MAP_INIT(lockname)
92#endif
93
94
95#define __RWSEM_INITIALIZER(name) \
96{ \
97 RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
98 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
99}
100
101#define DECLARE_RWSEM(name) \
102 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
103
104extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
105 struct lock_class_key *key);
106
107#define init_rwsem(sem) \
108do { \
109 static struct lock_class_key __key; \
110 \
111 __init_rwsem((sem), #sem, &__key); \
112} while (0)
113
114/* 60/*
115 * lock for reading 61 * lock for reading
116 */ 62 */
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
133 */ 79 */
134static inline int __down_read_trylock(struct rw_semaphore *sem) 80static inline int __down_read_trylock(struct rw_semaphore *sem)
135{ 81{
136 rwsem_count_t result, tmp; 82 long result, tmp;
137 asm volatile("# beginning __down_read_trylock\n\t" 83 asm volatile("# beginning __down_read_trylock\n\t"
138 " mov %0,%1\n\t" 84 " mov %0,%1\n\t"
139 "1:\n\t" 85 "1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
155 */ 101 */
156static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) 102static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
157{ 103{
158 rwsem_count_t tmp; 104 long tmp;
159 asm volatile("# beginning down_write\n\t" 105 asm volatile("# beginning down_write\n\t"
160 LOCK_PREFIX " xadd %1,(%2)\n\t" 106 LOCK_PREFIX " xadd %1,(%2)\n\t"
161 /* adds 0xffff0001, returns the old value */ 107 /* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
180 */ 126 */
181static inline int __down_write_trylock(struct rw_semaphore *sem) 127static inline int __down_write_trylock(struct rw_semaphore *sem)
182{ 128{
183 rwsem_count_t ret = cmpxchg(&sem->count, 129 long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
184 RWSEM_UNLOCKED_VALUE, 130 RWSEM_ACTIVE_WRITE_BIAS);
185 RWSEM_ACTIVE_WRITE_BIAS);
186 if (ret == RWSEM_UNLOCKED_VALUE) 131 if (ret == RWSEM_UNLOCKED_VALUE)
187 return 1; 132 return 1;
188 return 0; 133 return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
193 */ 138 */
194static inline void __up_read(struct rw_semaphore *sem) 139static inline void __up_read(struct rw_semaphore *sem)
195{ 140{
196 rwsem_count_t tmp; 141 long tmp;
197 asm volatile("# beginning __up_read\n\t" 142 asm volatile("# beginning __up_read\n\t"
198 LOCK_PREFIX " xadd %1,(%2)\n\t" 143 LOCK_PREFIX " xadd %1,(%2)\n\t"
199 /* subtracts 1, returns the old value */ 144 /* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
211 */ 156 */
212static inline void __up_write(struct rw_semaphore *sem) 157static inline void __up_write(struct rw_semaphore *sem)
213{ 158{
214 rwsem_count_t tmp; 159 long tmp;
215 asm volatile("# beginning __up_write\n\t" 160 asm volatile("# beginning __up_write\n\t"
216 LOCK_PREFIX " xadd %1,(%2)\n\t" 161 LOCK_PREFIX " xadd %1,(%2)\n\t"
217 /* subtracts 0xffff0001, returns the old value */ 162 /* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
247/* 192/*
248 * implement atomic add functionality 193 * implement atomic add functionality
249 */ 194 */
250static inline void rwsem_atomic_add(rwsem_count_t delta, 195static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
251 struct rw_semaphore *sem)
252{ 196{
253 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" 197 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
254 : "+m" (sem->count) 198 : "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
258/* 202/*
259 * implement exchange and add functionality 203 * implement exchange and add functionality
260 */ 204 */
261static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, 205static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
262 struct rw_semaphore *sem)
263{ 206{
264 rwsem_count_t tmp = delta; 207 long tmp = delta;
265 208
266 asm volatile(LOCK_PREFIX "xadd %0,%1" 209 asm volatile(LOCK_PREFIX "xadd %0,%1"
267 : "+r" (tmp), "+m" (sem->count) 210 : "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
270 return tmp + delta; 213 return tmp + delta;
271} 214}
272 215
273static inline int rwsem_is_locked(struct rw_semaphore *sem)
274{
275 return (sem->count != 0);
276}
277
278#endif /* __KERNEL__ */ 216#endif /* __KERNEL__ */
279#endif /* _ASM_X86_RWSEM_H */ 217#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..f4c4973fc2ac 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,12 @@
346#define __NR_fanotify_init 338 346#define __NR_fanotify_init 338
347#define __NR_fanotify_mark 339 347#define __NR_fanotify_mark 339
348#define __NR_prlimit64 340 348#define __NR_prlimit64 340
349#define __NR_name_to_handle_at 341
350#define __NR_open_by_handle_at 342
349 351
350#ifdef __KERNEL__ 352#ifdef __KERNEL__
351 353
352#define NR_syscalls 341 354#define NR_syscalls 343
353 355
354#define __ARCH_WANT_IPC_PARSE_VERSION 356#define __ARCH_WANT_IPC_PARSE_VERSION
355#define __ARCH_WANT_OLD_READDIR 357#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..81a3d5b70235 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,10 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
670#define __NR_prlimit64 302 670#define __NR_prlimit64 302
671__SYSCALL(__NR_prlimit64, sys_prlimit64) 671__SYSCALL(__NR_prlimit64, sys_prlimit64)
672#define __NR_name_to_handle_at 303
673__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
674#define __NR_open_by_handle_at 304
675__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
672 676
673#ifndef __NO_STUBS 677#ifndef __NO_STUBS
674#define __ARCH_WANT_OLD_READDIR 678#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a433..3e094af443c3 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
176struct bau_msg_header { 176struct bau_msg_header {
177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
178 /* bits 5:0 */ 178 /* bits 5:0 */
179 unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ 179 unsigned int base_dest_nodeid:15; /* nasid of the */
180 /* bits 20:6 */ /* first bit in uvhub map */ 180 /* bits 20:6 */ /* first bit in uvhub map */
181 unsigned int command:8; /* message type */ 181 unsigned int command:8; /* message type */
182 /* bits 28:21 */ 182 /* bits 28:21 */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae4025b..8508bfe52296 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
287static inline int 287static inline int
288HYPERVISOR_sched_op(int cmd, void *arg) 288HYPERVISOR_sched_op(int cmd, void *arg)
289{ 289{
290 return _hypercall2(int, sched_op_new, cmd, arg); 290 return _hypercall2(int, sched_op, cmd, arg);
291} 291}
292 292
293static inline long 293static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
422#endif 422#endif
423 423
424static inline int 424static inline int
425HYPERVISOR_suspend(unsigned long srec) 425HYPERVISOR_suspend(unsigned long start_info_mfn)
426{ 426{
427 return _hypercall3(int, sched_op, SCHEDOP_shutdown, 427 struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
428 SHUTDOWN_suspend, srec); 428
429 /*
430 * For a PV guest the tools require that the start_info mfn be
431 * present in rdx/edx when the hypercall is made. Per the
432 * hypercall calling convention this is the third hypercall
433 * argument, which is start_info_mfn here.
434 */
435 return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
429} 436}
430 437
431static inline int 438static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a33..c61934fbf22a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
29 29
30/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ 30/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
31#define INVALID_P2M_ENTRY (~0UL) 31#define INVALID_P2M_ENTRY (~0UL)
32#define FOREIGN_FRAME_BIT (1UL<<31) 32#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
33#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
33#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) 34#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
35#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
34 36
35/* Maximum amount of memory we can handle in a domain in pages */ 37/* Maximum amount of memory we can handle in a domain in pages */
36#define MAX_DOMAIN_PAGES \ 38#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
41 43
42extern unsigned long get_phys_to_machine(unsigned long pfn); 44extern unsigned long get_phys_to_machine(unsigned long pfn);
43extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); 45extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
46extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
47extern unsigned long set_phys_range_identity(unsigned long pfn_s,
48 unsigned long pfn_e);
44 49
45extern int m2p_add_override(unsigned long mfn, struct page *page); 50extern int m2p_add_override(unsigned long mfn, struct page *page);
46extern int m2p_remove_override(struct page *page); 51extern int m2p_remove_override(struct page *page);
47extern struct page *m2p_find_override(unsigned long mfn); 52extern struct page *m2p_find_override(unsigned long mfn);
48extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 53extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
49 54
55#ifdef CONFIG_XEN_DEBUG_FS
56extern int p2m_dump_show(struct seq_file *m, void *v);
57#endif
50static inline unsigned long pfn_to_mfn(unsigned long pfn) 58static inline unsigned long pfn_to_mfn(unsigned long pfn)
51{ 59{
52 unsigned long mfn; 60 unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
57 mfn = get_phys_to_machine(pfn); 65 mfn = get_phys_to_machine(pfn);
58 66
59 if (mfn != INVALID_P2M_ENTRY) 67 if (mfn != INVALID_P2M_ENTRY)
60 mfn &= ~FOREIGN_FRAME_BIT; 68 mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
61 69
62 return mfn; 70 return mfn;
63} 71}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
73static inline unsigned long mfn_to_pfn(unsigned long mfn) 81static inline unsigned long mfn_to_pfn(unsigned long mfn)
74{ 82{
75 unsigned long pfn; 83 unsigned long pfn;
84 int ret = 0;
76 85
77 if (xen_feature(XENFEAT_auto_translated_physmap)) 86 if (xen_feature(XENFEAT_auto_translated_physmap))
78 return mfn; 87 return mfn;
79 88
89 if (unlikely((mfn >> machine_to_phys_order) != 0)) {
90 pfn = ~0;
91 goto try_override;
92 }
80 pfn = 0; 93 pfn = 0;
81 /* 94 /*
82 * The array access can fail (e.g., device space beyond end of RAM). 95 * The array access can fail (e.g., device space beyond end of RAM).
83 * In such cases it doesn't matter what we return (we return garbage), 96 * In such cases it doesn't matter what we return (we return garbage),
84 * but we must handle the fault without crashing! 97 * but we must handle the fault without crashing!
85 */ 98 */
86 __get_user(pfn, &machine_to_phys_mapping[mfn]); 99 ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
87 100try_override:
88 /* 101 /* ret might be < 0 if there are no entries in the m2p for mfn */
89 * If this appears to be a foreign mfn (because the pfn 102 if (ret < 0)
90 * doesn't map back to the mfn), then check the local override 103 pfn = ~0;
91 * table to see if there's a better pfn to use. 104 else if (get_phys_to_machine(pfn) != mfn)
105 /*
106 * If this appears to be a foreign mfn (because the pfn
107 * doesn't map back to the mfn), then check the local override
108 * table to see if there's a better pfn to use.
109 *
110 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
111 */
112 pfn = m2p_find_override_pfn(mfn, ~0);
113
114 /*
115 * pfn is ~0 if there are no entries in the m2p for mfn or if the
116 * entry doesn't map back to the mfn and m2p_override doesn't have a
117 * valid entry for it.
92 */ 118 */
93 if (get_phys_to_machine(pfn) != mfn) 119 if (pfn == ~0 &&
94 pfn = m2p_find_override_pfn(mfn, pfn); 120 get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
121 pfn = mfn;
95 122
96 return pfn; 123 return pfn;
97} 124}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3eaf8d3..aa8620989162 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
27 * its own functions. 27 * its own functions.
28 */ 28 */
29struct xen_pci_frontend_ops { 29struct xen_pci_frontend_ops {
30 int (*enable_msi)(struct pci_dev *dev, int **vectors); 30 int (*enable_msi)(struct pci_dev *dev, int vectors[]);
31 void (*disable_msi)(struct pci_dev *dev); 31 void (*disable_msi)(struct pci_dev *dev);
32 int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); 32 int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
33 void (*disable_msix)(struct pci_dev *dev); 33 void (*disable_msix)(struct pci_dev *dev);
34}; 34};
35 35
36extern struct xen_pci_frontend_ops *xen_pci_frontend; 36extern struct xen_pci_frontend_ops *xen_pci_frontend;
37 37
38static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, 38static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
39 int **vectors) 39 int vectors[])
40{ 40{
41 if (xen_pci_frontend && xen_pci_frontend->enable_msi) 41 if (xen_pci_frontend && xen_pci_frontend->enable_msi)
42 return xen_pci_frontend->enable_msi(dev, vectors); 42 return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
48 xen_pci_frontend->disable_msi(dev); 48 xen_pci_frontend->disable_msi(dev);
49} 49}
50static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, 50static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
51 int **vectors, int nvec) 51 int vectors[], int nvec)
52{ 52{
53 if (xen_pci_frontend && xen_pci_frontend->enable_msix) 53 if (xen_pci_frontend && xen_pci_frontend->enable_msix)
54 return xen_pci_frontend->enable_msix(dev, vectors, nvec); 54 return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a389179514..452932d34730 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
106 addr += size; 106 addr += size;
107 } 107 }
108 108
109 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", 109 if (num_scan_areas)
110 num_scan_areas); 110 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
111} 111}
112 112
113 113
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
143{ 143{
144 check_for_bios_corruption(); 144 check_for_bios_corruption();
145 schedule_delayed_work(&bios_check_work, 145 schedule_delayed_work(&bios_check_work,
146 round_jiffies_relative(corruption_check_period*HZ)); 146 round_jiffies_relative(corruption_check_period*HZ));
147} 147}
148 148
149static int start_periodic_check_for_corruption(void) 149static int start_periodic_check_for_corruption(void)
150{ 150{
151 if (!memory_corruption_check || corruption_check_period == 0) 151 if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
152 return 0; 152 return 0;
153 153
154 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", 154 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f2799..4a5a42b842ad 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
195cmd_incomplete: 195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status); 196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock); 197 spin_unlock(&pcc_lock);
198 return -EINVAL; 198 return 0;
199} 199}
200 200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy, 201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..c314b2199efd 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,5 @@ ENTRY(sys_call_table)
340 .long sys_fanotify_init 340 .long sys_fanotify_init
341 .long sys_fanotify_mark 341 .long sys_fanotify_mark
342 .long sys_prlimit64 /* 340 */ 342 .long sys_prlimit64 /* 340 */
343 .long sys_name_to_handle_at
344 .long sys_open_by_handle_at
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a4..20e3f8702d1e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
229 for (address = VMALLOC_START & PMD_MASK; 229 for (address = VMALLOC_START & PMD_MASK;
230 address >= TASK_SIZE && address < FIXADDR_TOP; 230 address >= TASK_SIZE && address < FIXADDR_TOP;
231 address += PMD_SIZE) { 231 address += PMD_SIZE) {
232
233 unsigned long flags;
234 struct page *page; 232 struct page *page;
235 233
236 spin_lock_irqsave(&pgd_lock, flags); 234 spin_lock(&pgd_lock);
237 list_for_each_entry(page, &pgd_list, lru) { 235 list_for_each_entry(page, &pgd_list, lru) {
238 spinlock_t *pgt_lock; 236 spinlock_t *pgt_lock;
239 pmd_t *ret; 237 pmd_t *ret;
240 238
239 /* the pgt_lock only for Xen */
241 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 240 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
242 241
243 spin_lock(pgt_lock); 242 spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
247 if (!ret) 246 if (!ret)
248 break; 247 break;
249 } 248 }
250 spin_unlock_irqrestore(&pgd_lock, flags); 249 spin_unlock(&pgd_lock);
251 } 250 }
252} 251}
253 252
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
828 unsigned long address, unsigned int fault) 827 unsigned long address, unsigned int fault)
829{ 828{
830 if (fault & VM_FAULT_OOM) { 829 if (fault & VM_FAULT_OOM) {
830 /* Kernel mode? Handle exceptions or die: */
831 if (!(error_code & PF_USER)) {
832 up_read(&current->mm->mmap_sem);
833 no_context(regs, error_code, address);
834 return;
835 }
836
831 out_of_memory(regs, error_code, address); 837 out_of_memory(regs, error_code, address);
832 } else { 838 } else {
833 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 839 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..c14a5422e152 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
105 105
106 for (address = start; address <= end; address += PGDIR_SIZE) { 106 for (address = start; address <= end; address += PGDIR_SIZE) {
107 const pgd_t *pgd_ref = pgd_offset_k(address); 107 const pgd_t *pgd_ref = pgd_offset_k(address);
108 unsigned long flags;
109 struct page *page; 108 struct page *page;
110 109
111 if (pgd_none(*pgd_ref)) 110 if (pgd_none(*pgd_ref))
112 continue; 111 continue;
113 112
114 spin_lock_irqsave(&pgd_lock, flags); 113 spin_lock(&pgd_lock);
115 list_for_each_entry(page, &pgd_list, lru) { 114 list_for_each_entry(page, &pgd_list, lru) {
116 pgd_t *pgd; 115 pgd_t *pgd;
117 spinlock_t *pgt_lock; 116 spinlock_t *pgt_lock;
118 117
119 pgd = (pgd_t *)page_address(page) + pgd_index(address); 118 pgd = (pgd_t *)page_address(page) + pgd_index(address);
119 /* the pgt_lock is only for Xen */
120 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 120 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
121 spin_lock(pgt_lock); 121 spin_lock(pgt_lock);
122 122
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
128 128
129 spin_unlock(pgt_lock); 129 spin_unlock(pgt_lock);
130 } 130 }
131 spin_unlock_irqrestore(&pgd_lock, flags); 131 spin_unlock(&pgd_lock);
132 } 132 }
133} 133}
134 134
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..1337c51b07d7 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -780,11 +780,7 @@ void __cpuinit numa_add_cpu(int cpu)
780 int physnid; 780 int physnid;
781 int nid = NUMA_NO_NODE; 781 int nid = NUMA_NO_NODE;
782 782
783 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); 783 nid = early_cpu_to_node(cpu);
784 if (apicid != BAD_APICID)
785 nid = apicid_to_node[apicid];
786 if (nid == NUMA_NO_NODE)
787 nid = early_cpu_to_node(cpu);
788 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); 784 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
789 785
790 /* 786 /*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3c..90825f2eb0f4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
57 57
58void update_page_count(int level, unsigned long pages) 58void update_page_count(int level, unsigned long pages)
59{ 59{
60 unsigned long flags;
61
62 /* Protect against CPA */ 60 /* Protect against CPA */
63 spin_lock_irqsave(&pgd_lock, flags); 61 spin_lock(&pgd_lock);
64 direct_pages_count[level] += pages; 62 direct_pages_count[level] += pages;
65 spin_unlock_irqrestore(&pgd_lock, flags); 63 spin_unlock(&pgd_lock);
66} 64}
67 65
68static void split_page_count(int level) 66static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
394try_preserve_large_page(pte_t *kpte, unsigned long address, 392try_preserve_large_page(pte_t *kpte, unsigned long address,
395 struct cpa_data *cpa) 393 struct cpa_data *cpa)
396{ 394{
397 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; 395 unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
398 pte_t new_pte, old_pte, *tmp; 396 pte_t new_pte, old_pte, *tmp;
399 pgprot_t old_prot, new_prot, req_prot; 397 pgprot_t old_prot, new_prot, req_prot;
400 int i, do_split = 1; 398 int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
403 if (cpa->force_split) 401 if (cpa->force_split)
404 return 1; 402 return 1;
405 403
406 spin_lock_irqsave(&pgd_lock, flags); 404 spin_lock(&pgd_lock);
407 /* 405 /*
408 * Check for races, another CPU might have split this page 406 * Check for races, another CPU might have split this page
409 * up already: 407 * up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
498 } 496 }
499 497
500out_unlock: 498out_unlock:
501 spin_unlock_irqrestore(&pgd_lock, flags); 499 spin_unlock(&pgd_lock);
502 500
503 return do_split; 501 return do_split;
504} 502}
505 503
506static int split_large_page(pte_t *kpte, unsigned long address) 504static int split_large_page(pte_t *kpte, unsigned long address)
507{ 505{
508 unsigned long flags, pfn, pfninc = 1; 506 unsigned long pfn, pfninc = 1;
509 unsigned int i, level; 507 unsigned int i, level;
510 pte_t *pbase, *tmp; 508 pte_t *pbase, *tmp;
511 pgprot_t ref_prot; 509 pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
519 if (!base) 517 if (!base)
520 return -ENOMEM; 518 return -ENOMEM;
521 519
522 spin_lock_irqsave(&pgd_lock, flags); 520 spin_lock(&pgd_lock);
523 /* 521 /*
524 * Check for races, another CPU might have split this page 522 * Check for races, another CPU might have split this page
525 * up for us already: 523 * up for us already:
@@ -591,7 +589,7 @@ out_unlock:
591 */ 589 */
592 if (base) 590 if (base)
593 __free_page(base); 591 __free_page(base);
594 spin_unlock_irqrestore(&pgd_lock, flags); 592 spin_unlock(&pgd_lock);
595 593
596 return 0; 594 return 0;
597} 595}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96d..0113d19c8aa6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
121 121
122static void pgd_dtor(pgd_t *pgd) 122static void pgd_dtor(pgd_t *pgd)
123{ 123{
124 unsigned long flags; /* can be called from interrupt context */
125
126 if (SHARED_KERNEL_PMD) 124 if (SHARED_KERNEL_PMD)
127 return; 125 return;
128 126
129 spin_lock_irqsave(&pgd_lock, flags); 127 spin_lock(&pgd_lock);
130 pgd_list_del(pgd); 128 pgd_list_del(pgd);
131 spin_unlock_irqrestore(&pgd_lock, flags); 129 spin_unlock(&pgd_lock);
132} 130}
133 131
134/* 132/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
260{ 258{
261 pgd_t *pgd; 259 pgd_t *pgd;
262 pmd_t *pmds[PREALLOCATED_PMDS]; 260 pmd_t *pmds[PREALLOCATED_PMDS];
263 unsigned long flags;
264 261
265 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); 262 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
266 263
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
280 * respect to anything walking the pgd_list, so that they 277 * respect to anything walking the pgd_list, so that they
281 * never see a partially populated pgd. 278 * never see a partially populated pgd.
282 */ 279 */
283 spin_lock_irqsave(&pgd_lock, flags); 280 spin_lock(&pgd_lock);
284 281
285 pgd_ctor(mm, pgd); 282 pgd_ctor(mm, pgd);
286 pgd_prepopulate_pmd(mm, pgd, pmds); 283 pgd_prepopulate_pmd(mm, pgd, pmds);
287 284
288 spin_unlock_irqrestore(&pgd_lock, flags); 285 spin_unlock(&pgd_lock);
289 286
290 return pgd; 287 return pgd;
291 288
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e809..9260b3eb18d4 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/init.h> 35#include <linux/init.h>
36 36
37#include <asm/ce4100.h>
37#include <asm/pci_x86.h> 38#include <asm/pci_x86.h>
38 39
39struct sim_reg { 40struct sim_reg {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
306 .write = ce4100_conf_write, 307 .write = ce4100_conf_write,
307}; 308};
308 309
309static int __init ce4100_pci_init(void) 310int __init ce4100_pci_init(void)
310{ 311{
311 init_sim_regs(); 312 init_sim_regs();
312 raw_pci_ops = &ce4100_pci_conf; 313 raw_pci_ops = &ce4100_pci_conf;
313 return 0; 314 /* Indicate to the caller that it should invoke pci_legacy_init() */
315 return 1;
314} 316}
315subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a07d09f..8c4085a95ef1 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
20#include <asm/xen/pci.h> 20#include <asm/xen/pci.h>
21 21
22#ifdef CONFIG_ACPI 22#ifdef CONFIG_ACPI
23static int xen_hvm_register_pirq(u32 gsi, int triggering) 23static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
24 int trigger, int polarity)
24{ 25{
25 int rc, irq; 26 int rc, irq;
26 struct physdev_map_pirq map_irq; 27 struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
41 return -1; 42 return -1;
42 } 43 }
43 44
44 if (triggering == ACPI_EDGE_SENSITIVE) { 45 if (trigger == ACPI_EDGE_SENSITIVE) {
45 shareable = 0; 46 shareable = 0;
46 name = "ioapic-edge"; 47 name = "ioapic-edge";
47 } else { 48 } else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
55 56
56 return irq; 57 return irq;
57} 58}
58
59static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
60 int trigger, int polarity)
61{
62 return xen_hvm_register_pirq(gsi, trigger);
63}
64#endif 59#endif
65 60
66#if defined(CONFIG_PCI_MSI) 61#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
91 86
92static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 87static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
93{ 88{
94 int irq, pirq, ret = 0; 89 int irq, pirq;
95 struct msi_desc *msidesc; 90 struct msi_desc *msidesc;
96 struct msi_msg msg; 91 struct msi_msg msg;
97 92
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
99 __read_msi_msg(msidesc, &msg); 94 __read_msi_msg(msidesc, &msg);
100 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | 95 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
101 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); 96 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
102 if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { 97 if (msg.data != XEN_PIRQ_MSI_DATA ||
103 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 98 xen_irq_from_pirq(pirq) < 0) {
104 "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); 99 pirq = xen_allocate_pirq_msi(dev, msidesc);
105 if (irq < 0) 100 if (pirq < 0)
106 goto error; 101 goto error;
107 ret = set_irq_msi(irq, msidesc); 102 xen_msi_compose_msg(dev, pirq, &msg);
108 if (ret < 0) 103 __write_msi_msg(msidesc, &msg);
109 goto error_while; 104 dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
110 printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" 105 } else {
111 " pirq=%d\n", irq, pirq); 106 dev_dbg(&dev->dev,
112 return 0; 107 "xen: msi already bound to pirq=%d\n", pirq);
113 } 108 }
114 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 109 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
115 "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); 110 (type == PCI_CAP_ID_MSIX) ?
116 if (irq < 0 || pirq < 0) 111 "msi-x" : "msi");
112 if (irq < 0)
117 goto error; 113 goto error;
118 printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); 114 dev_dbg(&dev->dev,
119 xen_msi_compose_msg(dev, pirq, &msg); 115 "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
120 ret = set_irq_msi(irq, msidesc);
121 if (ret < 0)
122 goto error_while;
123 write_msi_msg(irq, &msg);
124 } 116 }
125 return 0; 117 return 0;
126 118
127error_while:
128 unbind_from_irqhandler(irq, NULL);
129error: 119error:
130 if (ret == -ENODEV) 120 dev_err(&dev->dev,
131 dev_err(&dev->dev, "Xen PCI frontend has not registered" \ 121 "Xen PCI frontend has not registered MSI/MSI-X support!\n");
132 " MSI/MSI-X support!\n"); 122 return -ENODEV;
133
134 return ret;
135} 123}
136 124
137/* 125/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
150 return -ENOMEM; 138 return -ENOMEM;
151 139
152 if (type == PCI_CAP_ID_MSIX) 140 if (type == PCI_CAP_ID_MSIX)
153 ret = xen_pci_frontend_enable_msix(dev, &v, nvec); 141 ret = xen_pci_frontend_enable_msix(dev, v, nvec);
154 else 142 else
155 ret = xen_pci_frontend_enable_msi(dev, &v); 143 ret = xen_pci_frontend_enable_msi(dev, v);
156 if (ret) 144 if (ret)
157 goto error; 145 goto error;
158 i = 0; 146 i = 0;
159 list_for_each_entry(msidesc, &dev->msi_list, list) { 147 list_for_each_entry(msidesc, &dev->msi_list, list) {
160 irq = xen_allocate_pirq(v[i], 0, /* not sharable */ 148 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
161 (type == PCI_CAP_ID_MSIX) ? 149 (type == PCI_CAP_ID_MSIX) ?
162 "pcifront-msi-x" : "pcifront-msi"); 150 "pcifront-msi-x" :
163 if (irq < 0) { 151 "pcifront-msi");
164 ret = -1; 152 if (irq < 0)
165 goto free; 153 goto free;
166 }
167
168 ret = set_irq_msi(irq, msidesc);
169 if (ret)
170 goto error_while;
171 i++; 154 i++;
172 } 155 }
173 kfree(v); 156 kfree(v);
174 return 0; 157 return 0;
175 158
176error_while:
177 unbind_from_irqhandler(irq, NULL);
178error: 159error:
179 if (ret == -ENODEV) 160 dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
180 dev_err(&dev->dev, "Xen PCI frontend has not registered" \
181 " MSI/MSI-X support!\n");
182free: 161free:
183 kfree(v); 162 kfree(v);
184 return ret; 163 return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
193 xen_pci_frontend_disable_msix(dev); 172 xen_pci_frontend_disable_msix(dev);
194 else 173 else
195 xen_pci_frontend_disable_msi(dev); 174 xen_pci_frontend_disable_msi(dev);
175
176 /* Free the IRQ's and the msidesc using the generic code. */
177 default_teardown_msi_irqs(dev);
196} 178}
197 179
198static void xen_teardown_msi_irq(unsigned int irq) 180static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
200 xen_destroy_irq(irq); 182 xen_destroy_irq(irq);
201} 183}
202 184
185#ifdef CONFIG_XEN_DOM0
203static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 186static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
204{ 187{
205 int irq, ret; 188 int ret = 0;
206 struct msi_desc *msidesc; 189 struct msi_desc *msidesc;
207 190
208 list_for_each_entry(msidesc, &dev->msi_list, list) { 191 list_for_each_entry(msidesc, &dev->msi_list, list) {
209 irq = xen_create_msi_irq(dev, msidesc, type); 192 struct physdev_map_pirq map_irq;
210 if (irq < 0)
211 return -1;
212 193
213 ret = set_irq_msi(irq, msidesc); 194 memset(&map_irq, 0, sizeof(map_irq));
214 if (ret) 195 map_irq.domid = DOMID_SELF;
215 goto error; 196 map_irq.type = MAP_PIRQ_TYPE_MSI;
216 } 197 map_irq.index = -1;
217 return 0; 198 map_irq.pirq = -1;
199 map_irq.bus = dev->bus->number;
200 map_irq.devfn = dev->devfn;
218 201
219error: 202 if (type == PCI_CAP_ID_MSIX) {
220 xen_destroy_irq(irq); 203 int pos;
204 u32 table_offset, bir;
205
206 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
207
208 pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
209 &table_offset);
210 bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
211
212 map_irq.table_base = pci_resource_start(dev, bir);
213 map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
214 }
215
216 ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
217 if (ret) {
218 dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
219 goto out;
220 }
221
222 ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
223 map_irq.pirq, map_irq.index,
224 (type == PCI_CAP_ID_MSIX) ?
225 "msi-x" : "msi");
226 if (ret < 0)
227 goto out;
228 }
229 ret = 0;
230out:
221 return ret; 231 return ret;
222} 232}
223#endif 233#endif
234#endif
224 235
225static int xen_pcifront_enable_irq(struct pci_dev *dev) 236static int xen_pcifront_enable_irq(struct pci_dev *dev)
226{ 237{
227 int rc; 238 int rc;
228 int share = 1; 239 int share = 1;
240 u8 gsi;
229 241
230 dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); 242 rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
231 243 if (rc < 0) {
232 if (dev->irq < 0) 244 dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
233 return -EINVAL; 245 rc);
246 return rc;
247 }
234 248
235 if (dev->irq < NR_IRQS_LEGACY) 249 if (gsi < NR_IRQS_LEGACY)
236 share = 0; 250 share = 0;
237 251
238 rc = xen_allocate_pirq(dev->irq, share, "pcifront"); 252 rc = xen_allocate_pirq(gsi, share, "pcifront");
239 if (rc < 0) { 253 if (rc < 0) {
240 dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", 254 dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
241 dev->irq, rc); 255 gsi, rc);
242 return rc; 256 return rc;
243 } 257 }
258
259 dev->irq = rc;
260 dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
244 return 0; 261 return 0;
245} 262}
246 263
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a7178..cd6f184c3b3f 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,6 +15,7 @@
15#include <linux/serial_reg.h> 15#include <linux/serial_reg.h>
16#include <linux/serial_8250.h> 16#include <linux/serial_8250.h>
17 17
18#include <asm/ce4100.h>
18#include <asm/setup.h> 19#include <asm/setup.h>
19#include <asm/io.h> 20#include <asm/io.h>
20 21
@@ -129,4 +130,5 @@ void __init x86_ce4100_early_setup(void)
129 x86_init.resources.probe_roms = x86_init_noop; 130 x86_init.resources.probe_roms = x86_init_noop;
130 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 131 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
131 x86_init.mpparse.find_smp_config = sdv_find_smp_config; 132 x86_init.mpparse.find_smp_config = sdv_find_smp_config;
133 x86_init.pci.init = ce4100_pci_init;
132} 134}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96a..a7b38d35c29a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
1364 memset(bd2, 0, sizeof(struct bau_desc)); 1364 memset(bd2, 0, sizeof(struct bau_desc));
1365 bd2->header.sw_ack_flag = 1; 1365 bd2->header.sw_ack_flag = 1;
1366 /* 1366 /*
1367 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub 1367 * base_dest_nodeid is the nasid of the first uvhub
1368 * in the partition. The bit map will indicate uvhub numbers, 1368 * in the partition. The bit map will indicate uvhub numbers,
1369 * which are 0-N in a partition. Pnodes are unique system-wide. 1369 * which are 0-N in a partition. Pnodes are unique system-wide.
1370 */ 1370 */
1371 bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 1371 bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
1372 bd2->header.dest_subnodeid = 0x10; /* the LB */ 1372 bd2->header.dest_subnodeid = 0x10; /* the LB */
1373 bd2->header.command = UV_NET_ENDPOINT_INTD; 1373 bd2->header.command = UV_NET_ENDPOINT_INTD;
1374 bd2->header.int_both = 1; 1374 bd2->header.int_both = 1;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc3..e4343fe488ed 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
48 help 48 help
49 Enable statistics output and various tuning options in debugfs. 49 Enable statistics output and various tuning options in debugfs.
50 Enabling this option may incur a significant performance overhead. 50 Enabling this option may incur a significant performance overhead.
51
52config XEN_DEBUG
53 bool "Enable Xen debug checks"
54 depends on XEN
55 default n
56 help
57 Enable various WARN_ON checks in the Xen MMU code.
58 Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45fb..49dbd78ec3cb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
1284 1284
1285 xen_setup_features(); 1285 xen_setup_features();
1286 1286
1287 pv_info = xen_info; 1287 pv_info.name = "Xen HVM";
1288 pv_info.kernel_rpl = 0;
1289 1288
1290 xen_domain_type = XEN_HVM_DOMAIN; 1289 xen_domain_type = XEN_HVM_DOMAIN;
1291 1290
1292 return 0; 1291 return 0;
1293} 1292}
1294 1293
1295void xen_hvm_init_shared_info(void) 1294void __ref xen_hvm_init_shared_info(void)
1296{ 1295{
1297 int cpu; 1296 int cpu;
1298 struct xen_add_to_physmap xatp; 1297 struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1331 switch (action) { 1330 switch (action) {
1332 case CPU_UP_PREPARE: 1331 case CPU_UP_PREPARE:
1333 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1332 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1333 if (xen_have_vector_callback)
1334 xen_init_lock_cpu(cpu);
1334 break; 1335 break;
1335 default: 1336 default:
1336 break; 1337 break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
1355 1356
1356 if (xen_feature(XENFEAT_hvm_callback_vector)) 1357 if (xen_feature(XENFEAT_hvm_callback_vector))
1357 xen_have_vector_callback = 1; 1358 xen_have_vector_callback = 1;
1359 xen_hvm_smp_init();
1358 register_cpu_notifier(&xen_hvm_cpu_notifier); 1360 register_cpu_notifier(&xen_hvm_cpu_notifier);
1359 xen_unplug_emulated_devices(); 1361 xen_unplug_emulated_devices();
1360 have_vcpu_info_placement = 0; 1362 have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..832765c0fb8c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/gfp.h> 47#include <linux/gfp.h>
48#include <linux/memblock.h> 48#include <linux/memblock.h>
49#include <linux/seq_file.h>
49 50
50#include <asm/pgtable.h> 51#include <asm/pgtable.h>
51#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
416 if (val & _PAGE_PRESENT) { 417 if (val & _PAGE_PRESENT) {
417 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 418 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
418 pteval_t flags = val & PTE_FLAGS_MASK; 419 pteval_t flags = val & PTE_FLAGS_MASK;
419 unsigned long mfn = pfn_to_mfn(pfn); 420 unsigned long mfn;
420 421
422 if (!xen_feature(XENFEAT_auto_translated_physmap))
423 mfn = get_phys_to_machine(pfn);
424 else
425 mfn = pfn;
421 /* 426 /*
422 * If there's no mfn for the pfn, then just create an 427 * If there's no mfn for the pfn, then just create an
423 * empty non-present pte. Unfortunately this loses 428 * empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
427 if (unlikely(mfn == INVALID_P2M_ENTRY)) { 432 if (unlikely(mfn == INVALID_P2M_ENTRY)) {
428 mfn = 0; 433 mfn = 0;
429 flags = 0; 434 flags = 0;
435 } else {
436 /*
437 * Paramount to do this test _after_ the
438 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
439 * IDENTITY_FRAME_BIT resolves to true.
440 */
441 mfn &= ~FOREIGN_FRAME_BIT;
442 if (mfn & IDENTITY_FRAME_BIT) {
443 mfn &= ~IDENTITY_FRAME_BIT;
444 flags |= _PAGE_IOMAP;
445 }
430 } 446 }
431
432 val = ((pteval_t)mfn << PAGE_SHIFT) | flags; 447 val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
433 } 448 }
434 449
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
532} 547}
533PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 548PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
534 549
550#ifdef CONFIG_XEN_DEBUG
551pte_t xen_make_pte_debug(pteval_t pte)
552{
553 phys_addr_t addr = (pte & PTE_PFN_MASK);
554 phys_addr_t other_addr;
555 bool io_page = false;
556 pte_t _pte;
557
558 if (pte & _PAGE_IOMAP)
559 io_page = true;
560
561 _pte = xen_make_pte(pte);
562
563 if (!addr)
564 return _pte;
565
566 if (io_page &&
567 (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
568 other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
569 WARN(addr != other_addr,
570 "0x%lx is using VM_IO, but it is 0x%lx!\n",
571 (unsigned long)addr, (unsigned long)other_addr);
572 } else {
573 pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
574 other_addr = (_pte.pte & PTE_PFN_MASK);
575 WARN((addr == other_addr) && (!io_page) && (!iomap_set),
576 "0x%lx is missing VM_IO (and wasn't fixed)!\n",
577 (unsigned long)addr);
578 }
579
580 return _pte;
581}
582PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
583#endif
584
535pgd_t xen_make_pgd(pgdval_t pgd) 585pgd_t xen_make_pgd(pgdval_t pgd)
536{ 586{
537 pgd = pte_pfn_to_mfn(pgd); 587 pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
986 */ 1036 */
987void xen_mm_pin_all(void) 1037void xen_mm_pin_all(void)
988{ 1038{
989 unsigned long flags;
990 struct page *page; 1039 struct page *page;
991 1040
992 spin_lock_irqsave(&pgd_lock, flags); 1041 spin_lock(&pgd_lock);
993 1042
994 list_for_each_entry(page, &pgd_list, lru) { 1043 list_for_each_entry(page, &pgd_list, lru) {
995 if (!PagePinned(page)) { 1044 if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
998 } 1047 }
999 } 1048 }
1000 1049
1001 spin_unlock_irqrestore(&pgd_lock, flags); 1050 spin_unlock(&pgd_lock);
1002} 1051}
1003 1052
1004/* 1053/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
1099 */ 1148 */
1100void xen_mm_unpin_all(void) 1149void xen_mm_unpin_all(void)
1101{ 1150{
1102 unsigned long flags;
1103 struct page *page; 1151 struct page *page;
1104 1152
1105 spin_lock_irqsave(&pgd_lock, flags); 1153 spin_lock(&pgd_lock);
1106 1154
1107 list_for_each_entry(page, &pgd_list, lru) { 1155 list_for_each_entry(page, &pgd_list, lru) {
1108 if (PageSavePinned(page)) { 1156 if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
1112 } 1160 }
1113 } 1161 }
1114 1162
1115 spin_unlock_irqrestore(&pgd_lock, flags); 1163 spin_unlock(&pgd_lock);
1116} 1164}
1117 1165
1118void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 1166void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
1942 1990
1943static __init void xen_post_allocator_init(void) 1991static __init void xen_post_allocator_init(void)
1944{ 1992{
1993#ifdef CONFIG_XEN_DEBUG
1994 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
1995#endif
1945 pv_mmu_ops.set_pte = xen_set_pte; 1996 pv_mmu_ops.set_pte = xen_set_pte;
1946 pv_mmu_ops.set_pmd = xen_set_pmd; 1997 pv_mmu_ops.set_pmd = xen_set_pmd;
1947 pv_mmu_ops.set_pud = xen_set_pud; 1998 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2074 in_frames[i] = virt_to_mfn(vaddr); 2125 in_frames[i] = virt_to_mfn(vaddr);
2075 2126
2076 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); 2127 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2077 set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); 2128 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2078 2129
2079 if (out_frames) 2130 if (out_frames)
2080 out_frames[i] = virt_to_pfn(vaddr); 2131 out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2353 2404
2354#ifdef CONFIG_XEN_DEBUG_FS 2405#ifdef CONFIG_XEN_DEBUG_FS
2355 2406
2407static int p2m_dump_open(struct inode *inode, struct file *filp)
2408{
2409 return single_open(filp, p2m_dump_show, NULL);
2410}
2411
2412static const struct file_operations p2m_dump_fops = {
2413 .open = p2m_dump_open,
2414 .read = seq_read,
2415 .llseek = seq_lseek,
2416 .release = single_release,
2417};
2418
2356static struct dentry *d_mmu_debug; 2419static struct dentry *d_mmu_debug;
2357 2420
2358static int __init xen_mmu_debugfs(void) 2421static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
2408 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, 2471 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
2409 &mmu_stats.prot_commit_batched); 2472 &mmu_stats.prot_commit_batched);
2410 2473
2474 debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
2411 return 0; 2475 return 0;
2412} 2476}
2413fs_initcall(xen_mmu_debugfs); 2477fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff9..215a3ce61068 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 23 * P2M_PER_PAGE depends on the architecture, as a mfn is always
24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to 24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
25 * 512 and 1024 entries respectively. 25 * 512 and 1024 entries respectively.
26 *
27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
28 *
29 * However not all entries are filled with MFNs. Specifically for all other
30 * leaf entries, or for the top root, or middle one, for which there is a void
31 * entry, we assume it is "missing". So (for example)
32 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
33 *
34 * We also have the possibility of setting 1-1 mappings on certain regions, so
35 * that:
36 * pfn_to_mfn(0xc0000)=0xc0000
37 *
38 * The benefit of this is, that we can assume for non-RAM regions (think
39 * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
40 * get the PFN value to match the MFN.
41 *
42 * For this to work efficiently we have one new page p2m_identity and
43 * allocate (via reserved_brk) any other pages we need to cover the sides
44 * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
45 * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
46 * no other fancy value).
47 *
48 * On lookup we spot that the entry points to p2m_identity and return the
49 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
50 * If the entry points to an allocated page, we just proceed as before and
51 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
52 * appropriate functions (pfn_to_mfn).
53 *
54 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
55 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
56 * non-identity pfn. To protect ourselves against that we elect to set (and get) the
57 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
58 *
59 * This simplistic diagram is used to explain the more subtle piece of code.
60 * There is also a diagram of the P2M at the end that can help.
61 * Imagine your E820 looking as so:
62 *
63 * 1GB 2GB
64 * /-------------------+---------\/----\ /----------\ /---+-----\
65 * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
66 * \-------------------+---------/\----/ \----------/ \---+-----/
67 * ^- 1029MB ^- 2001MB
68 *
69 * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
70 * 2048MB = 524288 (0x80000)]
71 *
72 * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
73 * is actually not present (would have to kick the balloon driver to put it in).
74 *
75 * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
76 * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
77 * of the PFN and the end PFN (263424 and 512256 respectively). The first step
78 * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
79 * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
80 * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
81 * to end pfn. We reserve_brk top leaf pages if they are missing (means they
82 * point to p2m_mid_missing).
83 *
84 * With the E820 example above, 263424 is not 1GB aligned so we allocate a
85 * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
86 * Each entry in the allocate page is "missing" (points to p2m_missing).
87 *
88 * Next stage is to determine if we need to do a more granular boundary check
89 * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
90 * We check if the start pfn and end pfn violate that boundary check, and if
91 * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
92 * granularity of setting which PFNs are missing and which ones are identity.
93 * In our example 263424 and 512256 both fail the check so we reserve_brk two
94 * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
95 * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
96 *
97 * At this point we would at minimum reserve_brk one page, but could be up to
98 * three. Each call to set_phys_range_identity has at maximum a three page
99 * cost. If we were to query the P2M at this stage, all those entries from
100 * start PFN through end PFN (so 1029MB -> 2001MB) would return
101 * INVALID_P2M_ENTRY ("missing").
102 *
103 * The next step is to walk from the start pfn to the end pfn setting
104 * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
105 * If we find that the middle leaf is pointing to p2m_missing we can swap it
106 * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
107 * point we do not need to worry about boundary alignment (so no need to
108 * reserve_brk a middle page, figure out which PFNs are "missing" and which
109 * ones are identity), as that has been done earlier. If we find that the
110 * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
111 * that page (which covers 512 PFNs) and set the appropriate PFN with
112 * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
113 * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
114 * IDENTITY_FRAME_BIT set.
115 *
116 * All other regions that are void (or not filled) either point to p2m_missing
117 * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
118 * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
119 * contain the INVALID_P2M_ENTRY value and are considered "missing."
120 *
121 * This is what the p2m ends up looking (for the E820 above) with this
122 * fabulous drawing:
123 *
124 * p2m /--------------\
125 * /-----\ | &mfn_list[0],| /-----------------\
126 * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
127 * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
128 * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
129 * |-----| \ | [p2m_identity]+\\ | .... |
130 * | 2 |--\ \-------------------->| ... | \\ \----------------/
131 * |-----| \ \---------------/ \\
132 * | 3 |\ \ \\ p2m_identity
133 * |-----| \ \-------------------->/---------------\ /-----------------\
134 * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
135 * \-----/ / | [p2m_identity]+-->| ..., ~0 |
136 * / /---------------\ | .... | \-----------------/
137 * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
138 * / | IDENTITY[@256]|<----/ \---------------/
139 * / | ~0, ~0, .... |
140 * | \---------------/
141 * |
142 * p2m_missing p2m_missing
143 * /------------------\ /------------\
144 * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
145 * | [p2m_mid_missing]+---->| ..., ~0 |
146 * \------------------/ \------------/
147 *
148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
26 */ 149 */
27 150
28#include <linux/init.h> 151#include <linux/init.h>
@@ -30,6 +153,7 @@
30#include <linux/list.h> 153#include <linux/list.h>
31#include <linux/hash.h> 154#include <linux/hash.h>
32#include <linux/sched.h> 155#include <linux/sched.h>
156#include <linux/seq_file.h>
33 157
34#include <asm/cache.h> 158#include <asm/cache.h>
35#include <asm/setup.h> 159#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
59static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); 183static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
60static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); 184static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
61 185
186static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
187
62RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 188RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
63RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 189RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
64 190
191/* We might hit two boundary violations, at the start and at the end; at most,
192 * each boundary violation will require three middle nodes. */
193RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
194
65static inline unsigned p2m_top_index(unsigned long pfn) 195static inline unsigned p2m_top_index(unsigned long pfn)
66{ 196{
67 BUG_ON(pfn >= MAX_P2M_PFN); 197 BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
136 * - After resume we're called from within stop_machine, but the mfn 266 * - After resume we're called from within stop_machine, but the mfn
137 * tree should alreay be completely allocated. 267 * tree should alreay be completely allocated.
138 */ 268 */
139void xen_build_mfn_list_list(void) 269void __ref xen_build_mfn_list_list(void)
140{ 270{
141 unsigned long pfn; 271 unsigned long pfn;
142 272
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
221 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); 351 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
222 p2m_top_init(p2m_top); 352 p2m_top_init(p2m_top);
223 353
354 p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
355 p2m_init(p2m_identity);
356
224 /* 357 /*
225 * The domain builder gives us a pre-constructed p2m array in 358 * The domain builder gives us a pre-constructed p2m array in
226 * mfn_list for all the pages initially given to us, so we just 359 * mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
266 mididx = p2m_mid_index(pfn); 399 mididx = p2m_mid_index(pfn);
267 idx = p2m_index(pfn); 400 idx = p2m_index(pfn);
268 401
402 /*
403 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
404 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
405 * would be wrong.
406 */
407 if (p2m_top[topidx][mididx] == p2m_identity)
408 return IDENTITY_FRAME(pfn);
409
269 return p2m_top[topidx][mididx][idx]; 410 return p2m_top[topidx][mididx][idx];
270} 411}
271EXPORT_SYMBOL_GPL(get_phys_to_machine); 412EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
335 p2m_top_mfn_p[topidx] = mid_mfn; 476 p2m_top_mfn_p[topidx] = mid_mfn;
336 } 477 }
337 478
338 if (p2m_top[topidx][mididx] == p2m_missing) { 479 if (p2m_top[topidx][mididx] == p2m_identity ||
480 p2m_top[topidx][mididx] == p2m_missing) {
339 /* p2m leaf page is missing */ 481 /* p2m leaf page is missing */
340 unsigned long *p2m; 482 unsigned long *p2m;
483 unsigned long *p2m_orig = p2m_top[topidx][mididx];
341 484
342 p2m = alloc_p2m_page(); 485 p2m = alloc_p2m_page();
343 if (!p2m) 486 if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
345 488
346 p2m_init(p2m); 489 p2m_init(p2m);
347 490
348 if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) 491 if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
349 free_p2m_page(p2m); 492 free_p2m_page(p2m);
350 else 493 else
351 mid_mfn[mididx] = virt_to_mfn(p2m); 494 mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
354 return true; 497 return true;
355} 498}
356 499
500bool __early_alloc_p2m(unsigned long pfn)
501{
502 unsigned topidx, mididx, idx;
503
504 topidx = p2m_top_index(pfn);
505 mididx = p2m_mid_index(pfn);
506 idx = p2m_index(pfn);
507
508 /* Pfff.. No boundary cross-over, let's get out. */
509 if (!idx)
510 return false;
511
512 WARN(p2m_top[topidx][mididx] == p2m_identity,
513 "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
514 topidx, mididx);
515
516 /*
517 * Could be done by xen_build_dynamic_phys_to_machine..
518 */
519 if (p2m_top[topidx][mididx] != p2m_missing)
520 return false;
521
522 /* Boundary cross-over for the edges: */
523 if (idx) {
524 unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
525
526 p2m_init(p2m);
527
528 p2m_top[topidx][mididx] = p2m;
529
530 }
531 return idx != 0;
532}
533unsigned long set_phys_range_identity(unsigned long pfn_s,
534 unsigned long pfn_e)
535{
536 unsigned long pfn;
537
538 if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
539 return 0;
540
541 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
542 return pfn_e - pfn_s;
543
544 if (pfn_s > pfn_e)
545 return 0;
546
547 for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
548 pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
549 pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
550 {
551 unsigned topidx = p2m_top_index(pfn);
552 if (p2m_top[topidx] == p2m_mid_missing) {
553 unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
554
555 p2m_mid_init(mid);
556
557 p2m_top[topidx] = mid;
558 }
559 }
560
561 __early_alloc_p2m(pfn_s);
562 __early_alloc_p2m(pfn_e);
563
564 for (pfn = pfn_s; pfn < pfn_e; pfn++)
565 if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
566 break;
567
568 if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
569 "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
570 (pfn_e - pfn_s) - (pfn - pfn_s)))
571 printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
572
573 return pfn - pfn_s;
574}
575
357/* Try to install p2m mapping; fail if intermediate bits missing */ 576/* Try to install p2m mapping; fail if intermediate bits missing */
358bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) 577bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
359{ 578{
360 unsigned topidx, mididx, idx; 579 unsigned topidx, mididx, idx;
361 580
581 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
582 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
583 return true;
584 }
362 if (unlikely(pfn >= MAX_P2M_PFN)) { 585 if (unlikely(pfn >= MAX_P2M_PFN)) {
363 BUG_ON(mfn != INVALID_P2M_ENTRY); 586 BUG_ON(mfn != INVALID_P2M_ENTRY);
364 return true; 587 return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
368 mididx = p2m_mid_index(pfn); 591 mididx = p2m_mid_index(pfn);
369 idx = p2m_index(pfn); 592 idx = p2m_index(pfn);
370 593
594 /* For sparse holes where the p2m leaf has real PFN along with
595 * PCI holes, stick in the PFN as the MFN value.
596 */
597 if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
598 if (p2m_top[topidx][mididx] == p2m_identity)
599 return true;
600
601 /* Swap over from MISSING to IDENTITY if needed. */
602 if (p2m_top[topidx][mididx] == p2m_missing) {
603 WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
604 p2m_identity) != p2m_missing);
605 return true;
606 }
607 }
608
371 if (p2m_top[topidx][mididx] == p2m_missing) 609 if (p2m_top[topidx][mididx] == p2m_missing)
372 return mfn == INVALID_P2M_ENTRY; 610 return mfn == INVALID_P2M_ENTRY;
373 611
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
378 616
379bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) 617bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
380{ 618{
381 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
382 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
383 return true;
384 }
385
386 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 619 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
387 if (!alloc_p2m(pfn)) 620 if (!alloc_p2m(pfn))
388 return false; 621 return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
421{ 654{
422 unsigned long flags; 655 unsigned long flags;
423 unsigned long pfn; 656 unsigned long pfn;
424 unsigned long address; 657 unsigned long uninitialized_var(address);
425 unsigned level; 658 unsigned level;
426 pte_t *ptep = NULL; 659 pte_t *ptep = NULL;
427 660
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
455 unsigned long flags; 688 unsigned long flags;
456 unsigned long mfn; 689 unsigned long mfn;
457 unsigned long pfn; 690 unsigned long pfn;
458 unsigned long address; 691 unsigned long uninitialized_var(address);
459 unsigned level; 692 unsigned level;
460 pte_t *ptep = NULL; 693 pte_t *ptep = NULL;
461 694
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
520 return ret; 753 return ret;
521} 754}
522EXPORT_SYMBOL_GPL(m2p_find_override_pfn); 755EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
756
757#ifdef CONFIG_XEN_DEBUG_FS
758
759int p2m_dump_show(struct seq_file *m, void *v)
760{
761 static const char * const level_name[] = { "top", "middle",
762 "entry", "abnormal" };
763 static const char * const type_name[] = { "identity", "missing",
764 "pfn", "abnormal"};
765#define TYPE_IDENTITY 0
766#define TYPE_MISSING 1
767#define TYPE_PFN 2
768#define TYPE_UNKNOWN 3
769 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
770 unsigned int uninitialized_var(prev_level);
771 unsigned int uninitialized_var(prev_type);
772
773 if (!p2m_top)
774 return 0;
775
776 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
777 unsigned topidx = p2m_top_index(pfn);
778 unsigned mididx = p2m_mid_index(pfn);
779 unsigned idx = p2m_index(pfn);
780 unsigned lvl, type;
781
782 lvl = 4;
783 type = TYPE_UNKNOWN;
784 if (p2m_top[topidx] == p2m_mid_missing) {
785 lvl = 0; type = TYPE_MISSING;
786 } else if (p2m_top[topidx] == NULL) {
787 lvl = 0; type = TYPE_UNKNOWN;
788 } else if (p2m_top[topidx][mididx] == NULL) {
789 lvl = 1; type = TYPE_UNKNOWN;
790 } else if (p2m_top[topidx][mididx] == p2m_identity) {
791 lvl = 1; type = TYPE_IDENTITY;
792 } else if (p2m_top[topidx][mididx] == p2m_missing) {
793 lvl = 1; type = TYPE_MISSING;
794 } else if (p2m_top[topidx][mididx][idx] == 0) {
795 lvl = 2; type = TYPE_UNKNOWN;
796 } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
797 lvl = 2; type = TYPE_IDENTITY;
798 } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
799 lvl = 2; type = TYPE_MISSING;
800 } else if (p2m_top[topidx][mididx][idx] == pfn) {
801 lvl = 2; type = TYPE_PFN;
802 } else if (p2m_top[topidx][mididx][idx] != pfn) {
803 lvl = 2; type = TYPE_PFN;
804 }
805 if (pfn == 0) {
806 prev_level = lvl;
807 prev_type = type;
808 }
809 if (pfn == MAX_DOMAIN_PAGES-1) {
810 lvl = 3;
811 type = TYPE_UNKNOWN;
812 }
813 if (prev_type != type) {
814 seq_printf(m, " [0x%lx->0x%lx] %s\n",
815 prev_pfn_type, pfn, type_name[prev_type]);
816 prev_pfn_type = pfn;
817 prev_type = type;
818 }
819 if (prev_level != lvl) {
820 seq_printf(m, " [0x%lx->0x%lx] level %s\n",
821 prev_pfn_level, pfn, level_name[prev_level]);
822 prev_pfn_level = pfn;
823 prev_level = lvl;
824 }
825 }
826 return 0;
827#undef TYPE_IDENTITY
828#undef TYPE_MISSING
829#undef TYPE_PFN
830#undef TYPE_UNKNOWN
831}
832#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a50d446..fa0269a99377 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
52 52
53static __init void xen_add_extra_mem(unsigned long pages) 53static __init void xen_add_extra_mem(unsigned long pages)
54{ 54{
55 unsigned long pfn;
56
55 u64 size = (u64)pages * PAGE_SIZE; 57 u64 size = (u64)pages * PAGE_SIZE;
56 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; 58 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
57 59
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
66 xen_extra_mem_size += size; 68 xen_extra_mem_size += size;
67 69
68 xen_max_p2m_pfn = PFN_DOWN(extra_start + size); 70 xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
71
72 for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
73 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
69} 74}
70 75
71static unsigned long __init xen_release_chunk(phys_addr_t start_addr, 76static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
104 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", 109 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
105 start, end, ret); 110 start, end, ret);
106 if (ret == 1) { 111 if (ret == 1) {
107 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 112 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
108 len++; 113 len++;
109 } 114 }
110 } 115 }
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
138 return released; 143 return released;
139} 144}
140 145
146static unsigned long __init xen_set_identity(const struct e820entry *list,
147 ssize_t map_size)
148{
149 phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
150 phys_addr_t start_pci = last;
151 const struct e820entry *entry;
152 unsigned long identity = 0;
153 int i;
154
155 for (i = 0, entry = list; i < map_size; i++, entry++) {
156 phys_addr_t start = entry->addr;
157 phys_addr_t end = start + entry->size;
158
159 if (start < last)
160 start = last;
161
162 if (end <= start)
163 continue;
164
165 /* Skip over the 1MB region. */
166 if (last > end)
167 continue;
168
169 if (entry->type == E820_RAM) {
170 if (start > start_pci)
171 identity += set_phys_range_identity(
172 PFN_UP(start_pci), PFN_DOWN(start));
173
174 /* Without saving 'last' we would gobble RAM too
175 * at the end of the loop. */
176 last = end;
177 start_pci = end;
178 continue;
179 }
180 start_pci = min(start, start_pci);
181 last = end;
182 }
183 if (last > start_pci)
184 identity += set_phys_range_identity(
185 PFN_UP(start_pci), PFN_DOWN(last));
186 return identity;
187}
141/** 188/**
142 * machine_specific_memory_setup - Hook for machine specific memory setup. 189 * machine_specific_memory_setup - Hook for machine specific memory setup.
143 **/ 190 **/
144char * __init xen_memory_setup(void) 191char * __init xen_memory_setup(void)
145{ 192{
146 static struct e820entry map[E820MAX] __initdata; 193 static struct e820entry map[E820MAX] __initdata;
194 static struct e820entry map_raw[E820MAX] __initdata;
147 195
148 unsigned long max_pfn = xen_start_info->nr_pages; 196 unsigned long max_pfn = xen_start_info->nr_pages;
149 unsigned long long mem_end; 197 unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
151 struct xen_memory_map memmap; 199 struct xen_memory_map memmap;
152 unsigned long extra_pages = 0; 200 unsigned long extra_pages = 0;
153 unsigned long extra_limit; 201 unsigned long extra_limit;
202 unsigned long identity_pages = 0;
154 int i; 203 int i;
155 int op; 204 int op;
156 205
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
176 } 225 }
177 BUG_ON(rc); 226 BUG_ON(rc);
178 227
228 memcpy(map_raw, map, sizeof(map));
179 e820.nr_map = 0; 229 e820.nr_map = 0;
180 xen_extra_mem_start = mem_end; 230 xen_extra_mem_start = mem_end;
181 for (i = 0; i < memmap.nr_entries; i++) { 231 for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
194 end -= delta; 244 end -= delta;
195 245
196 extra_pages += PFN_DOWN(delta); 246 extra_pages += PFN_DOWN(delta);
247 /*
248 * Set RAM below 4GB that is not for us to be unusable.
249 * This prevents "System RAM" address space from being
250 * used as potential resource for I/O address (happens
251 * when 'allocate_resource' is called).
252 */
253 if (delta &&
254 (xen_initial_domain() && end < 0x100000000ULL))
255 e820_add_region(end, delta, E820_UNUSABLE);
197 } 256 }
198 257
199 if (map[i].size > 0 && end > xen_extra_mem_start) 258 if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
251 310
252 xen_add_extra_mem(extra_pages); 311 xen_add_extra_mem(extra_pages);
253 312
313 /*
314 * Set P2M for all non-RAM pages and E820 gaps to be identity
315 * type PFNs. We supply it with the non-sanitized version
316 * of the E820.
317 */
318 identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
319 printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
254 return "Xen"; 320 return "Xen";
255} 321}
256 322
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c7959045..30612441ed99 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
509 xen_fill_possible_map(); 509 xen_fill_possible_map();
510 xen_init_spinlocks(); 510 xen_init_spinlocks();
511} 511}
512
513static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
514{
515 native_smp_prepare_cpus(max_cpus);
516 WARN_ON(xen_smp_intr_init(0));
517
518 if (!xen_have_vector_callback)
519 return;
520 xen_init_lock_cpu(0);
521 xen_init_spinlocks();
522}
523
524static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
525{
526 int rc;
527 rc = native_cpu_up(cpu);
528 WARN_ON (xen_smp_intr_init(cpu));
529 return rc;
530}
531
532static void xen_hvm_cpu_die(unsigned int cpu)
533{
534 unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
535 unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
536 unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
537 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
538 native_cpu_die(cpu);
539}
540
541void __init xen_hvm_smp_init(void)
542{
543 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
544 smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
545 smp_ops.cpu_up = xen_hvm_cpu_up;
546 smp_ops.cpu_die = xen_hvm_cpu_die;
547 smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
548 smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
549}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a129b5..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
12#include "xen-ops.h" 12#include "xen-ops.h"
13#include "mmu.h" 13#include "mmu.h"
14 14
15void xen_pre_suspend(void) 15void xen_arch_pre_suspend(void)
16{ 16{
17 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); 17 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
18 xen_start_info->console.domU.mfn = 18 xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
26 BUG(); 26 BUG();
27} 27}
28 28
29void xen_hvm_post_suspend(int suspend_cancelled) 29void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM
31 int cpu; 32 int cpu;
32 xen_hvm_init_shared_info(); 33 xen_hvm_init_shared_info();
33 xen_callback_vector(); 34 xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
37 xen_setup_runstate_info(cpu); 38 xen_setup_runstate_info(cpu);
38 } 39 }
39 } 40 }
41#endif
40} 42}
41 43
42void xen_post_suspend(int suspend_cancelled) 44void xen_arch_post_suspend(int suspend_cancelled)
43{ 45{
44 xen_build_mfn_list_list(); 46 xen_build_mfn_list_list();
45 47
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a5..2e2d370a47b1 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
397 name = "<timer kasprintf failed>"; 397 name = "<timer kasprintf failed>";
398 398
399 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, 399 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
400 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, 400 IRQF_DISABLED|IRQF_PERCPU|
401 IRQF_NOBALANCING|IRQF_TIMER|
402 IRQF_FORCE_RESUME,
401 name, NULL); 403 name, NULL);
402 404
403 evt = &per_cpu(xen_clock_events, cpu); 405 evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf985757..3112f55638c4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
64 64
65#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
66void xen_smp_init(void); 66void xen_smp_init(void);
67void __init xen_hvm_smp_init(void);
67 68
68extern cpumask_var_t xen_cpu_initialized_map; 69extern cpumask_var_t xen_cpu_initialized_map;
69#else 70#else
70static inline void xen_smp_init(void) {} 71static inline void xen_smp_init(void) {}
72static inline void xen_hvm_smp_init(void) {}
71#endif 73#endif
72 74
73#ifdef CONFIG_PARAVIRT_SPINLOCKS 75#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
17#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." 17#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
18#endif 18#endif
19 19
20#include <linux/list.h>
21#include <linux/spinlock.h>
22#include <asm/atomic.h>
23#include <asm/system.h>
24
25/*
26 * the semaphore definition
27 */
28struct rw_semaphore {
29 signed long count;
30#define RWSEM_UNLOCKED_VALUE 0x00000000 20#define RWSEM_UNLOCKED_VALUE 0x00000000
31#define RWSEM_ACTIVE_BIAS 0x00000001 21#define RWSEM_ACTIVE_BIAS 0x00000001
32#define RWSEM_ACTIVE_MASK 0x0000ffff 22#define RWSEM_ACTIVE_MASK 0x0000ffff
33#define RWSEM_WAITING_BIAS (-0x00010000) 23#define RWSEM_WAITING_BIAS (-0x00010000)
34#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 24#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
35#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
36 spinlock_t wait_lock;
37 struct list_head wait_list;
38};
39
40#define __RWSEM_INITIALIZER(name) \
41 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
42 LIST_HEAD_INIT((name).wait_list) }
43
44#define DECLARE_RWSEM(name) \
45 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
46
47extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
48extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
49extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
50extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
51
52static inline void init_rwsem(struct rw_semaphore *sem)
53{
54 sem->count = RWSEM_UNLOCKED_VALUE;
55 spin_lock_init(&sem->wait_lock);
56 INIT_LIST_HEAD(&sem->wait_list);
57}
58 26
59/* 27/*
60 * lock for reading 28 * lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
160 return atomic_add_return(delta, (atomic_t *)(&sem->count)); 128 return atomic_add_return(delta, (atomic_t *)(&sem->count));
161} 129}
162 130
163static inline int rwsem_is_locked(struct rw_semaphore *sem)
164{
165 return (sem->count != 0);
166}
167
168#endif /* _XTENSA_RWSEM_H */ 131#endif /* _XTENSA_RWSEM_H */
diff --git a/block/blk-lib.c b/block/blk-lib.c
index eec78becb355..bd3e8df4d5e2 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -109,7 +109,6 @@ struct bio_batch
109 atomic_t done; 109 atomic_t done;
110 unsigned long flags; 110 unsigned long flags;
111 struct completion *wait; 111 struct completion *wait;
112 bio_end_io_t *end_io;
113}; 112};
114 113
115static void bio_batch_end_io(struct bio *bio, int err) 114static void bio_batch_end_io(struct bio *bio, int err)
@@ -122,12 +121,9 @@ static void bio_batch_end_io(struct bio *bio, int err)
122 else 121 else
123 clear_bit(BIO_UPTODATE, &bb->flags); 122 clear_bit(BIO_UPTODATE, &bb->flags);
124 } 123 }
125 if (bb) { 124 if (bb)
126 if (bb->end_io) 125 if (atomic_dec_and_test(&bb->done))
127 bb->end_io(bio, err); 126 complete(bb->wait);
128 atomic_inc(&bb->done);
129 complete(bb->wait);
130 }
131 bio_put(bio); 127 bio_put(bio);
132} 128}
133 129
@@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
150 int ret; 146 int ret;
151 struct bio *bio; 147 struct bio *bio;
152 struct bio_batch bb; 148 struct bio_batch bb;
153 unsigned int sz, issued = 0; 149 unsigned int sz;
154 DECLARE_COMPLETION_ONSTACK(wait); 150 DECLARE_COMPLETION_ONSTACK(wait);
155 151
156 atomic_set(&bb.done, 0); 152 atomic_set(&bb.done, 1);
157 bb.flags = 1 << BIO_UPTODATE; 153 bb.flags = 1 << BIO_UPTODATE;
158 bb.wait = &wait; 154 bb.wait = &wait;
159 bb.end_io = NULL;
160 155
161submit: 156submit:
162 ret = 0; 157 ret = 0;
@@ -185,12 +180,12 @@ submit:
185 break; 180 break;
186 } 181 }
187 ret = 0; 182 ret = 0;
188 issued++; 183 atomic_inc(&bb.done);
189 submit_bio(WRITE, bio); 184 submit_bio(WRITE, bio);
190 } 185 }
191 186
192 /* Wait for bios in-flight */ 187 /* Wait for bios in-flight */
193 while (issued != atomic_read(&bb.done)) 188 if (!atomic_dec_and_test(&bb.done))
194 wait_for_completion(&wait); 189 wait_for_completion(&wait);
195 190
196 if (!test_bit(BIO_UPTODATE, &bb.flags)) 191 if (!test_bit(BIO_UPTODATE, &bb.flags))
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d7aa39e349a6..9cb8668ff5f4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
120#define EXTENDED (1<<EXT_SHIFT) 120#define EXTENDED (1<<EXT_SHIFT)
121#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 121#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
122#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 122#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
123#define EMULATED_HD_DISK_MINOR_OFFSET (0)
124#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
125#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
126#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
123 127
124#define DEV_NAME "xvd" /* name in /dev */ 128#define DEV_NAME "xvd" /* name in /dev */
125 129
@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
281 info->shadow[id].request = req; 285 info->shadow[id].request = req;
282 286
283 ring_req->id = id; 287 ring_req->id = id;
284 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); 288 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
285 ring_req->handle = info->handle; 289 ring_req->handle = info->handle;
286 290
287 ring_req->operation = rq_data_dir(req) ? 291 ring_req->operation = rq_data_dir(req) ?
@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
317 rq_data_dir(req) ); 321 rq_data_dir(req) );
318 322
319 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 323 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
320 ring_req->seg[i] = 324 ring_req->u.rw.seg[i] =
321 (struct blkif_request_segment) { 325 (struct blkif_request_segment) {
322 .gref = ref, 326 .gref = ref,
323 .first_sect = fsect, 327 .first_sect = fsect,
@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
434 info->feature_flush ? "enabled" : "disabled"); 438 info->feature_flush ? "enabled" : "disabled");
435} 439}
436 440
441static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
442{
443 int major;
444 major = BLKIF_MAJOR(vdevice);
445 *minor = BLKIF_MINOR(vdevice);
446 switch (major) {
447 case XEN_IDE0_MAJOR:
448 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
449 *minor = ((*minor / 64) * PARTS_PER_DISK) +
450 EMULATED_HD_DISK_MINOR_OFFSET;
451 break;
452 case XEN_IDE1_MAJOR:
453 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
454 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
455 EMULATED_HD_DISK_MINOR_OFFSET;
456 break;
457 case XEN_SCSI_DISK0_MAJOR:
458 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
459 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
460 break;
461 case XEN_SCSI_DISK1_MAJOR:
462 case XEN_SCSI_DISK2_MAJOR:
463 case XEN_SCSI_DISK3_MAJOR:
464 case XEN_SCSI_DISK4_MAJOR:
465 case XEN_SCSI_DISK5_MAJOR:
466 case XEN_SCSI_DISK6_MAJOR:
467 case XEN_SCSI_DISK7_MAJOR:
468 *offset = (*minor / PARTS_PER_DISK) +
469 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
470 EMULATED_SD_DISK_NAME_OFFSET;
471 *minor = *minor +
472 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
473 EMULATED_SD_DISK_MINOR_OFFSET;
474 break;
475 case XEN_SCSI_DISK8_MAJOR:
476 case XEN_SCSI_DISK9_MAJOR:
477 case XEN_SCSI_DISK10_MAJOR:
478 case XEN_SCSI_DISK11_MAJOR:
479 case XEN_SCSI_DISK12_MAJOR:
480 case XEN_SCSI_DISK13_MAJOR:
481 case XEN_SCSI_DISK14_MAJOR:
482 case XEN_SCSI_DISK15_MAJOR:
483 *offset = (*minor / PARTS_PER_DISK) +
484 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
485 EMULATED_SD_DISK_NAME_OFFSET;
486 *minor = *minor +
487 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
488 EMULATED_SD_DISK_MINOR_OFFSET;
489 break;
490 case XENVBD_MAJOR:
491 *offset = *minor / PARTS_PER_DISK;
492 break;
493 default:
494 printk(KERN_WARNING "blkfront: your disk configuration is "
495 "incorrect, please use an xvd device instead\n");
496 return -ENODEV;
497 }
498 return 0;
499}
437 500
438static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 501static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
439 struct blkfront_info *info, 502 struct blkfront_info *info,
@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
441{ 504{
442 struct gendisk *gd; 505 struct gendisk *gd;
443 int nr_minors = 1; 506 int nr_minors = 1;
444 int err = -ENODEV; 507 int err;
445 unsigned int offset; 508 unsigned int offset;
446 int minor; 509 int minor;
447 int nr_parts; 510 int nr_parts;
@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
456 } 519 }
457 520
458 if (!VDEV_IS_EXTENDED(info->vdevice)) { 521 if (!VDEV_IS_EXTENDED(info->vdevice)) {
459 minor = BLKIF_MINOR(info->vdevice); 522 err = xen_translate_vdev(info->vdevice, &minor, &offset);
460 nr_parts = PARTS_PER_DISK; 523 if (err)
524 return err;
525 nr_parts = PARTS_PER_DISK;
461 } else { 526 } else {
462 minor = BLKIF_MINOR_EXT(info->vdevice); 527 minor = BLKIF_MINOR_EXT(info->vdevice);
463 nr_parts = PARTS_PER_EXT_DISK; 528 nr_parts = PARTS_PER_EXT_DISK;
529 offset = minor / nr_parts;
530 if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
531 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
532 "emulated IDE disks,\n\t choose an xvd device name"
533 "from xvde on\n", info->vdevice);
464 } 534 }
535 err = -ENODEV;
465 536
466 if ((minor % nr_parts) == 0) 537 if ((minor % nr_parts) == 0)
467 nr_minors = nr_parts; 538 nr_minors = nr_parts;
@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
475 if (gd == NULL) 546 if (gd == NULL)
476 goto release; 547 goto release;
477 548
478 offset = minor / nr_parts;
479
480 if (nr_minors > 1) { 549 if (nr_minors > 1) {
481 if (offset < 26) 550 if (offset < 26)
482 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 551 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
615{ 684{
616 int i; 685 int i;
617 for (i = 0; i < s->req.nr_segments; i++) 686 for (i = 0; i < s->req.nr_segments; i++)
618 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); 687 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
619} 688}
620 689
621static irqreturn_t blkif_interrupt(int irq, void *dev_id) 690static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
932 /* Rewrite any grant references invalidated by susp/resume. */ 1001 /* Rewrite any grant references invalidated by susp/resume. */
933 for (j = 0; j < req->nr_segments; j++) 1002 for (j = 0; j < req->nr_segments; j++)
934 gnttab_grant_foreign_access_ref( 1003 gnttab_grant_foreign_access_ref(
935 req->seg[j].gref, 1004 req->u.rw.seg[j].gref,
936 info->xbdev->otherend_id, 1005 info->xbdev->otherend_id,
937 pfn_to_mfn(info->shadow[req->id].frame[j]), 1006 pfn_to_mfn(info->shadow[req->id].frame[j]),
938 rq_data_dir(info->shadow[req->id].request)); 1007 rq_data_dir(info->shadow[req->id].request));
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 7855f9f45b8e..62787e30d508 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -900,6 +900,14 @@ static void sender(void *send_info,
900 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); 900 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
901#endif 901#endif
902 902
903 /*
904 * last_timeout_jiffies is updated here to avoid
905 * smi_timeout() handler passing very large time_diff
906 * value to smi_event_handler() that causes
907 * the send command to abort.
908 */
909 smi_info->last_timeout_jiffies = jiffies;
910
903 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES); 911 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
904 912
905 if (smi_info->thread) 913 if (smi_info->thread)
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index cead8e6ff345..7f6f01a4b145 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -326,6 +326,7 @@ static DEFINE_PCI_DEVICE_TABLE(ioh_gpio_pcidev_id) = {
326 { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) }, 326 { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) },
327 { 0, } 327 { 0, }
328}; 328};
329MODULE_DEVICE_TABLE(pci, ioh_gpio_pcidev_id);
329 330
330static struct pci_driver ioh_gpio_driver = { 331static struct pci_driver ioh_gpio_driver = {
331 .name = "ml_ioh_gpio", 332 .name = "ml_ioh_gpio",
diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index 0eba0a75c804..2c6af8705103 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c
@@ -286,6 +286,7 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = {
286 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, 286 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
287 { 0, } 287 { 0, }
288}; 288};
289MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id);
289 290
290static struct pci_driver pch_gpio_driver = { 291static struct pci_driver pch_gpio_driver = {
291 .name = "pch_gpio", 292 .name = "pch_gpio",
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3e6f486f4605..2abe240dae58 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1553,7 +1553,17 @@
1553 1553
1554/* Backlight control */ 1554/* Backlight control */
1555#define BLC_PWM_CTL 0x61254 1555#define BLC_PWM_CTL 0x61254
1556#define BACKLIGHT_MODULATION_FREQ_SHIFT (17)
1556#define BLC_PWM_CTL2 0x61250 /* 965+ only */ 1557#define BLC_PWM_CTL2 0x61250 /* 965+ only */
1558#define BLM_COMBINATION_MODE (1 << 30)
1559/*
1560 * This is the most significant 15 bits of the number of backlight cycles in a
1561 * complete cycle of the modulated backlight control.
1562 *
1563 * The actual value is this field multiplied by two.
1564 */
1565#define BACKLIGHT_MODULATION_FREQ_MASK (0x7fff << 17)
1566#define BLM_LEGACY_MODE (1 << 16)
1557/* 1567/*
1558 * This is the number of cycles out of the backlight modulation cycle for which 1568 * This is the number of cycles out of the backlight modulation cycle for which
1559 * the backlight is on. 1569 * the backlight is on.
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index d860abeda70f..f8f86e57df22 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -30,6 +30,8 @@
30 30
31#include "intel_drv.h" 31#include "intel_drv.h"
32 32
33#define PCI_LBPC 0xf4 /* legacy/combination backlight modes */
34
33void 35void
34intel_fixed_panel_mode(struct drm_display_mode *fixed_mode, 36intel_fixed_panel_mode(struct drm_display_mode *fixed_mode,
35 struct drm_display_mode *adjusted_mode) 37 struct drm_display_mode *adjusted_mode)
@@ -110,6 +112,19 @@ done:
110 dev_priv->pch_pf_size = (width << 16) | height; 112 dev_priv->pch_pf_size = (width << 16) | height;
111} 113}
112 114
115static int is_backlight_combination_mode(struct drm_device *dev)
116{
117 struct drm_i915_private *dev_priv = dev->dev_private;
118
119 if (INTEL_INFO(dev)->gen >= 4)
120 return I915_READ(BLC_PWM_CTL2) & BLM_COMBINATION_MODE;
121
122 if (IS_GEN2(dev))
123 return I915_READ(BLC_PWM_CTL) & BLM_LEGACY_MODE;
124
125 return 0;
126}
127
113static u32 i915_read_blc_pwm_ctl(struct drm_i915_private *dev_priv) 128static u32 i915_read_blc_pwm_ctl(struct drm_i915_private *dev_priv)
114{ 129{
115 u32 val; 130 u32 val;
@@ -166,6 +181,9 @@ u32 intel_panel_get_max_backlight(struct drm_device *dev)
166 if (INTEL_INFO(dev)->gen < 4) 181 if (INTEL_INFO(dev)->gen < 4)
167 max &= ~1; 182 max &= ~1;
168 } 183 }
184
185 if (is_backlight_combination_mode(dev))
186 max *= 0xff;
169 } 187 }
170 188
171 DRM_DEBUG_DRIVER("max backlight PWM = %d\n", max); 189 DRM_DEBUG_DRIVER("max backlight PWM = %d\n", max);
@@ -183,6 +201,14 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
183 val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; 201 val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
184 if (IS_PINEVIEW(dev)) 202 if (IS_PINEVIEW(dev))
185 val >>= 1; 203 val >>= 1;
204
205 if (is_backlight_combination_mode(dev)){
206 u8 lbpc;
207
208 val &= ~1;
209 pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc);
210 val *= lbpc;
211 }
186 } 212 }
187 213
188 DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val); 214 DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val);
@@ -205,6 +231,16 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
205 231
206 if (HAS_PCH_SPLIT(dev)) 232 if (HAS_PCH_SPLIT(dev))
207 return intel_pch_panel_set_backlight(dev, level); 233 return intel_pch_panel_set_backlight(dev, level);
234
235 if (is_backlight_combination_mode(dev)){
236 u32 max = intel_panel_get_max_backlight(dev);
237 u8 lbpc;
238
239 lbpc = level * 0xfe / max + 1;
240 level /= lbpc;
241 pci_write_config_byte(dev->pdev, PCI_LBPC, lbpc);
242 }
243
208 tmp = I915_READ(BLC_PWM_CTL); 244 tmp = I915_READ(BLC_PWM_CTL);
209 if (IS_PINEVIEW(dev)) { 245 if (IS_PINEVIEW(dev)) {
210 tmp &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1); 246 tmp &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d270b3ff896b..6140ea1de45a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2194,7 +2194,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
2194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; 2194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2195 } 2195 }
2196 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2196 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2197 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
2198 r700_vram_gtt_location(rdev, &rdev->mc); 2197 r700_vram_gtt_location(rdev, &rdev->mc);
2199 radeon_update_bandwidth_info(rdev); 2198 radeon_update_bandwidth_info(rdev);
2200 2199
@@ -2934,7 +2933,7 @@ static int evergreen_startup(struct radeon_device *rdev)
2934 /* XXX: ontario has problems blitting to gart at the moment */ 2933 /* XXX: ontario has problems blitting to gart at the moment */
2935 if (rdev->family == CHIP_PALM) { 2934 if (rdev->family == CHIP_PALM) {
2936 rdev->asic->copy = NULL; 2935 rdev->asic->copy = NULL;
2937 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 2936 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2938 } 2937 }
2939 2938
2940 /* allocate wb buffer */ 2939 /* allocate wb buffer */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2adfb03f479b..2be698e78ff2 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -623,7 +623,7 @@ done:
623 dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 623 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
624 return r; 624 return r;
625 } 625 }
626 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 626 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
627 return 0; 627 return 0;
628} 628}
629 629
@@ -631,7 +631,7 @@ void evergreen_blit_fini(struct radeon_device *rdev)
631{ 631{
632 int r; 632 int r;
633 633
634 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 634 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
635 if (rdev->r600_blit.shader_obj == NULL) 635 if (rdev->r600_blit.shader_obj == NULL)
636 return; 636 return;
637 /* If we can't reserve the bo, unref should be enough to destroy 637 /* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 93fa735c8c1a..e372f9e1e5ce 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -70,23 +70,6 @@ MODULE_FIRMWARE(FIRMWARE_R520);
70 70
71void r100_pre_page_flip(struct radeon_device *rdev, int crtc) 71void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
72{ 72{
73 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
74 u32 tmp;
75
76 /* make sure flip is at vb rather than hb */
77 tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset);
78 tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL;
79 /* make sure pending bit is asserted */
80 tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
81 WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp);
82
83 /* set pageflip to happen as late as possible in the vblank interval.
84 * same field for crtc1/2
85 */
86 tmp = RREG32(RADEON_CRTC_GEN_CNTL);
87 tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK;
88 WREG32(RADEON_CRTC_GEN_CNTL, tmp);
89
90 /* enable the pflip int */ 73 /* enable the pflip int */
91 radeon_irq_kms_pflip_irq_get(rdev, crtc); 74 radeon_irq_kms_pflip_irq_get(rdev, crtc);
92} 75}
@@ -1041,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1041 return r; 1024 return r;
1042 } 1025 }
1043 rdev->cp.ready = true; 1026 rdev->cp.ready = true;
1044 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 1027 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1045 return 0; 1028 return 0;
1046} 1029}
1047 1030
@@ -1059,7 +1042,7 @@ void r100_cp_fini(struct radeon_device *rdev)
1059void r100_cp_disable(struct radeon_device *rdev) 1042void r100_cp_disable(struct radeon_device *rdev)
1060{ 1043{
1061 /* Disable ring */ 1044 /* Disable ring */
1062 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 1045 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1063 rdev->cp.ready = false; 1046 rdev->cp.ready = false;
1064 WREG32(RADEON_CP_CSQ_MODE, 0); 1047 WREG32(RADEON_CP_CSQ_MODE, 0);
1065 WREG32(RADEON_CP_CSQ_CNTL, 0); 1048 WREG32(RADEON_CP_CSQ_CNTL, 0);
@@ -2329,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
2329 /* FIXME we don't use the second aperture yet when we could use it */ 2312 /* FIXME we don't use the second aperture yet when we could use it */
2330 if (rdev->mc.visible_vram_size > rdev->mc.aper_size) 2313 if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2331 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2314 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2332 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
2333 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2315 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2334 if (rdev->flags & RADEON_IS_IGP) { 2316 if (rdev->flags & RADEON_IS_IGP) {
2335 uint32_t tom; 2317 uint32_t tom;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index de88624d5f87..9b3fad23b76c 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1255,7 +1255,6 @@ int r600_mc_init(struct radeon_device *rdev)
1255 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1255 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1256 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1256 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1257 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1257 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1258 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
1259 r600_vram_gtt_location(rdev, &rdev->mc); 1258 r600_vram_gtt_location(rdev, &rdev->mc);
1260 1259
1261 if (rdev->flags & RADEON_IS_IGP) { 1260 if (rdev->flags & RADEON_IS_IGP) {
@@ -1937,7 +1936,7 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
1937 */ 1936 */
1938void r600_cp_stop(struct radeon_device *rdev) 1937void r600_cp_stop(struct radeon_device *rdev)
1939{ 1938{
1940 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 1939 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1941 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); 1940 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
1942 WREG32(SCRATCH_UMSK, 0); 1941 WREG32(SCRATCH_UMSK, 0);
1943} 1942}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 41f7aafc97c4..df68d91e8190 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -558,7 +558,7 @@ done:
558 dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 558 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
559 return r; 559 return r;
560 } 560 }
561 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 561 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
562 return 0; 562 return 0;
563} 563}
564 564
@@ -566,7 +566,7 @@ void r600_blit_fini(struct radeon_device *rdev)
566{ 566{
567 int r; 567 int r;
568 568
569 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 569 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
570 if (rdev->r600_blit.shader_obj == NULL) 570 if (rdev->r600_blit.shader_obj == NULL)
571 return; 571 return;
572 /* If we can't reserve the bo, unref should be enough to destroy 572 /* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56c48b67ef3d..6b3429495118 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -345,7 +345,6 @@ struct radeon_mc {
345 * about vram size near mc fb location */ 345 * about vram size near mc fb location */
346 u64 mc_vram_size; 346 u64 mc_vram_size;
347 u64 visible_vram_size; 347 u64 visible_vram_size;
348 u64 active_vram_size;
349 u64 gtt_size; 348 u64 gtt_size;
350 u64 gtt_start; 349 u64 gtt_start;
351 u64 gtt_end; 350 u64 gtt_end;
@@ -1448,6 +1447,7 @@ extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *m
1448extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); 1447extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
1449extern int radeon_resume_kms(struct drm_device *dev); 1448extern int radeon_resume_kms(struct drm_device *dev);
1450extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); 1449extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
1450extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
1451 1451
1452/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */ 1452/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */
1453extern bool r600_card_posted(struct radeon_device *rdev); 1453extern bool r600_card_posted(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index e75d63b8e21d..793c5e6026ad 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -834,6 +834,9 @@ static struct radeon_asic sumo_asic = {
834 .pm_finish = &evergreen_pm_finish, 834 .pm_finish = &evergreen_pm_finish,
835 .pm_init_profile = &rs780_pm_init_profile, 835 .pm_init_profile = &rs780_pm_init_profile,
836 .pm_get_dynpm_state = &r600_pm_get_dynpm_state, 836 .pm_get_dynpm_state = &r600_pm_get_dynpm_state,
837 .pre_page_flip = &evergreen_pre_page_flip,
838 .page_flip = &evergreen_page_flip,
839 .post_page_flip = &evergreen_post_page_flip,
837}; 840};
838 841
839static struct radeon_asic btc_asic = { 842static struct radeon_asic btc_asic = {
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index df95eb83dac6..1fe95dfe48c9 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -156,9 +156,12 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
156{ 156{
157 struct radeon_device *rdev = dev->dev_private; 157 struct radeon_device *rdev = dev->dev_private;
158 struct drm_radeon_gem_info *args = data; 158 struct drm_radeon_gem_info *args = data;
159 struct ttm_mem_type_manager *man;
160
161 man = &rdev->mman.bdev.man[TTM_PL_VRAM];
159 162
160 args->vram_size = rdev->mc.real_vram_size; 163 args->vram_size = rdev->mc.real_vram_size;
161 args->vram_visible = rdev->mc.real_vram_size; 164 args->vram_visible = (u64)man->size << PAGE_SHIFT;
162 if (rdev->stollen_vga_memory) 165 if (rdev->stollen_vga_memory)
163 args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory); 166 args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory);
164 args->vram_visible -= radeon_fbdev_total_size(rdev); 167 args->vram_visible -= radeon_fbdev_total_size(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index cf0638c3b7c7..78968b738e88 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -443,7 +443,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
443 (target_fb->bits_per_pixel * 8)); 443 (target_fb->bits_per_pixel * 8));
444 crtc_pitch |= crtc_pitch << 16; 444 crtc_pitch |= crtc_pitch << 16;
445 445
446 446 crtc_offset_cntl |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
447 if (tiling_flags & RADEON_TILING_MACRO) { 447 if (tiling_flags & RADEON_TILING_MACRO) {
448 if (ASIC_IS_R300(rdev)) 448 if (ASIC_IS_R300(rdev))
449 crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN | 449 crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
@@ -502,6 +502,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
502 gen_cntl_val = RREG32(gen_cntl_reg); 502 gen_cntl_val = RREG32(gen_cntl_reg);
503 gen_cntl_val &= ~(0xf << 8); 503 gen_cntl_val &= ~(0xf << 8);
504 gen_cntl_val |= (format << 8); 504 gen_cntl_val |= (format << 8);
505 gen_cntl_val &= ~RADEON_CRTC_VSTAT_MODE_MASK;
505 WREG32(gen_cntl_reg, gen_cntl_val); 506 WREG32(gen_cntl_reg, gen_cntl_val);
506 507
507 crtc_offset = (u32)base; 508 crtc_offset = (u32)base;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e5b2cf10cbf4..8389b4c63d12 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -589,6 +589,20 @@ void radeon_ttm_fini(struct radeon_device *rdev)
589 DRM_INFO("radeon: ttm finalized\n"); 589 DRM_INFO("radeon: ttm finalized\n");
590} 590}
591 591
592/* this should only be called at bootup or when userspace
593 * isn't running */
594void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
595{
596 struct ttm_mem_type_manager *man;
597
598 if (!rdev->mman.initialized)
599 return;
600
601 man = &rdev->mman.bdev.man[TTM_PL_VRAM];
602 /* this just adjusts TTM size idea, which sets lpfn to the correct value */
603 man->size = size >> PAGE_SHIFT;
604}
605
592static struct vm_operations_struct radeon_ttm_vm_ops; 606static struct vm_operations_struct radeon_ttm_vm_ops;
593static const struct vm_operations_struct *ttm_vm_ops = NULL; 607static const struct vm_operations_struct *ttm_vm_ops = NULL;
594 608
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 5afe294ed51f..8af4679db23e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -751,7 +751,6 @@ void rs600_mc_init(struct radeon_device *rdev)
751 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 751 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
752 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 752 rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
753 rdev->mc.visible_vram_size = rdev->mc.aper_size; 753 rdev->mc.visible_vram_size = rdev->mc.aper_size;
754 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
755 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); 754 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
756 base = RREG32_MC(R_000004_MC_FB_LOCATION); 755 base = RREG32_MC(R_000004_MC_FB_LOCATION);
757 base = G_000004_MC_FB_START(base) << 16; 756 base = G_000004_MC_FB_START(base) << 16;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 6638c8e4c81b..66c949b7c18c 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -157,7 +157,6 @@ void rs690_mc_init(struct radeon_device *rdev)
157 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 157 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
158 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 158 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
159 rdev->mc.visible_vram_size = rdev->mc.aper_size; 159 rdev->mc.visible_vram_size = rdev->mc.aper_size;
160 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
161 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION); 160 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
162 base = G_000100_MC_FB_START(base) << 16; 161 base = G_000100_MC_FB_START(base) << 16;
163 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); 162 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d8ba67690656..714ad45757d0 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -307,7 +307,7 @@ static void rv770_mc_program(struct radeon_device *rdev)
307 */ 307 */
308void r700_cp_stop(struct radeon_device *rdev) 308void r700_cp_stop(struct radeon_device *rdev)
309{ 309{
310 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 310 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
312 WREG32(SCRATCH_UMSK, 0); 312 WREG32(SCRATCH_UMSK, 0);
313} 313}
@@ -1123,7 +1123,6 @@ int rv770_mc_init(struct radeon_device *rdev)
1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1125 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1125 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1126 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
1127 r700_vram_gtt_location(rdev, &rdev->mc); 1126 r700_vram_gtt_location(rdev, &rdev->mc);
1128 radeon_update_bandwidth_info(rdev); 1127 radeon_update_bandwidth_info(rdev);
1129 1128
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 3f49dd376f02..6e06019015a5 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -37,7 +37,7 @@
37#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */ 37#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */
38#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */ 38#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */
39#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */ 39#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */
40#define SIO_LOCK_KEY 0xAA /* Key to diasble Super-I/O */ 40#define SIO_LOCK_KEY 0xAA /* Key to disable Super-I/O */
41 41
42#define SIO_REG_LDSEL 0x07 /* Logical device select */ 42#define SIO_REG_LDSEL 0x07 /* Logical device select */
43#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ 43#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */
@@ -2111,7 +2111,6 @@ static int f71882fg_remove(struct platform_device *pdev)
2111 int nr_fans = (data->type == f71882fg) ? 4 : 3; 2111 int nr_fans = (data->type == f71882fg) ? 4 : 3;
2112 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START); 2112 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
2113 2113
2114 platform_set_drvdata(pdev, NULL);
2115 if (data->hwmon_dev) 2114 if (data->hwmon_dev)
2116 hwmon_device_unregister(data->hwmon_dev); 2115 hwmon_device_unregister(data->hwmon_dev);
2117 2116
@@ -2178,6 +2177,7 @@ static int f71882fg_remove(struct platform_device *pdev)
2178 } 2177 }
2179 } 2178 }
2180 2179
2180 platform_set_drvdata(pdev, NULL);
2181 kfree(data); 2181 kfree(data);
2182 2182
2183 return 0; 2183 return 0;
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 2e067dd2ee51..50ea1f43bdc1 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -29,6 +29,7 @@
29#include <linux/pci.h> 29#include <linux/pci.h>
30#include <linux/mutex.h> 30#include <linux/mutex.h>
31#include <linux/ktime.h> 31#include <linux/ktime.h>
32#include <linux/slab.h>
32 33
33#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */ 34#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */
34#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */ 35#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index ef3bcb1ce864..61653f079671 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -249,7 +249,7 @@ static struct i2c_adapter ocores_adapter = {
249static int ocores_i2c_of_probe(struct platform_device* pdev, 249static int ocores_i2c_of_probe(struct platform_device* pdev,
250 struct ocores_i2c* i2c) 250 struct ocores_i2c* i2c)
251{ 251{
252 __be32* val; 252 const __be32* val;
253 253
254 val = of_get_property(pdev->dev.of_node, "regstep", NULL); 254 val = of_get_property(pdev->dev.of_node, "regstep", NULL);
255 if (!val) { 255 if (!val) {
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 829a2a1029f7..58a58c7eaa17 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -378,9 +378,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
378 * REVISIT: Some wkup sources might not be needed. 378 * REVISIT: Some wkup sources might not be needed.
379 */ 379 */
380 dev->westate = OMAP_I2C_WE_ALL; 380 dev->westate = OMAP_I2C_WE_ALL;
381 if (dev->rev < OMAP_I2C_REV_ON_4430) 381 omap_i2c_write_reg(dev, OMAP_I2C_WE_REG, dev->westate);
382 omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
383 dev->westate);
384 } 382 }
385 } 383 }
386 omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0); 384 omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
diff --git a/drivers/media/common/tuners/tda8290.c b/drivers/media/common/tuners/tda8290.c
index bc6a67768af1..8c4852114eeb 100644
--- a/drivers/media/common/tuners/tda8290.c
+++ b/drivers/media/common/tuners/tda8290.c
@@ -658,13 +658,13 @@ static int tda8290_probe(struct tuner_i2c_props *i2c_props)
658#define TDA8290_ID 0x89 658#define TDA8290_ID 0x89
659 u8 reg = 0x1f, id; 659 u8 reg = 0x1f, id;
660 struct i2c_msg msg_read[] = { 660 struct i2c_msg msg_read[] = {
661 { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg }, 661 { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
662 { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id }, 662 { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
663 }; 663 };
664 664
665 /* detect tda8290 */ 665 /* detect tda8290 */
666 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) { 666 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
667 printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n", 667 printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
668 __func__, reg); 668 __func__, reg);
669 return -ENODEV; 669 return -ENODEV;
670 } 670 }
@@ -685,13 +685,13 @@ static int tda8295_probe(struct tuner_i2c_props *i2c_props)
685#define TDA8295C2_ID 0x8b 685#define TDA8295C2_ID 0x8b
686 u8 reg = 0x2f, id; 686 u8 reg = 0x2f, id;
687 struct i2c_msg msg_read[] = { 687 struct i2c_msg msg_read[] = {
688 { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg }, 688 { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
689 { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id }, 689 { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
690 }; 690 };
691 691
692 /* detect tda8290 */ 692 /* detect tda8295 */
693 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) { 693 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
694 printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n", 694 printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
695 __func__, reg); 695 __func__, reg);
696 return -ENODEV; 696 return -ENODEV;
697 } 697 }
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index defd83964ce2..193cdb77b76a 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -870,6 +870,23 @@ static int dib7070p_tuner_attach(struct dvb_usb_adapter *adap)
870 return 0; 870 return 0;
871} 871}
872 872
873static int stk7700p_pid_filter(struct dvb_usb_adapter *adapter, int index,
874 u16 pid, int onoff)
875{
876 struct dib0700_state *st = adapter->dev->priv;
877 if (st->is_dib7000pc)
878 return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
879 return dib7000m_pid_filter(adapter->fe, index, pid, onoff);
880}
881
882static int stk7700p_pid_filter_ctrl(struct dvb_usb_adapter *adapter, int onoff)
883{
884 struct dib0700_state *st = adapter->dev->priv;
885 if (st->is_dib7000pc)
886 return dib7000p_pid_filter_ctrl(adapter->fe, onoff);
887 return dib7000m_pid_filter_ctrl(adapter->fe, onoff);
888}
889
873static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff) 890static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff)
874{ 891{
875 return dib7000p_pid_filter(adapter->fe, index, pid, onoff); 892 return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
@@ -1875,8 +1892,8 @@ struct dvb_usb_device_properties dib0700_devices[] = {
1875 { 1892 {
1876 .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, 1893 .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
1877 .pid_filter_count = 32, 1894 .pid_filter_count = 32,
1878 .pid_filter = stk70x0p_pid_filter, 1895 .pid_filter = stk7700p_pid_filter,
1879 .pid_filter_ctrl = stk70x0p_pid_filter_ctrl, 1896 .pid_filter_ctrl = stk7700p_pid_filter_ctrl,
1880 .frontend_attach = stk7700p_frontend_attach, 1897 .frontend_attach = stk7700p_frontend_attach,
1881 .tuner_attach = stk7700p_tuner_attach, 1898 .tuner_attach = stk7700p_tuner_attach,
1882 1899
diff --git a/drivers/media/dvb/dvb-usb/lmedm04.c b/drivers/media/dvb/dvb-usb/lmedm04.c
index 9eea4188303b..46ccd01a7696 100644
--- a/drivers/media/dvb/dvb-usb/lmedm04.c
+++ b/drivers/media/dvb/dvb-usb/lmedm04.c
@@ -659,7 +659,7 @@ static int lme2510_download_firmware(struct usb_device *dev,
659} 659}
660 660
661/* Default firmware for LME2510C */ 661/* Default firmware for LME2510C */
662const char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw"; 662char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
663 663
664static void lme_coldreset(struct usb_device *dev) 664static void lme_coldreset(struct usb_device *dev)
665{ 665{
@@ -1006,7 +1006,7 @@ static struct dvb_usb_device_properties lme2510c_properties = {
1006 .caps = DVB_USB_IS_AN_I2C_ADAPTER, 1006 .caps = DVB_USB_IS_AN_I2C_ADAPTER,
1007 .usb_ctrl = DEVICE_SPECIFIC, 1007 .usb_ctrl = DEVICE_SPECIFIC,
1008 .download_firmware = lme2510_download_firmware, 1008 .download_firmware = lme2510_download_firmware,
1009 .firmware = lme_firmware, 1009 .firmware = (const char *)&lme_firmware,
1010 .size_of_priv = sizeof(struct lme2510_state), 1010 .size_of_priv = sizeof(struct lme2510_state),
1011 .num_adapters = 1, 1011 .num_adapters = 1,
1012 .adapter = { 1012 .adapter = {
@@ -1109,5 +1109,5 @@ module_exit(lme2510_module_exit);
1109 1109
1110MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>"); 1110MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>");
1111MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0"); 1111MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0");
1112MODULE_VERSION("1.74"); 1112MODULE_VERSION("1.75");
1113MODULE_LICENSE("GPL"); 1113MODULE_LICENSE("GPL");
diff --git a/drivers/media/dvb/frontends/dib7000m.c b/drivers/media/dvb/frontends/dib7000m.c
index c7f5ccf54aa5..289a79837f24 100644
--- a/drivers/media/dvb/frontends/dib7000m.c
+++ b/drivers/media/dvb/frontends/dib7000m.c
@@ -1285,6 +1285,25 @@ struct i2c_adapter * dib7000m_get_i2c_master(struct dvb_frontend *demod, enum di
1285} 1285}
1286EXPORT_SYMBOL(dib7000m_get_i2c_master); 1286EXPORT_SYMBOL(dib7000m_get_i2c_master);
1287 1287
1288int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff)
1289{
1290 struct dib7000m_state *state = fe->demodulator_priv;
1291 u16 val = dib7000m_read_word(state, 294 + state->reg_offs) & 0xffef;
1292 val |= (onoff & 0x1) << 4;
1293 dprintk("PID filter enabled %d", onoff);
1294 return dib7000m_write_word(state, 294 + state->reg_offs, val);
1295}
1296EXPORT_SYMBOL(dib7000m_pid_filter_ctrl);
1297
1298int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id, u16 pid, u8 onoff)
1299{
1300 struct dib7000m_state *state = fe->demodulator_priv;
1301 dprintk("PID filter: index %x, PID %d, OnOff %d", id, pid, onoff);
1302 return dib7000m_write_word(state, 300 + state->reg_offs + id,
1303 onoff ? (1 << 13) | pid : 0);
1304}
1305EXPORT_SYMBOL(dib7000m_pid_filter);
1306
1288#if 0 1307#if 0
1289/* used with some prototype boards */ 1308/* used with some prototype boards */
1290int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods, 1309int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods,
diff --git a/drivers/media/dvb/frontends/dib7000m.h b/drivers/media/dvb/frontends/dib7000m.h
index 113819ce9f0d..81fcf2241c64 100644
--- a/drivers/media/dvb/frontends/dib7000m.h
+++ b/drivers/media/dvb/frontends/dib7000m.h
@@ -46,6 +46,8 @@ extern struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
46extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *, 46extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *,
47 enum dibx000_i2c_interface, 47 enum dibx000_i2c_interface,
48 int); 48 int);
49extern int dib7000m_pid_filter(struct dvb_frontend *, u8 id, u16 pid, u8 onoff);
50extern int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff);
49#else 51#else
50static inline 52static inline
51struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap, 53struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
@@ -63,6 +65,19 @@ struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *demod,
63 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); 65 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
64 return NULL; 66 return NULL;
65} 67}
68static inline int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id,
69 u16 pid, u8 onoff)
70{
71 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
72 return -ENODEV;
73}
74
75static inline int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe,
76 uint8_t onoff)
77{
78 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
79 return -ENODEV;
80}
66#endif 81#endif
67 82
68/* TODO 83/* TODO
diff --git a/drivers/media/dvb/mantis/mantis_pci.c b/drivers/media/dvb/mantis/mantis_pci.c
index 59feeb84aec7..10a432a79d00 100644
--- a/drivers/media/dvb/mantis/mantis_pci.c
+++ b/drivers/media/dvb/mantis/mantis_pci.c
@@ -22,7 +22,6 @@
22#include <linux/moduleparam.h> 22#include <linux/moduleparam.h>
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <asm/io.h> 24#include <asm/io.h>
25#include <asm/pgtable.h>
26#include <asm/page.h> 25#include <asm/page.h>
27#include <linux/kmod.h> 26#include <linux/kmod.h>
28#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
diff --git a/drivers/media/rc/ir-raw.c b/drivers/media/rc/ir-raw.c
index 73230ff93b8a..01f258a2a57a 100644
--- a/drivers/media/rc/ir-raw.c
+++ b/drivers/media/rc/ir-raw.c
@@ -112,7 +112,7 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
112{ 112{
113 ktime_t now; 113 ktime_t now;
114 s64 delta; /* ns */ 114 s64 delta; /* ns */
115 struct ir_raw_event ev; 115 DEFINE_IR_RAW_EVENT(ev);
116 int rc = 0; 116 int rc = 0;
117 117
118 if (!dev->raw) 118 if (!dev->raw)
@@ -125,7 +125,6 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
125 * being called for the first time, note that delta can't 125 * being called for the first time, note that delta can't
126 * possibly be negative. 126 * possibly be negative.
127 */ 127 */
128 ev.duration = 0;
129 if (delta > IR_MAX_DURATION || !dev->raw->last_type) 128 if (delta > IR_MAX_DURATION || !dev->raw->last_type)
130 type |= IR_START_EVENT; 129 type |= IR_START_EVENT;
131 else 130 else
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 6df0a4980645..e4f8eac7f717 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -148,6 +148,7 @@ enum mceusb_model_type {
148 MCE_GEN2_TX_INV, 148 MCE_GEN2_TX_INV,
149 POLARIS_EVK, 149 POLARIS_EVK,
150 CX_HYBRID_TV, 150 CX_HYBRID_TV,
151 MULTIFUNCTION,
151}; 152};
152 153
153struct mceusb_model { 154struct mceusb_model {
@@ -155,9 +156,10 @@ struct mceusb_model {
155 u32 mce_gen2:1; 156 u32 mce_gen2:1;
156 u32 mce_gen3:1; 157 u32 mce_gen3:1;
157 u32 tx_mask_normal:1; 158 u32 tx_mask_normal:1;
158 u32 is_polaris:1;
159 u32 no_tx:1; 159 u32 no_tx:1;
160 160
161 int ir_intfnum;
162
161 const char *rc_map; /* Allow specify a per-board map */ 163 const char *rc_map; /* Allow specify a per-board map */
162 const char *name; /* per-board name */ 164 const char *name; /* per-board name */
163}; 165};
@@ -179,7 +181,6 @@ static const struct mceusb_model mceusb_model[] = {
179 .tx_mask_normal = 1, 181 .tx_mask_normal = 1,
180 }, 182 },
181 [POLARIS_EVK] = { 183 [POLARIS_EVK] = {
182 .is_polaris = 1,
183 /* 184 /*
184 * In fact, the EVK is shipped without 185 * In fact, the EVK is shipped without
185 * remotes, but we should have something handy, 186 * remotes, but we should have something handy,
@@ -189,10 +190,13 @@ static const struct mceusb_model mceusb_model[] = {
189 .name = "Conexant Hybrid TV (cx231xx) MCE IR", 190 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
190 }, 191 },
191 [CX_HYBRID_TV] = { 192 [CX_HYBRID_TV] = {
192 .is_polaris = 1,
193 .no_tx = 1, /* tx isn't wired up at all */ 193 .no_tx = 1, /* tx isn't wired up at all */
194 .name = "Conexant Hybrid TV (cx231xx) MCE IR", 194 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
195 }, 195 },
196 [MULTIFUNCTION] = {
197 .mce_gen2 = 1,
198 .ir_intfnum = 2,
199 },
196}; 200};
197 201
198static struct usb_device_id mceusb_dev_table[] = { 202static struct usb_device_id mceusb_dev_table[] = {
@@ -216,8 +220,9 @@ static struct usb_device_id mceusb_dev_table[] = {
216 { USB_DEVICE(VENDOR_PHILIPS, 0x206c) }, 220 { USB_DEVICE(VENDOR_PHILIPS, 0x206c) },
217 /* Philips/Spinel plus IR transceiver for ASUS */ 221 /* Philips/Spinel plus IR transceiver for ASUS */
218 { USB_DEVICE(VENDOR_PHILIPS, 0x2088) }, 222 { USB_DEVICE(VENDOR_PHILIPS, 0x2088) },
219 /* Realtek MCE IR Receiver */ 223 /* Realtek MCE IR Receiver and card reader */
220 { USB_DEVICE(VENDOR_REALTEK, 0x0161) }, 224 { USB_DEVICE(VENDOR_REALTEK, 0x0161),
225 .driver_info = MULTIFUNCTION },
221 /* SMK/Toshiba G83C0004D410 */ 226 /* SMK/Toshiba G83C0004D410 */
222 { USB_DEVICE(VENDOR_SMK, 0x031d), 227 { USB_DEVICE(VENDOR_SMK, 0x031d),
223 .driver_info = MCE_GEN2_TX_INV }, 228 .driver_info = MCE_GEN2_TX_INV },
@@ -1101,7 +1106,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1101 bool is_gen3; 1106 bool is_gen3;
1102 bool is_microsoft_gen1; 1107 bool is_microsoft_gen1;
1103 bool tx_mask_normal; 1108 bool tx_mask_normal;
1104 bool is_polaris; 1109 int ir_intfnum;
1105 1110
1106 dev_dbg(&intf->dev, "%s called\n", __func__); 1111 dev_dbg(&intf->dev, "%s called\n", __func__);
1107 1112
@@ -1110,13 +1115,11 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1110 is_gen3 = mceusb_model[model].mce_gen3; 1115 is_gen3 = mceusb_model[model].mce_gen3;
1111 is_microsoft_gen1 = mceusb_model[model].mce_gen1; 1116 is_microsoft_gen1 = mceusb_model[model].mce_gen1;
1112 tx_mask_normal = mceusb_model[model].tx_mask_normal; 1117 tx_mask_normal = mceusb_model[model].tx_mask_normal;
1113 is_polaris = mceusb_model[model].is_polaris; 1118 ir_intfnum = mceusb_model[model].ir_intfnum;
1114 1119
1115 if (is_polaris) { 1120 /* There are multi-function devices with non-IR interfaces */
1116 /* Interface 0 is IR */ 1121 if (idesc->desc.bInterfaceNumber != ir_intfnum)
1117 if (idesc->desc.bInterfaceNumber) 1122 return -ENODEV;
1118 return -ENODEV;
1119 }
1120 1123
1121 /* step through the endpoints to find first bulk in and out endpoint */ 1124 /* step through the endpoints to find first bulk in and out endpoint */
1122 for (i = 0; i < idesc->desc.bNumEndpoints; ++i) { 1125 for (i = 0; i < idesc->desc.bNumEndpoints; ++i) {
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index 273d9d674792..d4d64492a057 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -385,8 +385,9 @@ static void nvt_cir_regs_init(struct nvt_dev *nvt)
385 385
386static void nvt_cir_wake_regs_init(struct nvt_dev *nvt) 386static void nvt_cir_wake_regs_init(struct nvt_dev *nvt)
387{ 387{
388 /* set number of bytes needed for wake key comparison (default 67) */ 388 /* set number of bytes needed for wake from s3 (default 65) */
389 nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_LEN, CIR_WAKE_FIFO_CMP_DEEP); 389 nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_CMP_BYTES,
390 CIR_WAKE_FIFO_CMP_DEEP);
390 391
391 /* set tolerance/variance allowed per byte during wake compare */ 392 /* set tolerance/variance allowed per byte during wake compare */
392 nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE, 393 nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE,
diff --git a/drivers/media/rc/nuvoton-cir.h b/drivers/media/rc/nuvoton-cir.h
index 1df82351cb03..048135eea702 100644
--- a/drivers/media/rc/nuvoton-cir.h
+++ b/drivers/media/rc/nuvoton-cir.h
@@ -305,8 +305,11 @@ struct nvt_dev {
305#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20 305#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20
306#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10 306#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10
307 307
308/* CIR Wake FIFO buffer is 67 bytes long */ 308/*
309#define CIR_WAKE_FIFO_LEN 67 309 * The CIR Wake FIFO buffer is 67 bytes long, but the stock remote wakes
310 * the system comparing only 65 bytes (fails with this set to 67)
311 */
312#define CIR_WAKE_FIFO_CMP_BYTES 65
310/* CIR Wake byte comparison tolerance */ 313/* CIR Wake byte comparison tolerance */
311#define CIR_WAKE_CMP_TOLERANCE 5 314#define CIR_WAKE_CMP_TOLERANCE 5
312 315
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 512a2f4ada0e..5b4422ef4e6d 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -850,7 +850,7 @@ static ssize_t store_protocols(struct device *device,
850 count++; 850 count++;
851 } else { 851 } else {
852 for (i = 0; i < ARRAY_SIZE(proto_names); i++) { 852 for (i = 0; i < ARRAY_SIZE(proto_names); i++) {
853 if (!strncasecmp(tmp, proto_names[i].name, strlen(proto_names[i].name))) { 853 if (!strcasecmp(tmp, proto_names[i].name)) {
854 tmp += strlen(proto_names[i].name); 854 tmp += strlen(proto_names[i].name);
855 mask = proto_names[i].type; 855 mask = proto_names[i].type;
856 break; 856 break;
diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c
index e41e4ad5cc40..9c475c600fc9 100644
--- a/drivers/media/video/au0828/au0828-video.c
+++ b/drivers/media/video/au0828/au0828-video.c
@@ -1758,7 +1758,12 @@ static int vidioc_reqbufs(struct file *file, void *priv,
1758 if (rc < 0) 1758 if (rc < 0)
1759 return rc; 1759 return rc;
1760 1760
1761 return videobuf_reqbufs(&fh->vb_vidq, rb); 1761 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1762 rc = videobuf_reqbufs(&fh->vb_vidq, rb);
1763 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1764 rc = videobuf_reqbufs(&fh->vb_vbiq, rb);
1765
1766 return rc;
1762} 1767}
1763 1768
1764static int vidioc_querybuf(struct file *file, void *priv, 1769static int vidioc_querybuf(struct file *file, void *priv,
@@ -1772,7 +1777,12 @@ static int vidioc_querybuf(struct file *file, void *priv,
1772 if (rc < 0) 1777 if (rc < 0)
1773 return rc; 1778 return rc;
1774 1779
1775 return videobuf_querybuf(&fh->vb_vidq, b); 1780 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1781 rc = videobuf_querybuf(&fh->vb_vidq, b);
1782 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1783 rc = videobuf_querybuf(&fh->vb_vbiq, b);
1784
1785 return rc;
1776} 1786}
1777 1787
1778static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b) 1788static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1785,7 +1795,12 @@ static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
1785 if (rc < 0) 1795 if (rc < 0)
1786 return rc; 1796 return rc;
1787 1797
1788 return videobuf_qbuf(&fh->vb_vidq, b); 1798 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1799 rc = videobuf_qbuf(&fh->vb_vidq, b);
1800 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1801 rc = videobuf_qbuf(&fh->vb_vbiq, b);
1802
1803 return rc;
1789} 1804}
1790 1805
1791static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b) 1806static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1806,7 +1821,12 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
1806 dev->greenscreen_detected = 0; 1821 dev->greenscreen_detected = 0;
1807 } 1822 }
1808 1823
1809 return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK); 1824 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1825 rc = videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
1826 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1827 rc = videobuf_dqbuf(&fh->vb_vbiq, b, file->f_flags & O_NONBLOCK);
1828
1829 return rc;
1810} 1830}
1811 1831
1812static struct v4l2_file_operations au0828_v4l_fops = { 1832static struct v4l2_file_operations au0828_v4l_fops = {
diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 87177733cf92..68ad1963f421 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c
@@ -95,6 +95,53 @@ static const struct cx18_card cx18_card_hvr1600_esmt = {
95 .i2c = &cx18_i2c_std, 95 .i2c = &cx18_i2c_std,
96}; 96};
97 97
98static const struct cx18_card cx18_card_hvr1600_s5h1411 = {
99 .type = CX18_CARD_HVR_1600_S5H1411,
100 .name = "Hauppauge HVR-1600",
101 .comment = "Simultaneous Digital and Analog TV capture supported\n",
102 .v4l2_capabilities = CX18_CAP_ENCODER,
103 .hw_audio_ctrl = CX18_HW_418_AV,
104 .hw_muxer = CX18_HW_CS5345,
105 .hw_all = CX18_HW_TVEEPROM | CX18_HW_418_AV | CX18_HW_TUNER |
106 CX18_HW_CS5345 | CX18_HW_DVB | CX18_HW_GPIO_RESET_CTRL |
107 CX18_HW_Z8F0811_IR_HAUP,
108 .video_inputs = {
109 { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 },
110 { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 },
111 { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 },
112 { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 },
113 { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 },
114 },
115 .audio_inputs = {
116 { CX18_CARD_INPUT_AUD_TUNER,
117 CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 },
118 { CX18_CARD_INPUT_LINE_IN1,
119 CX18_AV_AUDIO_SERIAL1, CS5345_IN_2 },
120 { CX18_CARD_INPUT_LINE_IN2,
121 CX18_AV_AUDIO_SERIAL1, CS5345_IN_3 },
122 },
123 .radio_input = { CX18_CARD_INPUT_AUD_TUNER,
124 CX18_AV_AUDIO_SERIAL1, CS5345_IN_4 },
125 .ddr = {
126 /* ESMT M13S128324A-5B memory */
127 .chip_config = 0x003,
128 .refresh = 0x30c,
129 .timing1 = 0x44220e82,
130 .timing2 = 0x08,
131 .tune_lane = 0,
132 .initial_emrs = 0,
133 },
134 .gpio_init.initial_value = 0x3001,
135 .gpio_init.direction = 0x3001,
136 .gpio_i2c_slave_reset = {
137 .active_lo_mask = 0x3001,
138 .msecs_asserted = 10,
139 .msecs_recovery = 40,
140 .ir_reset_mask = 0x0001,
141 },
142 .i2c = &cx18_i2c_std,
143};
144
98static const struct cx18_card cx18_card_hvr1600_samsung = { 145static const struct cx18_card cx18_card_hvr1600_samsung = {
99 .type = CX18_CARD_HVR_1600_SAMSUNG, 146 .type = CX18_CARD_HVR_1600_SAMSUNG,
100 .name = "Hauppauge HVR-1600 (Preproduction)", 147 .name = "Hauppauge HVR-1600 (Preproduction)",
@@ -523,7 +570,8 @@ static const struct cx18_card *cx18_card_list[] = {
523 &cx18_card_toshiba_qosmio_dvbt, 570 &cx18_card_toshiba_qosmio_dvbt,
524 &cx18_card_leadtek_pvr2100, 571 &cx18_card_leadtek_pvr2100,
525 &cx18_card_leadtek_dvr3100h, 572 &cx18_card_leadtek_dvr3100h,
526 &cx18_card_gotview_dvd3 573 &cx18_card_gotview_dvd3,
574 &cx18_card_hvr1600_s5h1411
527}; 575};
528 576
529const struct cx18_card *cx18_get_card(u16 index) 577const struct cx18_card *cx18_get_card(u16 index)
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 944af8adbe0c..b1c3cbd92743 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -157,6 +157,7 @@ MODULE_PARM_DESC(cardtype,
157 "\t\t\t 7 = Leadtek WinFast PVR2100\n" 157 "\t\t\t 7 = Leadtek WinFast PVR2100\n"
158 "\t\t\t 8 = Leadtek WinFast DVR3100 H\n" 158 "\t\t\t 8 = Leadtek WinFast DVR3100 H\n"
159 "\t\t\t 9 = GoTView PCI DVD3 Hybrid\n" 159 "\t\t\t 9 = GoTView PCI DVD3 Hybrid\n"
160 "\t\t\t 10 = Hauppauge HVR 1600 (S5H1411)\n"
160 "\t\t\t 0 = Autodetect (default)\n" 161 "\t\t\t 0 = Autodetect (default)\n"
161 "\t\t\t-1 = Ignore this card\n\t\t"); 162 "\t\t\t-1 = Ignore this card\n\t\t");
162MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60"); 163MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60");
@@ -337,6 +338,7 @@ void cx18_read_eeprom(struct cx18 *cx, struct tveeprom *tv)
337 switch (cx->card->type) { 338 switch (cx->card->type) {
338 case CX18_CARD_HVR_1600_ESMT: 339 case CX18_CARD_HVR_1600_ESMT:
339 case CX18_CARD_HVR_1600_SAMSUNG: 340 case CX18_CARD_HVR_1600_SAMSUNG:
341 case CX18_CARD_HVR_1600_S5H1411:
340 tveeprom_hauppauge_analog(&c, tv, eedata); 342 tveeprom_hauppauge_analog(&c, tv, eedata);
341 break; 343 break;
342 case CX18_CARD_YUAN_MPC718: 344 case CX18_CARD_YUAN_MPC718:
@@ -365,7 +367,25 @@ static void cx18_process_eeprom(struct cx18 *cx)
365 from the model number. Use the cardtype module option if you 367 from the model number. Use the cardtype module option if you
366 have one of these preproduction models. */ 368 have one of these preproduction models. */
367 switch (tv.model) { 369 switch (tv.model) {
368 case 74000 ... 74999: 370 case 74301: /* Retail models */
371 case 74321:
372 case 74351: /* OEM models */
373 case 74361:
374 /* Digital side is s5h1411/tda18271 */
375 cx->card = cx18_get_card(CX18_CARD_HVR_1600_S5H1411);
376 break;
377 case 74021: /* Retail models */
378 case 74031:
379 case 74041:
380 case 74141:
381 case 74541: /* OEM models */
382 case 74551:
383 case 74591:
384 case 74651:
385 case 74691:
386 case 74751:
387 case 74891:
388 /* Digital side is s5h1409/mxl5005s */
369 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT); 389 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
370 break; 390 break;
371 case 0x718: 391 case 0x718:
@@ -377,7 +397,8 @@ static void cx18_process_eeprom(struct cx18 *cx)
377 CX18_ERR("Invalid EEPROM\n"); 397 CX18_ERR("Invalid EEPROM\n");
378 return; 398 return;
379 default: 399 default:
380 CX18_ERR("Unknown model %d, defaulting to HVR-1600\n", tv.model); 400 CX18_ERR("Unknown model %d, defaulting to original HVR-1600 "
401 "(cardtype=1)\n", tv.model);
381 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT); 402 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
382 break; 403 break;
383 } 404 }
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index 306caac6d3fc..f736679d2517 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -85,7 +85,8 @@
85#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */ 85#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */
86#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */ 86#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */
87#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */ 87#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */
88#define CX18_CARD_LAST 8 88#define CX18_CARD_HVR_1600_S5H1411 9 /* Hauppauge HVR 1600 s5h1411/tda18271*/
89#define CX18_CARD_LAST 9
89 90
90#define CX18_ENC_STREAM_TYPE_MPG 0 91#define CX18_ENC_STREAM_TYPE_MPG 0
91#define CX18_ENC_STREAM_TYPE_TS 1 92#define CX18_ENC_STREAM_TYPE_TS 1
diff --git a/drivers/media/video/cx18/cx18-dvb.c b/drivers/media/video/cx18/cx18-dvb.c
index f0381d62518d..f41922bd4020 100644
--- a/drivers/media/video/cx18/cx18-dvb.c
+++ b/drivers/media/video/cx18/cx18-dvb.c
@@ -29,6 +29,8 @@
29#include "cx18-gpio.h" 29#include "cx18-gpio.h"
30#include "s5h1409.h" 30#include "s5h1409.h"
31#include "mxl5005s.h" 31#include "mxl5005s.h"
32#include "s5h1411.h"
33#include "tda18271.h"
32#include "zl10353.h" 34#include "zl10353.h"
33 35
34#include <linux/firmware.h> 36#include <linux/firmware.h>
@@ -77,6 +79,32 @@ static struct s5h1409_config hauppauge_hvr1600_config = {
77}; 79};
78 80
79/* 81/*
82 * CX18_CARD_HVR_1600_S5H1411
83 */
84static struct s5h1411_config hcw_s5h1411_config = {
85 .output_mode = S5H1411_SERIAL_OUTPUT,
86 .gpio = S5H1411_GPIO_OFF,
87 .vsb_if = S5H1411_IF_44000,
88 .qam_if = S5H1411_IF_4000,
89 .inversion = S5H1411_INVERSION_ON,
90 .status_mode = S5H1411_DEMODLOCKING,
91 .mpeg_timing = S5H1411_MPEGTIMING_CONTINOUS_NONINVERTING_CLOCK,
92};
93
94static struct tda18271_std_map hauppauge_tda18271_std_map = {
95 .atsc_6 = { .if_freq = 5380, .agc_mode = 3, .std = 3,
96 .if_lvl = 6, .rfagc_top = 0x37 },
97 .qam_6 = { .if_freq = 4000, .agc_mode = 3, .std = 0,
98 .if_lvl = 6, .rfagc_top = 0x37 },
99};
100
101static struct tda18271_config hauppauge_tda18271_config = {
102 .std_map = &hauppauge_tda18271_std_map,
103 .gate = TDA18271_GATE_DIGITAL,
104 .output_opt = TDA18271_OUTPUT_LT_OFF,
105};
106
107/*
80 * CX18_CARD_LEADTEK_DVR3100H 108 * CX18_CARD_LEADTEK_DVR3100H
81 */ 109 */
82/* Information/confirmation of proper config values provided by Terry Wu */ 110/* Information/confirmation of proper config values provided by Terry Wu */
@@ -244,6 +272,7 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed)
244 switch (cx->card->type) { 272 switch (cx->card->type) {
245 case CX18_CARD_HVR_1600_ESMT: 273 case CX18_CARD_HVR_1600_ESMT:
246 case CX18_CARD_HVR_1600_SAMSUNG: 274 case CX18_CARD_HVR_1600_SAMSUNG:
275 case CX18_CARD_HVR_1600_S5H1411:
247 v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL); 276 v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL);
248 v |= 0x00400000; /* Serial Mode */ 277 v |= 0x00400000; /* Serial Mode */
249 v |= 0x00002000; /* Data Length - Byte */ 278 v |= 0x00002000; /* Data Length - Byte */
@@ -455,6 +484,15 @@ static int dvb_register(struct cx18_stream *stream)
455 ret = 0; 484 ret = 0;
456 } 485 }
457 break; 486 break;
487 case CX18_CARD_HVR_1600_S5H1411:
488 dvb->fe = dvb_attach(s5h1411_attach,
489 &hcw_s5h1411_config,
490 &cx->i2c_adap[0]);
491 if (dvb->fe != NULL)
492 dvb_attach(tda18271_attach, dvb->fe,
493 0x60, &cx->i2c_adap[0],
494 &hauppauge_tda18271_config);
495 break;
458 case CX18_CARD_LEADTEK_DVR3100H: 496 case CX18_CARD_LEADTEK_DVR3100H:
459 dvb->fe = dvb_attach(zl10353_attach, 497 dvb->fe = dvb_attach(zl10353_attach,
460 &leadtek_dvr3100h_demod, 498 &leadtek_dvr3100h_demod,
diff --git a/drivers/media/video/cx23885/cx23885-i2c.c b/drivers/media/video/cx23885/cx23885-i2c.c
index ed3d8f55029b..307ff543c254 100644
--- a/drivers/media/video/cx23885/cx23885-i2c.c
+++ b/drivers/media/video/cx23885/cx23885-i2c.c
@@ -122,10 +122,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
122 122
123 if (!i2c_wait_done(i2c_adap)) 123 if (!i2c_wait_done(i2c_adap))
124 goto eio; 124 goto eio;
125 if (!i2c_slave_did_ack(i2c_adap)) {
126 retval = -ENXIO;
127 goto err;
128 }
129 if (i2c_debug) { 125 if (i2c_debug) {
130 printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]); 126 printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]);
131 if (!(ctrl & I2C_NOSTOP)) 127 if (!(ctrl & I2C_NOSTOP))
@@ -158,7 +154,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
158 154
159 eio: 155 eio:
160 retval = -EIO; 156 retval = -EIO;
161 err:
162 if (i2c_debug) 157 if (i2c_debug)
163 printk(KERN_ERR " ERR: %d\n", retval); 158 printk(KERN_ERR " ERR: %d\n", retval);
164 return retval; 159 return retval;
@@ -209,10 +204,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
209 204
210 if (!i2c_wait_done(i2c_adap)) 205 if (!i2c_wait_done(i2c_adap))
211 goto eio; 206 goto eio;
212 if (cnt == 0 && !i2c_slave_did_ack(i2c_adap)) {
213 retval = -ENXIO;
214 goto err;
215 }
216 msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff; 207 msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff;
217 if (i2c_debug) { 208 if (i2c_debug) {
218 dprintk(1, " %02x", msg->buf[cnt]); 209 dprintk(1, " %02x", msg->buf[cnt]);
@@ -224,7 +215,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
224 215
225 eio: 216 eio:
226 retval = -EIO; 217 retval = -EIO;
227 err:
228 if (i2c_debug) 218 if (i2c_debug)
229 printk(KERN_ERR " ERR: %d\n", retval); 219 printk(KERN_ERR " ERR: %d\n", retval);
230 return retval; 220 return retval;
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 6fc09dd41b9d..35796e035247 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -2015,7 +2015,8 @@ static int cx25840_probe(struct i2c_client *client,
2015 kfree(state); 2015 kfree(state);
2016 return err; 2016 return err;
2017 } 2017 }
2018 v4l2_ctrl_cluster(2, &state->volume); 2018 if (!is_cx2583x(state))
2019 v4l2_ctrl_cluster(2, &state->volume);
2019 v4l2_ctrl_handler_setup(&state->hdl); 2020 v4l2_ctrl_handler_setup(&state->hdl);
2020 2021
2021 if (client->dev.platform_data) { 2022 if (client->dev.platform_data) {
diff --git a/drivers/media/video/ivtv/ivtv-irq.c b/drivers/media/video/ivtv/ivtv-irq.c
index 9b4faf009196..9c29e964d400 100644
--- a/drivers/media/video/ivtv/ivtv-irq.c
+++ b/drivers/media/video/ivtv/ivtv-irq.c
@@ -628,22 +628,66 @@ static void ivtv_irq_enc_pio_complete(struct ivtv *itv)
628static void ivtv_irq_dma_err(struct ivtv *itv) 628static void ivtv_irq_dma_err(struct ivtv *itv)
629{ 629{
630 u32 data[CX2341X_MBOX_MAX_DATA]; 630 u32 data[CX2341X_MBOX_MAX_DATA];
631 u32 status;
631 632
632 del_timer(&itv->dma_timer); 633 del_timer(&itv->dma_timer);
634
633 ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data); 635 ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data);
636 status = read_reg(IVTV_REG_DMASTATUS);
634 IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1], 637 IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1],
635 read_reg(IVTV_REG_DMASTATUS), itv->cur_dma_stream); 638 status, itv->cur_dma_stream);
636 write_reg(read_reg(IVTV_REG_DMASTATUS) & 3, IVTV_REG_DMASTATUS); 639 /*
640 * We do *not* write back to the IVTV_REG_DMASTATUS register to
641 * clear the error status, if either the encoder write (0x02) or
642 * decoder read (0x01) bus master DMA operation do not indicate
643 * completed. We can race with the DMA engine, which may have
644 * transitioned to completed status *after* we read the register.
645 * Setting a IVTV_REG_DMASTATUS flag back to "busy" status, after the
646 * DMA engine has completed, will cause the DMA engine to stop working.
647 */
648 status &= 0x3;
649 if (status == 0x3)
650 write_reg(status, IVTV_REG_DMASTATUS);
651
637 if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) && 652 if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) &&
638 itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) { 653 itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) {
639 struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream]; 654 struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream];
640 655
641 /* retry */ 656 if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) {
642 if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) 657 /* retry */
658 /*
659 * FIXME - handle cases of DMA error similar to
660 * encoder below, except conditioned on status & 0x1
661 */
643 ivtv_dma_dec_start(s); 662 ivtv_dma_dec_start(s);
644 else 663 return;
645 ivtv_dma_enc_start(s); 664 } else {
646 return; 665 if ((status & 0x2) == 0) {
666 /*
667 * CX2341x Bus Master DMA write is ongoing.
668 * Reset the timer and let it complete.
669 */
670 itv->dma_timer.expires =
671 jiffies + msecs_to_jiffies(600);
672 add_timer(&itv->dma_timer);
673 return;
674 }
675
676 if (itv->dma_retries < 3) {
677 /*
678 * CX2341x Bus Master DMA write has ended.
679 * Retry the write, starting with the first
680 * xfer segment. Just retrying the current
681 * segment is not sufficient.
682 */
683 s->sg_processed = 0;
684 itv->dma_retries++;
685 ivtv_dma_enc_start_xfer(s);
686 return;
687 }
688 /* Too many retries, give up on this one */
689 }
690
647 } 691 }
648 if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) { 692 if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) {
649 ivtv_udma_start(itv); 693 ivtv_udma_start(itv);
diff --git a/drivers/media/video/mem2mem_testdev.c b/drivers/media/video/mem2mem_testdev.c
index c179041d91f8..e7e717800ee2 100644
--- a/drivers/media/video/mem2mem_testdev.c
+++ b/drivers/media/video/mem2mem_testdev.c
@@ -1011,7 +1011,6 @@ static int m2mtest_remove(struct platform_device *pdev)
1011 v4l2_m2m_release(dev->m2m_dev); 1011 v4l2_m2m_release(dev->m2m_dev);
1012 del_timer_sync(&dev->timer); 1012 del_timer_sync(&dev->timer);
1013 video_unregister_device(dev->vfd); 1013 video_unregister_device(dev->vfd);
1014 video_device_release(dev->vfd);
1015 v4l2_device_unregister(&dev->v4l2_dev); 1014 v4l2_device_unregister(&dev->v4l2_dev);
1016 kfree(dev); 1015 kfree(dev);
1017 1016
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b63f8cafa671..561909b65ce6 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -57,7 +57,7 @@
57#include <linux/usb.h> 57#include <linux/usb.h>
58 58
59#define S2255_MAJOR_VERSION 1 59#define S2255_MAJOR_VERSION 1
60#define S2255_MINOR_VERSION 20 60#define S2255_MINOR_VERSION 21
61#define S2255_RELEASE 0 61#define S2255_RELEASE 0
62#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \ 62#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \
63 S2255_MINOR_VERSION, \ 63 S2255_MINOR_VERSION, \
@@ -312,9 +312,9 @@ struct s2255_fh {
312}; 312};
313 313
314/* current cypress EEPROM firmware version */ 314/* current cypress EEPROM firmware version */
315#define S2255_CUR_USB_FWVER ((3 << 8) | 6) 315#define S2255_CUR_USB_FWVER ((3 << 8) | 11)
316/* current DSP FW version */ 316/* current DSP FW version */
317#define S2255_CUR_DSP_FWVER 8 317#define S2255_CUR_DSP_FWVER 10102
318/* Need DSP version 5+ for video status feature */ 318/* Need DSP version 5+ for video status feature */
319#define S2255_MIN_DSP_STATUS 5 319#define S2255_MIN_DSP_STATUS 5
320#define S2255_MIN_DSP_COLORFILTER 8 320#define S2255_MIN_DSP_COLORFILTER 8
@@ -492,9 +492,11 @@ static void planar422p_to_yuv_packed(const unsigned char *in,
492 492
493static void s2255_reset_dsppower(struct s2255_dev *dev) 493static void s2255_reset_dsppower(struct s2255_dev *dev)
494{ 494{
495 s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b0b, NULL, 0, 1); 495 s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b01, NULL, 0, 1);
496 msleep(10); 496 msleep(10);
497 s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1); 497 s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1);
498 msleep(600);
499 s2255_vendor_req(dev, 0x10, 0x0000, 0x0000, NULL, 0, 1);
498 return; 500 return;
499} 501}
500 502
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6625c057be05..150b5f3cd401 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1529,7 +1529,7 @@ void mmc_rescan(struct work_struct *work)
1529 * still present 1529 * still present
1530 */ 1530 */
1531 if (host->bus_ops && host->bus_ops->detect && !host->bus_dead 1531 if (host->bus_ops && host->bus_ops->detect && !host->bus_dead
1532 && mmc_card_is_removable(host)) 1532 && !(host->caps & MMC_CAP_NONREMOVABLE))
1533 host->bus_ops->detect(host); 1533 host->bus_ops->detect(host);
1534 1534
1535 /* 1535 /*
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index a8c3e1c9b02a..4aaa88f8ab5f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1230,10 +1230,32 @@ static int inval_cache_and_wait_for_operation(
1230 sleep_time = chip_op_time / 2; 1230 sleep_time = chip_op_time / 2;
1231 1231
1232 for (;;) { 1232 for (;;) {
1233 if (chip->state != chip_state) {
1234 /* Someone's suspended the operation: sleep */
1235 DECLARE_WAITQUEUE(wait, current);
1236 set_current_state(TASK_UNINTERRUPTIBLE);
1237 add_wait_queue(&chip->wq, &wait);
1238 mutex_unlock(&chip->mutex);
1239 schedule();
1240 remove_wait_queue(&chip->wq, &wait);
1241 mutex_lock(&chip->mutex);
1242 continue;
1243 }
1244
1233 status = map_read(map, cmd_adr); 1245 status = map_read(map, cmd_adr);
1234 if (map_word_andequal(map, status, status_OK, status_OK)) 1246 if (map_word_andequal(map, status, status_OK, status_OK))
1235 break; 1247 break;
1236 1248
1249 if (chip->erase_suspended && chip_state == FL_ERASING) {
1250 /* Erase suspend occured while sleep: reset timeout */
1251 timeo = reset_timeo;
1252 chip->erase_suspended = 0;
1253 }
1254 if (chip->write_suspended && chip_state == FL_WRITING) {
1255 /* Write suspend occured while sleep: reset timeout */
1256 timeo = reset_timeo;
1257 chip->write_suspended = 0;
1258 }
1237 if (!timeo) { 1259 if (!timeo) {
1238 map_write(map, CMD(0x70), cmd_adr); 1260 map_write(map, CMD(0x70), cmd_adr);
1239 chip->state = FL_STATUS; 1261 chip->state = FL_STATUS;
@@ -1257,27 +1279,6 @@ static int inval_cache_and_wait_for_operation(
1257 timeo--; 1279 timeo--;
1258 } 1280 }
1259 mutex_lock(&chip->mutex); 1281 mutex_lock(&chip->mutex);
1260
1261 while (chip->state != chip_state) {
1262 /* Someone's suspended the operation: sleep */
1263 DECLARE_WAITQUEUE(wait, current);
1264 set_current_state(TASK_UNINTERRUPTIBLE);
1265 add_wait_queue(&chip->wq, &wait);
1266 mutex_unlock(&chip->mutex);
1267 schedule();
1268 remove_wait_queue(&chip->wq, &wait);
1269 mutex_lock(&chip->mutex);
1270 }
1271 if (chip->erase_suspended && chip_state == FL_ERASING) {
1272 /* Erase suspend occured while sleep: reset timeout */
1273 timeo = reset_timeo;
1274 chip->erase_suspended = 0;
1275 }
1276 if (chip->write_suspended && chip_state == FL_WRITING) {
1277 /* Write suspend occured while sleep: reset timeout */
1278 timeo = reset_timeo;
1279 chip->write_suspended = 0;
1280 }
1281 } 1282 }
1282 1283
1283 /* Done and happy. */ 1284 /* Done and happy. */
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index d72a5fb2d041..4e1be51cc122 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -1935,14 +1935,14 @@ static void jedec_reset(u32 base, struct map_info *map, struct cfi_private *cfi)
1935} 1935}
1936 1936
1937 1937
1938static int cfi_jedec_setup(struct cfi_private *p_cfi, int index) 1938static int cfi_jedec_setup(struct map_info *map, struct cfi_private *cfi, int index)
1939{ 1939{
1940 int i,num_erase_regions; 1940 int i,num_erase_regions;
1941 uint8_t uaddr; 1941 uint8_t uaddr;
1942 1942
1943 if (! (jedec_table[index].devtypes & p_cfi->device_type)) { 1943 if (!(jedec_table[index].devtypes & cfi->device_type)) {
1944 DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n", 1944 DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n",
1945 jedec_table[index].name, 4 * (1<<p_cfi->device_type)); 1945 jedec_table[index].name, 4 * (1<<cfi->device_type));
1946 return 0; 1946 return 0;
1947 } 1947 }
1948 1948
@@ -1950,27 +1950,28 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
1950 1950
1951 num_erase_regions = jedec_table[index].nr_regions; 1951 num_erase_regions = jedec_table[index].nr_regions;
1952 1952
1953 p_cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL); 1953 cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
1954 if (!p_cfi->cfiq) { 1954 if (!cfi->cfiq) {
1955 //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name); 1955 //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name);
1956 return 0; 1956 return 0;
1957 } 1957 }
1958 1958
1959 memset(p_cfi->cfiq,0,sizeof(struct cfi_ident)); 1959 memset(cfi->cfiq, 0, sizeof(struct cfi_ident));
1960 1960
1961 p_cfi->cfiq->P_ID = jedec_table[index].cmd_set; 1961 cfi->cfiq->P_ID = jedec_table[index].cmd_set;
1962 p_cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions; 1962 cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
1963 p_cfi->cfiq->DevSize = jedec_table[index].dev_size; 1963 cfi->cfiq->DevSize = jedec_table[index].dev_size;
1964 p_cfi->cfi_mode = CFI_MODE_JEDEC; 1964 cfi->cfi_mode = CFI_MODE_JEDEC;
1965 cfi->sector_erase_cmd = CMD(0x30);
1965 1966
1966 for (i=0; i<num_erase_regions; i++){ 1967 for (i=0; i<num_erase_regions; i++){
1967 p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i]; 1968 cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
1968 } 1969 }
1969 p_cfi->cmdset_priv = NULL; 1970 cfi->cmdset_priv = NULL;
1970 1971
1971 /* This may be redundant for some cases, but it doesn't hurt */ 1972 /* This may be redundant for some cases, but it doesn't hurt */
1972 p_cfi->mfr = jedec_table[index].mfr_id; 1973 cfi->mfr = jedec_table[index].mfr_id;
1973 p_cfi->id = jedec_table[index].dev_id; 1974 cfi->id = jedec_table[index].dev_id;
1974 1975
1975 uaddr = jedec_table[index].uaddr; 1976 uaddr = jedec_table[index].uaddr;
1976 1977
@@ -1978,8 +1979,8 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
1978 our brains explode when we see the datasheets talking about address 1979 our brains explode when we see the datasheets talking about address
1979 lines numbered from A-1 to A18. The CFI table has unlock addresses 1980 lines numbered from A-1 to A18. The CFI table has unlock addresses
1980 in device-words according to the mode the device is connected in */ 1981 in device-words according to the mode the device is connected in */
1981 p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / p_cfi->device_type; 1982 cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / cfi->device_type;
1982 p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / p_cfi->device_type; 1983 cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / cfi->device_type;
1983 1984
1984 return 1; /* ok */ 1985 return 1; /* ok */
1985} 1986}
@@ -2175,7 +2176,7 @@ static int jedec_probe_chip(struct map_info *map, __u32 base,
2175 "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n", 2176 "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n",
2176 __func__, cfi->mfr, cfi->id, 2177 __func__, cfi->mfr, cfi->id,
2177 cfi->addr_unlock1, cfi->addr_unlock2 ); 2178 cfi->addr_unlock1, cfi->addr_unlock2 );
2178 if (!cfi_jedec_setup(cfi, i)) 2179 if (!cfi_jedec_setup(map, cfi, i))
2179 return 0; 2180 return 0;
2180 goto ok_out; 2181 goto ok_out;
2181 } 2182 }
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 77d64ce19e9f..92de7e3a49a5 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -151,6 +151,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
151 printk(KERN_ERR MOD_NAME 151 printk(KERN_ERR MOD_NAME
152 " %s(): Unable to register resource %pR - kernel bug?\n", 152 " %s(): Unable to register resource %pR - kernel bug?\n",
153 __func__, &window->rsrc); 153 __func__, &window->rsrc);
154 return -EBUSY;
154 } 155 }
155 156
156 157
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index cb20c67995d8..e0a2373bf0e2 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -413,7 +413,6 @@ error3:
413error2: 413error2:
414 list_del(&new->list); 414 list_del(&new->list);
415error1: 415error1:
416 kfree(new);
417 return ret; 416 return ret;
418} 417}
419 418
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 15682ec8530e..28af71c61834 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -968,6 +968,6 @@ static void __exit omap_nand_exit(void)
968module_init(omap_nand_init); 968module_init(omap_nand_init);
969module_exit(omap_nand_exit); 969module_exit(omap_nand_exit);
970 970
971MODULE_ALIAS(DRIVER_NAME); 971MODULE_ALIAS("platform:" DRIVER_NAME);
972MODULE_LICENSE("GPL"); 972MODULE_LICENSE("GPL");
973MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards"); 973MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards");
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index e78914938c5c..ac08750748a3 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -131,7 +131,7 @@ static struct platform_driver generic_onenand_driver = {
131 .remove = __devexit_p(generic_onenand_remove), 131 .remove = __devexit_p(generic_onenand_remove),
132}; 132};
133 133
134MODULE_ALIAS(DRIVER_NAME); 134MODULE_ALIAS("platform:" DRIVER_NAME);
135 135
136static int __init generic_onenand_init(void) 136static int __init generic_onenand_init(void)
137{ 137{
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index ac31f461cc1c..c849cacf4b2f 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -860,7 +860,7 @@ static void __exit omap2_onenand_exit(void)
860module_init(omap2_onenand_init); 860module_init(omap2_onenand_init);
861module_exit(omap2_onenand_exit); 861module_exit(omap2_onenand_exit);
862 862
863MODULE_ALIAS(DRIVER_NAME); 863MODULE_ALIAS("platform:" DRIVER_NAME);
864MODULE_LICENSE("GPL"); 864MODULE_LICENSE("GPL");
865MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 865MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
866MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3"); 866MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 39214e512452..7ca0eded2561 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -425,11 +425,6 @@ static irqreturn_t ariadne_interrupt(int irq, void *data)
425 int csr0, boguscnt; 425 int csr0, boguscnt;
426 int handled = 0; 426 int handled = 0;
427 427
428 if (dev == NULL) {
429 printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n");
430 return IRQ_NONE;
431 }
432
433 lance->RAP = CSR0; /* PCnet-ISA Controller Status */ 428 lance->RAP = CSR0; /* PCnet-ISA Controller Status */
434 429
435 if (!(lance->RDP & INTR)) /* Check if any interrupt has been */ 430 if (!(lance->RDP & INTR)) /* Check if any interrupt has been */
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 7897d114b290..8849699c66c4 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1211,6 +1211,7 @@ struct bnx2x {
1211 /* DCBX Negotation results */ 1211 /* DCBX Negotation results */
1212 struct dcbx_features dcbx_local_feat; 1212 struct dcbx_features dcbx_local_feat;
1213 u32 dcbx_error; 1213 u32 dcbx_error;
1214 u32 pending_max;
1214}; 1215};
1215 1216
1216/** 1217/**
@@ -1616,8 +1617,8 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
1616/* CMNG constants, as derived from system spec calculations */ 1617/* CMNG constants, as derived from system spec calculations */
1617/* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */ 1618/* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */
1618#define DEF_MIN_RATE 100 1619#define DEF_MIN_RATE 100
1619/* resolution of the rate shaping timer - 100 usec */ 1620/* resolution of the rate shaping timer - 400 usec */
1620#define RS_PERIODIC_TIMEOUT_USEC 100 1621#define RS_PERIODIC_TIMEOUT_USEC 400
1621/* number of bytes in single QM arbitration cycle - 1622/* number of bytes in single QM arbitration cycle -
1622 * coefficient for calculating the fairness timer */ 1623 * coefficient for calculating the fairness timer */
1623#define QM_ARB_BYTES 160000 1624#define QM_ARB_BYTES 160000
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 93798129061b..a71b32940533 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -996,6 +996,23 @@ void bnx2x_free_skbs(struct bnx2x *bp)
996 bnx2x_free_rx_skbs(bp); 996 bnx2x_free_rx_skbs(bp);
997} 997}
998 998
999void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1000{
1001 /* load old values */
1002 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1003
1004 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1005 /* leave all but MAX value */
1006 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1007
1008 /* set new MAX value */
1009 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1010 & FUNC_MF_CFG_MAX_BW_MASK;
1011
1012 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1013 }
1014}
1015
999static void bnx2x_free_msix_irqs(struct bnx2x *bp) 1016static void bnx2x_free_msix_irqs(struct bnx2x *bp)
1000{ 1017{
1001 int i, offset = 1; 1018 int i, offset = 1;
@@ -1464,6 +1481,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1464 1481
1465 bnx2x_set_eth_mac(bp, 1); 1482 bnx2x_set_eth_mac(bp, 1);
1466 1483
1484 if (bp->pending_max) {
1485 bnx2x_update_max_mf_config(bp, bp->pending_max);
1486 bp->pending_max = 0;
1487 }
1488
1467 if (bp->port.pmf) 1489 if (bp->port.pmf)
1468 bnx2x_initial_phy_init(bp, load_mode); 1490 bnx2x_initial_phy_init(bp, load_mode);
1469 1491
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index 326ba44b3ded..85ea7f26b51f 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -341,6 +341,15 @@ void bnx2x_dcbx_init(struct bnx2x *bp);
341 */ 341 */
342int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state); 342int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state);
343 343
344/**
345 * Updates MAX part of MF configuration in HW
346 * (if required)
347 *
348 * @param bp
349 * @param value
350 */
351void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value);
352
344/* dev_close main block */ 353/* dev_close main block */
345int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode); 354int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode);
346 355
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index ef2919987a10..7e92f9d0dcfd 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -238,7 +238,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
238 speed |= (cmd->speed_hi << 16); 238 speed |= (cmd->speed_hi << 16);
239 239
240 if (IS_MF_SI(bp)) { 240 if (IS_MF_SI(bp)) {
241 u32 param = 0, part; 241 u32 part;
242 u32 line_speed = bp->link_vars.line_speed; 242 u32 line_speed = bp->link_vars.line_speed;
243 243
244 /* use 10G if no link detected */ 244 /* use 10G if no link detected */
@@ -251,24 +251,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
251 REQ_BC_VER_4_SET_MF_BW); 251 REQ_BC_VER_4_SET_MF_BW);
252 return -EINVAL; 252 return -EINVAL;
253 } 253 }
254
254 part = (speed * 100) / line_speed; 255 part = (speed * 100) / line_speed;
256
255 if (line_speed < speed || !part) { 257 if (line_speed < speed || !part) {
256 BNX2X_DEV_INFO("Speed setting should be in a range " 258 BNX2X_DEV_INFO("Speed setting should be in a range "
257 "from 1%% to 100%% " 259 "from 1%% to 100%% "
258 "of actual line speed\n"); 260 "of actual line speed\n");
259 return -EINVAL; 261 return -EINVAL;
260 } 262 }
261 /* load old values */
262 param = bp->mf_config[BP_VN(bp)];
263 263
264 /* leave only MIN value */ 264 if (bp->state != BNX2X_STATE_OPEN)
265 param &= FUNC_MF_CFG_MIN_BW_MASK; 265 /* store value for following "load" */
266 266 bp->pending_max = part;
267 /* set new MAX value */ 267 else
268 param |= (part << FUNC_MF_CFG_MAX_BW_SHIFT) 268 bnx2x_update_max_mf_config(bp, part);
269 & FUNC_MF_CFG_MAX_BW_MASK;
270 269
271 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, param);
272 return 0; 270 return 0;
273 } 271 }
274 272
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 032ae184b605..aa032339e321 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -2092,8 +2092,9 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
2092 bnx2x_calc_vn_weight_sum(bp); 2092 bnx2x_calc_vn_weight_sum(bp);
2093 2093
2094 /* calculate and set min-max rate for each vn */ 2094 /* calculate and set min-max rate for each vn */
2095 for (vn = VN_0; vn < E1HVN_MAX; vn++) 2095 if (bp->port.pmf)
2096 bnx2x_init_vn_minmax(bp, vn); 2096 for (vn = VN_0; vn < E1HVN_MAX; vn++)
2097 bnx2x_init_vn_minmax(bp, vn);
2097 2098
2098 /* always enable rate shaping and fairness */ 2099 /* always enable rate shaping and fairness */
2099 bp->cmng.flags.cmng_enables |= 2100 bp->cmng.flags.cmng_enables |=
@@ -2162,13 +2163,6 @@ static void bnx2x_link_attn(struct bnx2x *bp)
2162 bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP); 2163 bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
2163 } 2164 }
2164 2165
2165 /* indicate link status only if link status actually changed */
2166 if (prev_link_status != bp->link_vars.link_status)
2167 bnx2x_link_report(bp);
2168
2169 if (IS_MF(bp))
2170 bnx2x_link_sync_notify(bp);
2171
2172 if (bp->link_vars.link_up && bp->link_vars.line_speed) { 2166 if (bp->link_vars.link_up && bp->link_vars.line_speed) {
2173 int cmng_fns = bnx2x_get_cmng_fns_mode(bp); 2167 int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
2174 2168
@@ -2180,6 +2174,13 @@ static void bnx2x_link_attn(struct bnx2x *bp)
2180 DP(NETIF_MSG_IFUP, 2174 DP(NETIF_MSG_IFUP,
2181 "single function mode without fairness\n"); 2175 "single function mode without fairness\n");
2182 } 2176 }
2177
2178 if (IS_MF(bp))
2179 bnx2x_link_sync_notify(bp);
2180
2181 /* indicate link status only if link status actually changed */
2182 if (prev_link_status != bp->link_vars.link_status)
2183 bnx2x_link_report(bp);
2183} 2184}
2184 2185
2185void bnx2x__link_status_update(struct bnx2x *bp) 2186void bnx2x__link_status_update(struct bnx2x *bp)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1024ae158227..a5d5d0b5b155 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -281,23 +281,23 @@ static inline int __check_agg_selection_timer(struct port *port)
281} 281}
282 282
283/** 283/**
284 * __get_rx_machine_lock - lock the port's RX machine 284 * __get_state_machine_lock - lock the port's state machines
285 * @port: the port we're looking at 285 * @port: the port we're looking at
286 * 286 *
287 */ 287 */
288static inline void __get_rx_machine_lock(struct port *port) 288static inline void __get_state_machine_lock(struct port *port)
289{ 289{
290 spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 290 spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
291} 291}
292 292
293/** 293/**
294 * __release_rx_machine_lock - unlock the port's RX machine 294 * __release_state_machine_lock - unlock the port's state machines
295 * @port: the port we're looking at 295 * @port: the port we're looking at
296 * 296 *
297 */ 297 */
298static inline void __release_rx_machine_lock(struct port *port) 298static inline void __release_state_machine_lock(struct port *port)
299{ 299{
300 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 300 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
301} 301}
302 302
303/** 303/**
@@ -388,14 +388,14 @@ static u8 __get_duplex(struct port *port)
388} 388}
389 389
390/** 390/**
391 * __initialize_port_locks - initialize a port's RX machine spinlock 391 * __initialize_port_locks - initialize a port's STATE machine spinlock
392 * @port: the port we're looking at 392 * @port: the port we're looking at
393 * 393 *
394 */ 394 */
395static inline void __initialize_port_locks(struct port *port) 395static inline void __initialize_port_locks(struct port *port)
396{ 396{
397 // make sure it isn't called twice 397 // make sure it isn't called twice
398 spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 398 spin_lock_init(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
399} 399}
400 400
401//conversions 401//conversions
@@ -1025,9 +1025,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1025{ 1025{
1026 rx_states_t last_state; 1026 rx_states_t last_state;
1027 1027
1028 // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback)
1029 __get_rx_machine_lock(port);
1030
1031 // keep current State Machine state to compare later if it was changed 1028 // keep current State Machine state to compare later if it was changed
1032 last_state = port->sm_rx_state; 1029 last_state = port->sm_rx_state;
1033 1030
@@ -1133,7 +1130,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1133 pr_err("%s: An illegal loopback occurred on adapter (%s).\n" 1130 pr_err("%s: An illegal loopback occurred on adapter (%s).\n"
1134 "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n", 1131 "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n",
1135 port->slave->dev->master->name, port->slave->dev->name); 1132 port->slave->dev->master->name, port->slave->dev->name);
1136 __release_rx_machine_lock(port);
1137 return; 1133 return;
1138 } 1134 }
1139 __update_selected(lacpdu, port); 1135 __update_selected(lacpdu, port);
@@ -1153,7 +1149,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1153 break; 1149 break;
1154 } 1150 }
1155 } 1151 }
1156 __release_rx_machine_lock(port);
1157} 1152}
1158 1153
1159/** 1154/**
@@ -2155,6 +2150,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2155 goto re_arm; 2150 goto re_arm;
2156 } 2151 }
2157 2152
2153 /* Lock around state machines to protect data accessed
2154 * by all (e.g., port->sm_vars). ad_rx_machine may run
2155 * concurrently due to incoming LACPDU.
2156 */
2157 __get_state_machine_lock(port);
2158
2158 ad_rx_machine(NULL, port); 2159 ad_rx_machine(NULL, port);
2159 ad_periodic_machine(port); 2160 ad_periodic_machine(port);
2160 ad_port_selection_logic(port); 2161 ad_port_selection_logic(port);
@@ -2164,6 +2165,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2164 // turn off the BEGIN bit, since we already handled it 2165 // turn off the BEGIN bit, since we already handled it
2165 if (port->sm_vars & AD_PORT_BEGIN) 2166 if (port->sm_vars & AD_PORT_BEGIN)
2166 port->sm_vars &= ~AD_PORT_BEGIN; 2167 port->sm_vars &= ~AD_PORT_BEGIN;
2168
2169 __release_state_machine_lock(port);
2167 } 2170 }
2168 2171
2169re_arm: 2172re_arm:
@@ -2200,7 +2203,10 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
2200 case AD_TYPE_LACPDU: 2203 case AD_TYPE_LACPDU:
2201 pr_debug("Received LACPDU on port %d\n", 2204 pr_debug("Received LACPDU on port %d\n",
2202 port->actor_port_number); 2205 port->actor_port_number);
2206 /* Protect against concurrent state machines */
2207 __get_state_machine_lock(port);
2203 ad_rx_machine(lacpdu, port); 2208 ad_rx_machine(lacpdu, port);
2209 __release_state_machine_lock(port);
2204 break; 2210 break;
2205 2211
2206 case AD_TYPE_MARKER: 2212 case AD_TYPE_MARKER:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 2c46a154f2c6..b28baff70864 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -264,7 +264,8 @@ struct ad_bond_info {
264struct ad_slave_info { 264struct ad_slave_info {
265 struct aggregator aggregator; // 802.3ad aggregator structure 265 struct aggregator aggregator; // 802.3ad aggregator structure
266 struct port port; // 802.3ad port structure 266 struct port port; // 802.3ad port structure
267 spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt 267 spinlock_t state_machine_lock; /* mutex state machines vs.
268 incoming LACPDU */
268 u16 id; 269 u16 id;
269}; 270};
270 271
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5933621ac3ff..fc27a9926d9e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -528,8 +528,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
528 vnet_hdr_len = q->vnet_hdr_sz; 528 vnet_hdr_len = q->vnet_hdr_sz;
529 529
530 err = -EINVAL; 530 err = -EINVAL;
531 if ((len -= vnet_hdr_len) < 0) 531 if (len < vnet_hdr_len)
532 goto err; 532 goto err;
533 len -= vnet_hdr_len;
533 534
534 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, 535 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
535 sizeof(vnet_hdr)); 536 sizeof(vnet_hdr));
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 27e6f6d43cac..e3ebd90ae651 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -49,8 +49,8 @@
49#include <asm/processor.h> 49#include <asm/processor.h>
50 50
51#define DRV_NAME "r6040" 51#define DRV_NAME "r6040"
52#define DRV_VERSION "0.26" 52#define DRV_VERSION "0.27"
53#define DRV_RELDATE "30May2010" 53#define DRV_RELDATE "23Feb2011"
54 54
55/* PHY CHIP Address */ 55/* PHY CHIP Address */
56#define PHY1_ADDR 1 /* For MAC1 */ 56#define PHY1_ADDR 1 /* For MAC1 */
@@ -69,6 +69,8 @@
69 69
70/* MAC registers */ 70/* MAC registers */
71#define MCR0 0x00 /* Control register 0 */ 71#define MCR0 0x00 /* Control register 0 */
72#define MCR0_PROMISC 0x0020 /* Promiscuous mode */
73#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */
72#define MCR1 0x04 /* Control register 1 */ 74#define MCR1 0x04 /* Control register 1 */
73#define MAC_RST 0x0001 /* Reset the MAC */ 75#define MAC_RST 0x0001 /* Reset the MAC */
74#define MBCR 0x08 /* Bus control */ 76#define MBCR 0x08 /* Bus control */
@@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev)
851{ 853{
852 struct r6040_private *lp = netdev_priv(dev); 854 struct r6040_private *lp = netdev_priv(dev);
853 void __iomem *ioaddr = lp->base; 855 void __iomem *ioaddr = lp->base;
854 u16 *adrp;
855 u16 reg;
856 unsigned long flags; 856 unsigned long flags;
857 struct netdev_hw_addr *ha; 857 struct netdev_hw_addr *ha;
858 int i; 858 int i;
859 u16 *adrp;
860 u16 hash_table[4] = { 0 };
861
862 spin_lock_irqsave(&lp->lock, flags);
859 863
860 /* MAC Address */ 864 /* Keep our MAC Address */
861 adrp = (u16 *)dev->dev_addr; 865 adrp = (u16 *)dev->dev_addr;
862 iowrite16(adrp[0], ioaddr + MID_0L); 866 iowrite16(adrp[0], ioaddr + MID_0L);
863 iowrite16(adrp[1], ioaddr + MID_0M); 867 iowrite16(adrp[1], ioaddr + MID_0M);
864 iowrite16(adrp[2], ioaddr + MID_0H); 868 iowrite16(adrp[2], ioaddr + MID_0H);
865 869
866 /* Promiscous Mode */
867 spin_lock_irqsave(&lp->lock, flags);
868
869 /* Clear AMCP & PROM bits */ 870 /* Clear AMCP & PROM bits */
870 reg = ioread16(ioaddr) & ~0x0120; 871 lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN);
871 if (dev->flags & IFF_PROMISC) {
872 reg |= 0x0020;
873 lp->mcr0 |= 0x0020;
874 }
875 /* Too many multicast addresses
876 * accept all traffic */
877 else if ((netdev_mc_count(dev) > MCAST_MAX) ||
878 (dev->flags & IFF_ALLMULTI))
879 reg |= 0x0020;
880 872
881 iowrite16(reg, ioaddr); 873 /* Promiscuous mode */
882 spin_unlock_irqrestore(&lp->lock, flags); 874 if (dev->flags & IFF_PROMISC)
875 lp->mcr0 |= MCR0_PROMISC;
883 876
884 /* Build the hash table */ 877 /* Enable multicast hash table function to
885 if (netdev_mc_count(dev) > MCAST_MAX) { 878 * receive all multicast packets. */
886 u16 hash_table[4]; 879 else if (dev->flags & IFF_ALLMULTI) {
887 u32 crc; 880 lp->mcr0 |= MCR0_HASH_EN;
888 881
889 for (i = 0; i < 4; i++) 882 for (i = 0; i < MCAST_MAX ; i++) {
890 hash_table[i] = 0; 883 iowrite16(0, ioaddr + MID_1L + 8 * i);
884 iowrite16(0, ioaddr + MID_1M + 8 * i);
885 iowrite16(0, ioaddr + MID_1H + 8 * i);
886 }
891 887
888 for (i = 0; i < 4; i++)
889 hash_table[i] = 0xffff;
890 }
891 /* Use internal multicast address registers if the number of
892 * multicast addresses is not greater than MCAST_MAX. */
893 else if (netdev_mc_count(dev) <= MCAST_MAX) {
894 i = 0;
892 netdev_for_each_mc_addr(ha, dev) { 895 netdev_for_each_mc_addr(ha, dev) {
893 char *addrs = ha->addr; 896 u16 *adrp = (u16 *) ha->addr;
897 iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
898 iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
899 iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
900 i++;
901 }
902 while (i < MCAST_MAX) {
903 iowrite16(0, ioaddr + MID_1L + 8 * i);
904 iowrite16(0, ioaddr + MID_1M + 8 * i);
905 iowrite16(0, ioaddr + MID_1H + 8 * i);
906 i++;
907 }
908 }
909 /* Otherwise, Enable multicast hash table function. */
910 else {
911 u32 crc;
894 912
895 if (!(*addrs & 1)) 913 lp->mcr0 |= MCR0_HASH_EN;
896 continue; 914
915 for (i = 0; i < MCAST_MAX ; i++) {
916 iowrite16(0, ioaddr + MID_1L + 8 * i);
917 iowrite16(0, ioaddr + MID_1M + 8 * i);
918 iowrite16(0, ioaddr + MID_1H + 8 * i);
919 }
897 920
898 crc = ether_crc_le(6, addrs); 921 /* Build multicast hash table */
922 netdev_for_each_mc_addr(ha, dev) {
923 u8 *addrs = ha->addr;
924
925 crc = ether_crc(ETH_ALEN, addrs);
899 crc >>= 26; 926 crc >>= 26;
900 hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); 927 hash_table[crc >> 4] |= 1 << (crc & 0xf);
901 } 928 }
902 /* Fill the MAC hash tables with their values */ 929 }
930
931 iowrite16(lp->mcr0, ioaddr + MCR0);
932
933 /* Fill the MAC hash tables with their values */
934 if (lp->mcr0 && MCR0_HASH_EN) {
903 iowrite16(hash_table[0], ioaddr + MAR0); 935 iowrite16(hash_table[0], ioaddr + MAR0);
904 iowrite16(hash_table[1], ioaddr + MAR1); 936 iowrite16(hash_table[1], ioaddr + MAR1);
905 iowrite16(hash_table[2], ioaddr + MAR2); 937 iowrite16(hash_table[2], ioaddr + MAR2);
906 iowrite16(hash_table[3], ioaddr + MAR3); 938 iowrite16(hash_table[3], ioaddr + MAR3);
907 } 939 }
908 /* Multicast Address 1~4 case */ 940
909 i = 0; 941 spin_unlock_irqrestore(&lp->lock, flags);
910 netdev_for_each_mc_addr(ha, dev) {
911 if (i >= MCAST_MAX)
912 break;
913 adrp = (u16 *) ha->addr;
914 iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
915 iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
916 iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
917 i++;
918 }
919 while (i < MCAST_MAX) {
920 iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
921 iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
922 iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
923 i++;
924 }
925} 942}
926 943
927static void netdev_get_drvinfo(struct net_device *dev, 944static void netdev_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 64bfdae5956f..d70bde95460b 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1178,6 +1178,11 @@ static int smsc911x_open(struct net_device *dev)
1178 smsc911x_reg_write(pdata, HW_CFG, 0x00050000); 1178 smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
1179 smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740); 1179 smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740);
1180 1180
1181 /* Increase the legal frame size of VLAN tagged frames to 1522 bytes */
1182 spin_lock_irq(&pdata->mac_lock);
1183 smsc911x_mac_write(pdata, VLAN1, ETH_P_8021Q);
1184 spin_unlock_irq(&pdata->mac_lock);
1185
1181 /* Make sure EEPROM has finished loading before setting GPIO_CFG */ 1186 /* Make sure EEPROM has finished loading before setting GPIO_CFG */
1182 timeout = 50; 1187 timeout = 50;
1183 while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) && 1188 while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) &&
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3a5a6fcc0ead..492b7d807fe8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
243 243
244#ifdef CONFIG_PCI_MSI 244#ifdef CONFIG_PCI_MSI
245static int pci_frontend_enable_msix(struct pci_dev *dev, 245static int pci_frontend_enable_msix(struct pci_dev *dev,
246 int **vector, int nvec) 246 int vector[], int nvec)
247{ 247{
248 int err; 248 int err;
249 int i; 249 int i;
@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
277 if (likely(!err)) { 277 if (likely(!err)) {
278 if (likely(!op.value)) { 278 if (likely(!op.value)) {
279 /* we get the result */ 279 /* we get the result */
280 for (i = 0; i < nvec; i++) 280 for (i = 0; i < nvec; i++) {
281 *(*vector+i) = op.msix_entries[i].vector; 281 if (op.msix_entries[i].vector <= 0) {
282 return 0; 282 dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
283 i, op.msix_entries[i].vector);
284 err = -EINVAL;
285 vector[i] = -1;
286 continue;
287 }
288 vector[i] = op.msix_entries[i].vector;
289 }
283 } else { 290 } else {
284 printk(KERN_DEBUG "enable msix get value %x\n", 291 printk(KERN_DEBUG "enable msix get value %x\n",
285 op.value); 292 op.value);
286 return op.value;
287 } 293 }
288 } else { 294 } else {
289 dev_err(&dev->dev, "enable msix get err %x\n", err); 295 dev_err(&dev->dev, "enable msix get err %x\n", err);
290 return err;
291 } 296 }
297 return err;
292} 298}
293 299
294static void pci_frontend_disable_msix(struct pci_dev *dev) 300static void pci_frontend_disable_msix(struct pci_dev *dev)
@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
310 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); 316 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
311} 317}
312 318
313static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) 319static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
314{ 320{
315 int err; 321 int err;
316 struct xen_pci_op op = { 322 struct xen_pci_op op = {
@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
324 330
325 err = do_pci_op(pdev, &op); 331 err = do_pci_op(pdev, &op);
326 if (likely(!err)) { 332 if (likely(!err)) {
327 *(*vector) = op.value; 333 vector[0] = op.value;
334 if (op.value <= 0) {
335 dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
336 op.value);
337 err = -EINVAL;
338 vector[0] = -1;
339 }
328 } else { 340 } else {
329 dev_err(&dev->dev, "pci frontend enable msi failed for dev " 341 dev_err(&dev->dev, "pci frontend enable msi failed for dev "
330 "%x:%x\n", op.bus, op.devfn); 342 "%x:%x\n", op.bus, op.devfn);
@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
733 745
734 pcifront_free_roots(pdev); 746 pcifront_free_roots(pdev);
735 747
736 /*For PCIE_AER error handling job*/ 748 cancel_work_sync(&pdev->op_work);
737 flush_scheduled_work();
738 749
739 if (pdev->irq >= 0) 750 if (pdev->irq >= 0)
740 unbind_from_irqhandler(pdev->irq, pdev); 751 unbind_from_irqhandler(pdev->irq, pdev);
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 158cecbec718..4a109835e420 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -282,6 +282,9 @@ int core_tmr_lun_reset(
282 282
283 atomic_set(&task->task_active, 0); 283 atomic_set(&task->task_active, 0);
284 atomic_set(&task->task_stop, 0); 284 atomic_set(&task->task_stop, 0);
285 } else {
286 if (atomic_read(&task->task_execute_queue) != 0)
287 transport_remove_task_from_execute_queue(task, dev);
285 } 288 }
286 __transport_stop_task_timer(task, &flags); 289 __transport_stop_task_timer(task, &flags);
287 290
@@ -301,6 +304,7 @@ int core_tmr_lun_reset(
301 DEBUG_LR("LUN_RESET: got t_transport_active = 1 for" 304 DEBUG_LR("LUN_RESET: got t_transport_active = 1 for"
302 " task: %p, t_fe_count: %d dev: %p\n", task, 305 " task: %p, t_fe_count: %d dev: %p\n", task,
303 fe_count, dev); 306 fe_count, dev);
307 atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
304 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, 308 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock,
305 flags); 309 flags);
306 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); 310 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
@@ -310,6 +314,7 @@ int core_tmr_lun_reset(
310 } 314 }
311 DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p," 315 DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p,"
312 " t_fe_count: %d dev: %p\n", task, fe_count, dev); 316 " t_fe_count: %d dev: %p\n", task, fe_count, dev);
317 atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
313 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags); 318 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags);
314 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); 319 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
315 320
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 236e22d8cfae..4bbf6c147f89 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1207,7 +1207,7 @@ transport_get_task_from_execute_queue(struct se_device *dev)
1207 * 1207 *
1208 * 1208 *
1209 */ 1209 */
1210static void transport_remove_task_from_execute_queue( 1210void transport_remove_task_from_execute_queue(
1211 struct se_task *task, 1211 struct se_task *task,
1212 struct se_device *dev) 1212 struct se_device *dev)
1213{ 1213{
@@ -5549,7 +5549,8 @@ static void transport_generic_wait_for_tasks(
5549 5549
5550 atomic_set(&T_TASK(cmd)->transport_lun_stop, 0); 5550 atomic_set(&T_TASK(cmd)->transport_lun_stop, 0);
5551 } 5551 }
5552 if (!atomic_read(&T_TASK(cmd)->t_transport_active)) 5552 if (!atomic_read(&T_TASK(cmd)->t_transport_active) ||
5553 atomic_read(&T_TASK(cmd)->t_transport_aborted))
5553 goto remove; 5554 goto remove;
5554 5555
5555 atomic_set(&T_TASK(cmd)->t_transport_stop, 1); 5556 atomic_set(&T_TASK(cmd)->t_transport_stop, 1);
@@ -5956,6 +5957,9 @@ static void transport_processing_shutdown(struct se_device *dev)
5956 5957
5957 atomic_set(&task->task_active, 0); 5958 atomic_set(&task->task_active, 0);
5958 atomic_set(&task->task_stop, 0); 5959 atomic_set(&task->task_stop, 0);
5960 } else {
5961 if (atomic_read(&task->task_execute_queue) != 0)
5962 transport_remove_task_from_execute_queue(task, dev);
5959 } 5963 }
5960 __transport_stop_task_timer(task, &flags); 5964 __transport_stop_task_timer(task, &flags);
5961 5965
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index eca855a55c0d..3de4ba0260a5 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -646,7 +646,7 @@ static int __devexit cpwd_remove(struct platform_device *op)
646 struct cpwd *p = dev_get_drvdata(&op->dev); 646 struct cpwd *p = dev_get_drvdata(&op->dev);
647 int i; 647 int i;
648 648
649 for (i = 0; i < 4; i++) { 649 for (i = 0; i < WD_NUMDEVS; i++) {
650 misc_deregister(&p->devs[i].misc); 650 misc_deregister(&p->devs[i].misc);
651 651
652 if (!p->enabled) { 652 if (!p->enabled) {
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 24b966d5061a..204a5603c4ae 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -710,7 +710,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
710 return 0; 710 return 0;
711} 711}
712 712
713static void __devexit hpwdt_exit_nmi_decoding(void) 713static void hpwdt_exit_nmi_decoding(void)
714{ 714{
715 unregister_die_notifier(&die_notifier); 715 unregister_die_notifier(&die_notifier);
716 if (cru_rom_addr) 716 if (cru_rom_addr)
@@ -726,7 +726,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
726 return 0; 726 return 0;
727} 727}
728 728
729static void __devexit hpwdt_exit_nmi_decoding(void) 729static void hpwdt_exit_nmi_decoding(void)
730{ 730{
731} 731}
732#endif /* CONFIG_HPWDT_NMI_DECODING */ 732#endif /* CONFIG_HPWDT_NMI_DECODING */
diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c
index c7d67e9a7465..79906255eeb6 100644
--- a/drivers/watchdog/sbc_fitpc2_wdt.c
+++ b/drivers/watchdog/sbc_fitpc2_wdt.c
@@ -201,11 +201,14 @@ static struct miscdevice fitpc2_wdt_miscdev = {
201static int __init fitpc2_wdt_init(void) 201static int __init fitpc2_wdt_init(void)
202{ 202{
203 int err; 203 int err;
204 const char *brd_name;
204 205
205 if (!strstr(dmi_get_system_info(DMI_BOARD_NAME), "SBC-FITPC2")) 206 brd_name = dmi_get_system_info(DMI_BOARD_NAME);
207
208 if (!brd_name || !strstr(brd_name, "SBC-FITPC2"))
206 return -ENODEV; 209 return -ENODEV;
207 210
208 pr_info("%s found\n", dmi_get_system_info(DMI_BOARD_NAME)); 211 pr_info("%s found\n", brd_name);
209 212
210 if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) { 213 if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) {
211 pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT); 214 pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT);
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index 0461858e07d0..b61ab1c54293 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -508,7 +508,7 @@ static int __init sch311x_detect(int sio_config_port, unsigned short *addr)
508 sch311x_sio_outb(sio_config_port, 0x07, 0x0a); 508 sch311x_sio_outb(sio_config_port, 0x07, 0x0a);
509 509
510 /* Check if Logical Device Register is currently active */ 510 /* Check if Logical Device Register is currently active */
511 if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0) 511 if ((sch311x_sio_inb(sio_config_port, 0x30) & 0x01) == 0)
512 printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n"); 512 printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n");
513 513
514 /* Get the base address of the runtime registers */ 514 /* Get the base address of the runtime registers */
diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c
index a6c12dec91a1..df2a64dc9672 100644
--- a/drivers/watchdog/w83697ug_wdt.c
+++ b/drivers/watchdog/w83697ug_wdt.c
@@ -109,7 +109,7 @@ static int w83697ug_select_wd_register(void)
109 outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */ 109 outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */
110 outb_p(0x30, WDT_EFER); /* select CR30 */ 110 outb_p(0x30, WDT_EFER); /* select CR30 */
111 c = inb_p(WDT_EFDR); 111 c = inb_p(WDT_EFDR);
112 outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */ 112 outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
113 113
114 return 0; 114 return 0;
115} 115}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 43f9f02c7db0..718050ace08f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
232 set_phys_to_machine(pfn, frame_list[i]); 232 set_phys_to_machine(pfn, frame_list[i]);
233 233
234 /* Link back into the page tables if not highmem. */ 234 /* Link back into the page tables if not highmem. */
235 if (pfn < max_low_pfn) { 235 if (!xen_hvm_domain() && pfn < max_low_pfn) {
236 int ret; 236 int ret;
237 ret = HYPERVISOR_update_va_mapping( 237 ret = HYPERVISOR_update_va_mapping(
238 (unsigned long)__va(pfn << PAGE_SHIFT), 238 (unsigned long)__va(pfn << PAGE_SHIFT),
@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
280 280
281 scrub_page(page); 281 scrub_page(page);
282 282
283 if (!PageHighMem(page)) { 283 if (!xen_hvm_domain() && !PageHighMem(page)) {
284 ret = HYPERVISOR_update_va_mapping( 284 ret = HYPERVISOR_update_va_mapping(
285 (unsigned long)__va(pfn << PAGE_SHIFT), 285 (unsigned long)__va(pfn << PAGE_SHIFT),
286 __pte_ma(0), 0); 286 __pte_ma(0), 0);
@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
296 /* No more mappings: invalidate P2M and add to balloon. */ 296 /* No more mappings: invalidate P2M and add to balloon. */
297 for (i = 0; i < nr_pages; i++) { 297 for (i = 0; i < nr_pages; i++) {
298 pfn = mfn_to_pfn(frame_list[i]); 298 pfn = mfn_to_pfn(frame_list[i]);
299 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 299 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
300 balloon_append(pfn_to_page(pfn)); 300 balloon_append(pfn_to_page(pfn));
301 } 301 }
302 302
@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
392 392
393static int __init balloon_init(void) 393static int __init balloon_init(void)
394{ 394{
395 unsigned long pfn, extra_pfn_end; 395 unsigned long pfn, nr_pages, extra_pfn_end;
396 struct page *page; 396 struct page *page;
397 397
398 if (!xen_pv_domain()) 398 if (!xen_domain())
399 return -ENODEV; 399 return -ENODEV;
400 400
401 pr_info("xen_balloon: Initialising balloon driver.\n"); 401 pr_info("xen_balloon: Initialising balloon driver.\n");
402 402
403 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); 403 if (xen_pv_domain())
404 nr_pages = xen_start_info->nr_pages;
405 else
406 nr_pages = max_pfn;
407 balloon_stats.current_pages = min(nr_pages, max_pfn);
404 balloon_stats.target_pages = balloon_stats.current_pages; 408 balloon_stats.target_pages = balloon_stats.current_pages;
405 balloon_stats.balloon_low = 0; 409 balloon_stats.balloon_low = 0;
406 balloon_stats.balloon_high = 0; 410 balloon_stats.balloon_high = 0;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74681478100a..149fa875e396 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -114,7 +114,7 @@ struct cpu_evtchn_s {
114static __initdata struct cpu_evtchn_s init_evtchn_mask = { 114static __initdata struct cpu_evtchn_s init_evtchn_mask = {
115 .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, 115 .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
116}; 116};
117static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; 117static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
118 118
119static inline unsigned long *cpu_evtchn_mask(int cpu) 119static inline unsigned long *cpu_evtchn_mask(int cpu)
120{ 120{
@@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
277 277
278 BUG_ON(irq == -1); 278 BUG_ON(irq == -1);
279#ifdef CONFIG_SMP 279#ifdef CONFIG_SMP
280 cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); 280 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
281#endif 281#endif
282 282
283 clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); 283 clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
@@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void)
294 294
295 /* By default all event channels notify CPU#0. */ 295 /* By default all event channels notify CPU#0. */
296 for_each_irq_desc(i, desc) { 296 for_each_irq_desc(i, desc) {
297 cpumask_copy(desc->affinity, cpumask_of(0)); 297 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
298 } 298 }
299#endif 299#endif
300 300
@@ -376,81 +376,69 @@ static void unmask_evtchn(int port)
376 put_cpu(); 376 put_cpu();
377} 377}
378 378
379static int get_nr_hw_irqs(void) 379static int xen_allocate_irq_dynamic(void)
380{ 380{
381 int ret = 1; 381 int first = 0;
382 int irq;
382 383
383#ifdef CONFIG_X86_IO_APIC 384#ifdef CONFIG_X86_IO_APIC
384 ret = get_nr_irqs_gsi(); 385 /*
386 * For an HVM guest or domain 0 which see "real" (emulated or
387 actual respectively) GSIs we allocate dynamic IRQs
388 * e.g. those corresponding to event channels or MSIs
389 * etc. from the range above those "real" GSIs to avoid
390 * collisions.
391 */
392 if (xen_initial_domain() || xen_hvm_domain())
393 first = get_nr_irqs_gsi();
385#endif 394#endif
386 395
387 return ret; 396retry:
388} 397 irq = irq_alloc_desc_from(first, -1);
389 398
390static int find_unbound_pirq(int type) 399 if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
391{ 400 printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
392 int rc, i; 401 first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
393 struct physdev_get_free_pirq op_get_free_pirq; 402 goto retry;
394 op_get_free_pirq.type = type; 403 }
395 404
396 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); 405 if (irq < 0)
397 if (!rc) 406 panic("No available IRQ to bind to: increase nr_irqs!\n");
398 return op_get_free_pirq.pirq;
399 407
400 for (i = 0; i < nr_irqs; i++) { 408 return irq;
401 if (pirq_to_irq[i] < 0)
402 return i;
403 }
404 return -1;
405} 409}
406 410
407static int find_unbound_irq(void) 411static int xen_allocate_irq_gsi(unsigned gsi)
408{ 412{
409 struct irq_data *data; 413 int irq;
410 int irq, res;
411 int bottom = get_nr_hw_irqs();
412 int top = nr_irqs-1;
413
414 if (bottom == nr_irqs)
415 goto no_irqs;
416 414
417 /* This loop starts from the top of IRQ space and goes down. 415 /*
418 * We need this b/c if we have a PCI device in a Xen PV guest 416 * A PV guest has no concept of a GSI (since it has no ACPI
419 * we do not have an IO-APIC (though the backend might have them) 417 * nor access to/knowledge of the physical APICs). Therefore
420 * mapped in. To not have a collision of physical IRQs with the Xen 418 * all IRQs are dynamically allocated from the entire IRQ
421 * event channels start at the top of the IRQ space for virtual IRQs. 419 * space.
422 */ 420 */
423 for (irq = top; irq > bottom; irq--) { 421 if (xen_pv_domain() && !xen_initial_domain())
424 data = irq_get_irq_data(irq); 422 return xen_allocate_irq_dynamic();
425 /* only 15->0 have init'd desc; handle irq > 16 */
426 if (!data)
427 break;
428 if (data->chip == &no_irq_chip)
429 break;
430 if (data->chip != &xen_dynamic_chip)
431 continue;
432 if (irq_info[irq].type == IRQT_UNBOUND)
433 return irq;
434 }
435
436 if (irq == bottom)
437 goto no_irqs;
438 423
439 res = irq_alloc_desc_at(irq, -1); 424 /* Legacy IRQ descriptors are already allocated by the arch. */
425 if (gsi < NR_IRQS_LEGACY)
426 return gsi;
440 427
441 if (WARN_ON(res != irq)) 428 irq = irq_alloc_desc_at(gsi, -1);
442 return -1; 429 if (irq < 0)
430 panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
443 431
444 return irq; 432 return irq;
445
446no_irqs:
447 panic("No available IRQ to bind to: increase nr_irqs!\n");
448} 433}
449 434
450static bool identity_mapped_irq(unsigned irq) 435static void xen_free_irq(unsigned irq)
451{ 436{
452 /* identity map all the hardware irqs */ 437 /* Legacy IRQ descriptors are managed by the arch. */
453 return irq < get_nr_hw_irqs(); 438 if (irq < NR_IRQS_LEGACY)
439 return;
440
441 irq_free_desc(irq);
454} 442}
455 443
456static void pirq_unmask_notify(int irq) 444static void pirq_unmask_notify(int irq)
@@ -486,7 +474,7 @@ static bool probing_irq(int irq)
486 return desc && desc->action == NULL; 474 return desc && desc->action == NULL;
487} 475}
488 476
489static unsigned int startup_pirq(unsigned int irq) 477static unsigned int __startup_pirq(unsigned int irq)
490{ 478{
491 struct evtchn_bind_pirq bind_pirq; 479 struct evtchn_bind_pirq bind_pirq;
492 struct irq_info *info = info_for_irq(irq); 480 struct irq_info *info = info_for_irq(irq);
@@ -524,9 +512,15 @@ out:
524 return 0; 512 return 0;
525} 513}
526 514
527static void shutdown_pirq(unsigned int irq) 515static unsigned int startup_pirq(struct irq_data *data)
516{
517 return __startup_pirq(data->irq);
518}
519
520static void shutdown_pirq(struct irq_data *data)
528{ 521{
529 struct evtchn_close close; 522 struct evtchn_close close;
523 unsigned int irq = data->irq;
530 struct irq_info *info = info_for_irq(irq); 524 struct irq_info *info = info_for_irq(irq);
531 int evtchn = evtchn_from_irq(irq); 525 int evtchn = evtchn_from_irq(irq);
532 526
@@ -546,20 +540,20 @@ static void shutdown_pirq(unsigned int irq)
546 info->evtchn = 0; 540 info->evtchn = 0;
547} 541}
548 542
549static void enable_pirq(unsigned int irq) 543static void enable_pirq(struct irq_data *data)
550{ 544{
551 startup_pirq(irq); 545 startup_pirq(data);
552} 546}
553 547
554static void disable_pirq(unsigned int irq) 548static void disable_pirq(struct irq_data *data)
555{ 549{
556} 550}
557 551
558static void ack_pirq(unsigned int irq) 552static void ack_pirq(struct irq_data *data)
559{ 553{
560 int evtchn = evtchn_from_irq(irq); 554 int evtchn = evtchn_from_irq(data->irq);
561 555
562 move_native_irq(irq); 556 move_native_irq(data->irq);
563 557
564 if (VALID_EVTCHN(evtchn)) { 558 if (VALID_EVTCHN(evtchn)) {
565 mask_evtchn(evtchn); 559 mask_evtchn(evtchn);
@@ -567,23 +561,6 @@ static void ack_pirq(unsigned int irq)
567 } 561 }
568} 562}
569 563
570static void end_pirq(unsigned int irq)
571{
572 int evtchn = evtchn_from_irq(irq);
573 struct irq_desc *desc = irq_to_desc(irq);
574
575 if (WARN_ON(!desc))
576 return;
577
578 if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
579 (IRQ_DISABLED|IRQ_PENDING)) {
580 shutdown_pirq(irq);
581 } else if (VALID_EVTCHN(evtchn)) {
582 unmask_evtchn(evtchn);
583 pirq_unmask_notify(irq);
584 }
585}
586
587static int find_irq_by_gsi(unsigned gsi) 564static int find_irq_by_gsi(unsigned gsi)
588{ 565{
589 int irq; 566 int irq;
@@ -638,14 +615,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
638 goto out; /* XXX need refcount? */ 615 goto out; /* XXX need refcount? */
639 } 616 }
640 617
641 /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore 618 irq = xen_allocate_irq_gsi(gsi);
642 * we are using the !xen_initial_domain() to drop in the function.*/
643 if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
644 xen_pv_domain())) {
645 irq = gsi;
646 irq_alloc_desc_at(irq, -1);
647 } else
648 irq = find_unbound_irq();
649 619
650 set_irq_chip_and_handler_name(irq, &xen_pirq_chip, 620 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
651 handle_level_irq, name); 621 handle_level_irq, name);
@@ -658,7 +628,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
658 * this in the priv domain. */ 628 * this in the priv domain. */
659 if (xen_initial_domain() && 629 if (xen_initial_domain() &&
660 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { 630 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
661 irq_free_desc(irq); 631 xen_free_irq(irq);
662 irq = -ENOSPC; 632 irq = -ENOSPC;
663 goto out; 633 goto out;
664 } 634 }
@@ -674,87 +644,46 @@ out:
674} 644}
675 645
676#ifdef CONFIG_PCI_MSI 646#ifdef CONFIG_PCI_MSI
677#include <linux/msi.h> 647int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
678#include "../pci/msi.h"
679
680void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
681{ 648{
682 spin_lock(&irq_mapping_update_lock); 649 int rc;
683 650 struct physdev_get_free_pirq op_get_free_pirq;
684 if (alloc & XEN_ALLOC_IRQ) {
685 *irq = find_unbound_irq();
686 if (*irq == -1)
687 goto out;
688 }
689
690 if (alloc & XEN_ALLOC_PIRQ) {
691 *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
692 if (*pirq == -1)
693 goto out;
694 }
695 651
696 set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, 652 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
697 handle_level_irq, name); 653 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
698 654
699 irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); 655 WARN_ONCE(rc == -ENOSYS,
700 pirq_to_irq[*pirq] = *irq; 656 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
701 657
702out: 658 return rc ? -1 : op_get_free_pirq.pirq;
703 spin_unlock(&irq_mapping_update_lock);
704} 659}
705 660
706int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) 661int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
662 int pirq, int vector, const char *name)
707{ 663{
708 int irq = -1; 664 int irq, ret;
709 struct physdev_map_pirq map_irq;
710 int rc;
711 int pos;
712 u32 table_offset, bir;
713
714 memset(&map_irq, 0, sizeof(map_irq));
715 map_irq.domid = DOMID_SELF;
716 map_irq.type = MAP_PIRQ_TYPE_MSI;
717 map_irq.index = -1;
718 map_irq.pirq = -1;
719 map_irq.bus = dev->bus->number;
720 map_irq.devfn = dev->devfn;
721
722 if (type == PCI_CAP_ID_MSIX) {
723 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
724
725 pci_read_config_dword(dev, msix_table_offset_reg(pos),
726 &table_offset);
727 bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
728
729 map_irq.table_base = pci_resource_start(dev, bir);
730 map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
731 }
732 665
733 spin_lock(&irq_mapping_update_lock); 666 spin_lock(&irq_mapping_update_lock);
734 667
735 irq = find_unbound_irq(); 668 irq = xen_allocate_irq_dynamic();
736
737 if (irq == -1) 669 if (irq == -1)
738 goto out; 670 goto out;
739 671
740 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
741 if (rc) {
742 printk(KERN_WARNING "xen map irq failed %d\n", rc);
743
744 irq_free_desc(irq);
745
746 irq = -1;
747 goto out;
748 }
749 irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
750
751 set_irq_chip_and_handler_name(irq, &xen_pirq_chip, 672 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
752 handle_level_irq, 673 handle_level_irq, name);
753 (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
754 674
675 irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
676 pirq_to_irq[pirq] = irq;
677 ret = set_irq_msi(irq, msidesc);
678 if (ret < 0)
679 goto error_irq;
755out: 680out:
756 spin_unlock(&irq_mapping_update_lock); 681 spin_unlock(&irq_mapping_update_lock);
757 return irq; 682 return irq;
683error_irq:
684 spin_unlock(&irq_mapping_update_lock);
685 xen_free_irq(irq);
686 return -1;
758} 687}
759#endif 688#endif
760 689
@@ -779,11 +708,12 @@ int xen_destroy_irq(int irq)
779 printk(KERN_WARNING "unmap irq failed %d\n", rc); 708 printk(KERN_WARNING "unmap irq failed %d\n", rc);
780 goto out; 709 goto out;
781 } 710 }
782 pirq_to_irq[info->u.pirq.pirq] = -1;
783 } 711 }
712 pirq_to_irq[info->u.pirq.pirq] = -1;
713
784 irq_info[irq] = mk_unbound_info(); 714 irq_info[irq] = mk_unbound_info();
785 715
786 irq_free_desc(irq); 716 xen_free_irq(irq);
787 717
788out: 718out:
789 spin_unlock(&irq_mapping_update_lock); 719 spin_unlock(&irq_mapping_update_lock);
@@ -814,7 +744,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
814 irq = evtchn_to_irq[evtchn]; 744 irq = evtchn_to_irq[evtchn];
815 745
816 if (irq == -1) { 746 if (irq == -1) {
817 irq = find_unbound_irq(); 747 irq = xen_allocate_irq_dynamic();
818 748
819 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, 749 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
820 handle_fasteoi_irq, "event"); 750 handle_fasteoi_irq, "event");
@@ -839,7 +769,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
839 irq = per_cpu(ipi_to_irq, cpu)[ipi]; 769 irq = per_cpu(ipi_to_irq, cpu)[ipi];
840 770
841 if (irq == -1) { 771 if (irq == -1) {
842 irq = find_unbound_irq(); 772 irq = xen_allocate_irq_dynamic();
843 if (irq < 0) 773 if (irq < 0)
844 goto out; 774 goto out;
845 775
@@ -875,7 +805,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
875 irq = per_cpu(virq_to_irq, cpu)[virq]; 805 irq = per_cpu(virq_to_irq, cpu)[virq];
876 806
877 if (irq == -1) { 807 if (irq == -1) {
878 irq = find_unbound_irq(); 808 irq = xen_allocate_irq_dynamic();
879 809
880 set_irq_chip_and_handler_name(irq, &xen_percpu_chip, 810 set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
881 handle_percpu_irq, "virq"); 811 handle_percpu_irq, "virq");
@@ -934,7 +864,7 @@ static void unbind_from_irq(unsigned int irq)
934 if (irq_info[irq].type != IRQT_UNBOUND) { 864 if (irq_info[irq].type != IRQT_UNBOUND) {
935 irq_info[irq] = mk_unbound_info(); 865 irq_info[irq] = mk_unbound_info();
936 866
937 irq_free_desc(irq); 867 xen_free_irq(irq);
938 } 868 }
939 869
940 spin_unlock(&irq_mapping_update_lock); 870 spin_unlock(&irq_mapping_update_lock);
@@ -990,7 +920,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
990 if (irq < 0) 920 if (irq < 0)
991 return irq; 921 return irq;
992 922
993 irqflags |= IRQF_NO_SUSPEND; 923 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
994 retval = request_irq(irq, handler, irqflags, devname, dev_id); 924 retval = request_irq(irq, handler, irqflags, devname, dev_id);
995 if (retval != 0) { 925 if (retval != 0) {
996 unbind_from_irq(irq); 926 unbind_from_irq(irq);
@@ -1234,11 +1164,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1234 return 0; 1164 return 0;
1235} 1165}
1236 1166
1237static int set_affinity_irq(unsigned irq, const struct cpumask *dest) 1167static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1168 bool force)
1238{ 1169{
1239 unsigned tcpu = cpumask_first(dest); 1170 unsigned tcpu = cpumask_first(dest);
1240 1171
1241 return rebind_irq_to_cpu(irq, tcpu); 1172 return rebind_irq_to_cpu(data->irq, tcpu);
1242} 1173}
1243 1174
1244int resend_irq_on_evtchn(unsigned int irq) 1175int resend_irq_on_evtchn(unsigned int irq)
@@ -1257,35 +1188,35 @@ int resend_irq_on_evtchn(unsigned int irq)
1257 return 1; 1188 return 1;
1258} 1189}
1259 1190
1260static void enable_dynirq(unsigned int irq) 1191static void enable_dynirq(struct irq_data *data)
1261{ 1192{
1262 int evtchn = evtchn_from_irq(irq); 1193 int evtchn = evtchn_from_irq(data->irq);
1263 1194
1264 if (VALID_EVTCHN(evtchn)) 1195 if (VALID_EVTCHN(evtchn))
1265 unmask_evtchn(evtchn); 1196 unmask_evtchn(evtchn);
1266} 1197}
1267 1198
1268static void disable_dynirq(unsigned int irq) 1199static void disable_dynirq(struct irq_data *data)
1269{ 1200{
1270 int evtchn = evtchn_from_irq(irq); 1201 int evtchn = evtchn_from_irq(data->irq);
1271 1202
1272 if (VALID_EVTCHN(evtchn)) 1203 if (VALID_EVTCHN(evtchn))
1273 mask_evtchn(evtchn); 1204 mask_evtchn(evtchn);
1274} 1205}
1275 1206
1276static void ack_dynirq(unsigned int irq) 1207static void ack_dynirq(struct irq_data *data)
1277{ 1208{
1278 int evtchn = evtchn_from_irq(irq); 1209 int evtchn = evtchn_from_irq(data->irq);
1279 1210
1280 move_masked_irq(irq); 1211 move_masked_irq(data->irq);
1281 1212
1282 if (VALID_EVTCHN(evtchn)) 1213 if (VALID_EVTCHN(evtchn))
1283 unmask_evtchn(evtchn); 1214 unmask_evtchn(evtchn);
1284} 1215}
1285 1216
1286static int retrigger_dynirq(unsigned int irq) 1217static int retrigger_dynirq(struct irq_data *data)
1287{ 1218{
1288 int evtchn = evtchn_from_irq(irq); 1219 int evtchn = evtchn_from_irq(data->irq);
1289 struct shared_info *sh = HYPERVISOR_shared_info; 1220 struct shared_info *sh = HYPERVISOR_shared_info;
1290 int ret = 0; 1221 int ret = 0;
1291 1222
@@ -1334,7 +1265,7 @@ static void restore_cpu_pirqs(void)
1334 1265
1335 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); 1266 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1336 1267
1337 startup_pirq(irq); 1268 __startup_pirq(irq);
1338 } 1269 }
1339} 1270}
1340 1271
@@ -1445,7 +1376,6 @@ void xen_poll_irq(int irq)
1445void xen_irq_resume(void) 1376void xen_irq_resume(void)
1446{ 1377{
1447 unsigned int cpu, irq, evtchn; 1378 unsigned int cpu, irq, evtchn;
1448 struct irq_desc *desc;
1449 1379
1450 init_evtchn_cpu_bindings(); 1380 init_evtchn_cpu_bindings();
1451 1381
@@ -1465,66 +1395,48 @@ void xen_irq_resume(void)
1465 restore_cpu_ipis(cpu); 1395 restore_cpu_ipis(cpu);
1466 } 1396 }
1467 1397
1468 /*
1469 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
1470 * are not handled by the IRQ core.
1471 */
1472 for_each_irq_desc(irq, desc) {
1473 if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
1474 continue;
1475 if (desc->status & IRQ_DISABLED)
1476 continue;
1477
1478 evtchn = evtchn_from_irq(irq);
1479 if (evtchn == -1)
1480 continue;
1481
1482 unmask_evtchn(evtchn);
1483 }
1484
1485 restore_cpu_pirqs(); 1398 restore_cpu_pirqs();
1486} 1399}
1487 1400
1488static struct irq_chip xen_dynamic_chip __read_mostly = { 1401static struct irq_chip xen_dynamic_chip __read_mostly = {
1489 .name = "xen-dyn", 1402 .name = "xen-dyn",
1490 1403
1491 .disable = disable_dynirq, 1404 .irq_disable = disable_dynirq,
1492 .mask = disable_dynirq, 1405 .irq_mask = disable_dynirq,
1493 .unmask = enable_dynirq, 1406 .irq_unmask = enable_dynirq,
1494 1407
1495 .eoi = ack_dynirq, 1408 .irq_eoi = ack_dynirq,
1496 .set_affinity = set_affinity_irq, 1409 .irq_set_affinity = set_affinity_irq,
1497 .retrigger = retrigger_dynirq, 1410 .irq_retrigger = retrigger_dynirq,
1498}; 1411};
1499 1412
1500static struct irq_chip xen_pirq_chip __read_mostly = { 1413static struct irq_chip xen_pirq_chip __read_mostly = {
1501 .name = "xen-pirq", 1414 .name = "xen-pirq",
1502 1415
1503 .startup = startup_pirq, 1416 .irq_startup = startup_pirq,
1504 .shutdown = shutdown_pirq, 1417 .irq_shutdown = shutdown_pirq,
1505 1418
1506 .enable = enable_pirq, 1419 .irq_enable = enable_pirq,
1507 .unmask = enable_pirq, 1420 .irq_unmask = enable_pirq,
1508 1421
1509 .disable = disable_pirq, 1422 .irq_disable = disable_pirq,
1510 .mask = disable_pirq, 1423 .irq_mask = disable_pirq,
1511 1424
1512 .ack = ack_pirq, 1425 .irq_ack = ack_pirq,
1513 .end = end_pirq,
1514 1426
1515 .set_affinity = set_affinity_irq, 1427 .irq_set_affinity = set_affinity_irq,
1516 1428
1517 .retrigger = retrigger_dynirq, 1429 .irq_retrigger = retrigger_dynirq,
1518}; 1430};
1519 1431
1520static struct irq_chip xen_percpu_chip __read_mostly = { 1432static struct irq_chip xen_percpu_chip __read_mostly = {
1521 .name = "xen-percpu", 1433 .name = "xen-percpu",
1522 1434
1523 .disable = disable_dynirq, 1435 .irq_disable = disable_dynirq,
1524 .mask = disable_dynirq, 1436 .irq_mask = disable_dynirq,
1525 .unmask = enable_dynirq, 1437 .irq_unmask = enable_dynirq,
1526 1438
1527 .ack = ack_dynirq, 1439 .irq_ack = ack_dynirq,
1528}; 1440};
1529 1441
1530int xen_set_callback_via(uint64_t via) 1442int xen_set_callback_via(uint64_t via)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 24177272bcb8..ebb292859b59 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,42 +34,38 @@ enum shutdown_state {
34/* Ignore multiple shutdown requests. */ 34/* Ignore multiple shutdown requests. */
35static enum shutdown_state shutting_down = SHUTDOWN_INVALID; 35static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
36 36
37#ifdef CONFIG_PM_SLEEP 37struct suspend_info {
38static int xen_hvm_suspend(void *data) 38 int cancelled;
39{ 39 unsigned long arg; /* extra hypercall argument */
40 int err; 40 void (*pre)(void);
41 struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; 41 void (*post)(int cancelled);
42 int *cancelled = data; 42};
43
44 BUG_ON(!irqs_disabled());
45
46 err = sysdev_suspend(PMSG_SUSPEND);
47 if (err) {
48 printk(KERN_ERR "xen_hvm_suspend: sysdev_suspend failed: %d\n",
49 err);
50 return err;
51 }
52
53 *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
54 43
55 xen_hvm_post_suspend(*cancelled); 44static void xen_hvm_post_suspend(int cancelled)
45{
46 xen_arch_hvm_post_suspend(cancelled);
56 gnttab_resume(); 47 gnttab_resume();
48}
57 49
58 if (!*cancelled) { 50static void xen_pre_suspend(void)
59 xen_irq_resume(); 51{
60 xen_console_resume(); 52 xen_mm_pin_all();
61 xen_timer_resume(); 53 gnttab_suspend();
62 } 54 xen_arch_pre_suspend();
63 55}
64 sysdev_resume();
65 56
66 return 0; 57static void xen_post_suspend(int cancelled)
58{
59 xen_arch_post_suspend(cancelled);
60 gnttab_resume();
61 xen_mm_unpin_all();
67} 62}
68 63
64#ifdef CONFIG_PM_SLEEP
69static int xen_suspend(void *data) 65static int xen_suspend(void *data)
70{ 66{
67 struct suspend_info *si = data;
71 int err; 68 int err;
72 int *cancelled = data;
73 69
74 BUG_ON(!irqs_disabled()); 70 BUG_ON(!irqs_disabled());
75 71
@@ -80,22 +76,20 @@ static int xen_suspend(void *data)
80 return err; 76 return err;
81 } 77 }
82 78
83 xen_mm_pin_all(); 79 if (si->pre)
84 gnttab_suspend(); 80 si->pre();
85 xen_pre_suspend();
86 81
87 /* 82 /*
88 * This hypercall returns 1 if suspend was cancelled 83 * This hypercall returns 1 if suspend was cancelled
89 * or the domain was merely checkpointed, and 0 if it 84 * or the domain was merely checkpointed, and 0 if it
90 * is resuming in a new domain. 85 * is resuming in a new domain.
91 */ 86 */
92 *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); 87 si->cancelled = HYPERVISOR_suspend(si->arg);
93 88
94 xen_post_suspend(*cancelled); 89 if (si->post)
95 gnttab_resume(); 90 si->post(si->cancelled);
96 xen_mm_unpin_all();
97 91
98 if (!*cancelled) { 92 if (!si->cancelled) {
99 xen_irq_resume(); 93 xen_irq_resume();
100 xen_console_resume(); 94 xen_console_resume();
101 xen_timer_resume(); 95 xen_timer_resume();
@@ -109,7 +103,7 @@ static int xen_suspend(void *data)
109static void do_suspend(void) 103static void do_suspend(void)
110{ 104{
111 int err; 105 int err;
112 int cancelled = 1; 106 struct suspend_info si;
113 107
114 shutting_down = SHUTDOWN_SUSPEND; 108 shutting_down = SHUTDOWN_SUSPEND;
115 109
@@ -139,20 +133,29 @@ static void do_suspend(void)
139 goto out_resume; 133 goto out_resume;
140 } 134 }
141 135
142 if (xen_hvm_domain()) 136 si.cancelled = 1;
143 err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0)); 137
144 else 138 if (xen_hvm_domain()) {
145 err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); 139 si.arg = 0UL;
140 si.pre = NULL;
141 si.post = &xen_hvm_post_suspend;
142 } else {
143 si.arg = virt_to_mfn(xen_start_info);
144 si.pre = &xen_pre_suspend;
145 si.post = &xen_post_suspend;
146 }
147
148 err = stop_machine(xen_suspend, &si, cpumask_of(0));
146 149
147 dpm_resume_noirq(PMSG_RESUME); 150 dpm_resume_noirq(PMSG_RESUME);
148 151
149 if (err) { 152 if (err) {
150 printk(KERN_ERR "failed to start xen_suspend: %d\n", err); 153 printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
151 cancelled = 1; 154 si.cancelled = 1;
152 } 155 }
153 156
154out_resume: 157out_resume:
155 if (!cancelled) { 158 if (!si.cancelled) {
156 xen_arch_resume(); 159 xen_arch_resume();
157 xs_resume(); 160 xs_resume();
158 } else 161 } else
@@ -172,12 +175,39 @@ out:
172} 175}
173#endif /* CONFIG_PM_SLEEP */ 176#endif /* CONFIG_PM_SLEEP */
174 177
178struct shutdown_handler {
179 const char *command;
180 void (*cb)(void);
181};
182
183static void do_poweroff(void)
184{
185 shutting_down = SHUTDOWN_POWEROFF;
186 orderly_poweroff(false);
187}
188
189static void do_reboot(void)
190{
191 shutting_down = SHUTDOWN_POWEROFF; /* ? */
192 ctrl_alt_del();
193}
194
175static void shutdown_handler(struct xenbus_watch *watch, 195static void shutdown_handler(struct xenbus_watch *watch,
176 const char **vec, unsigned int len) 196 const char **vec, unsigned int len)
177{ 197{
178 char *str; 198 char *str;
179 struct xenbus_transaction xbt; 199 struct xenbus_transaction xbt;
180 int err; 200 int err;
201 static struct shutdown_handler handlers[] = {
202 { "poweroff", do_poweroff },
203 { "halt", do_poweroff },
204 { "reboot", do_reboot },
205#ifdef CONFIG_PM_SLEEP
206 { "suspend", do_suspend },
207#endif
208 {NULL, NULL},
209 };
210 static struct shutdown_handler *handler;
181 211
182 if (shutting_down != SHUTDOWN_INVALID) 212 if (shutting_down != SHUTDOWN_INVALID)
183 return; 213 return;
@@ -194,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
194 return; 224 return;
195 } 225 }
196 226
197 xenbus_write(xbt, "control", "shutdown", ""); 227 for (handler = &handlers[0]; handler->command; handler++) {
228 if (strcmp(str, handler->command) == 0)
229 break;
230 }
231
232 /* Only acknowledge commands which we are prepared to handle. */
233 if (handler->cb)
234 xenbus_write(xbt, "control", "shutdown", "");
198 235
199 err = xenbus_transaction_end(xbt, 0); 236 err = xenbus_transaction_end(xbt, 0);
200 if (err == -EAGAIN) { 237 if (err == -EAGAIN) {
@@ -202,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
202 goto again; 239 goto again;
203 } 240 }
204 241
205 if (strcmp(str, "poweroff") == 0 || 242 if (handler->cb) {
206 strcmp(str, "halt") == 0) { 243 handler->cb();
207 shutting_down = SHUTDOWN_POWEROFF;
208 orderly_poweroff(false);
209 } else if (strcmp(str, "reboot") == 0) {
210 shutting_down = SHUTDOWN_POWEROFF; /* ? */
211 ctrl_alt_del();
212#ifdef CONFIG_PM_SLEEP
213 } else if (strcmp(str, "suspend") == 0) {
214 do_suspend();
215#endif
216 } else { 244 } else {
217 printk(KERN_INFO "Ignoring shutdown request: %s\n", str); 245 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
218 shutting_down = SHUTDOWN_INVALID; 246 shutting_down = SHUTDOWN_INVALID;
@@ -291,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
291 return NOTIFY_DONE; 319 return NOTIFY_DONE;
292} 320}
293 321
294static int __init __setup_shutdown_event(void)
295{
296 /* Delay initialization in the PV on HVM case */
297 if (xen_hvm_domain())
298 return 0;
299
300 if (!xen_pv_domain())
301 return -ENODEV;
302
303 return xen_setup_shutdown_event();
304}
305
306int xen_setup_shutdown_event(void) 322int xen_setup_shutdown_event(void)
307{ 323{
308 static struct notifier_block xenstore_notifier = { 324 static struct notifier_block xenstore_notifier = {
309 .notifier_call = shutdown_event 325 .notifier_call = shutdown_event
310 }; 326 };
327
328 if (!xen_domain())
329 return -ENODEV;
311 register_xenstore_notifier(&xenstore_notifier); 330 register_xenstore_notifier(&xenstore_notifier);
312 331
313 return 0; 332 return 0;
314} 333}
315EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); 334EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
316 335
317subsys_initcall(__setup_shutdown_event); 336subsys_initcall(xen_setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index afbe041f42c5..319dd0a94d51 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
156 if (ret) 156 if (ret)
157 goto out; 157 goto out;
158 xenbus_probe(NULL); 158 xenbus_probe(NULL);
159 ret = xen_setup_shutdown_event();
160 if (ret)
161 goto out;
162 return 0; 159 return 0;
163 160
164out: 161out:
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..7cb53aafac1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..ba01202844c5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f820fa23df4..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 588ff9849873..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd3f172e94e6..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3046 } 3046 }
3047 3047
3048 while (!end) { 3048 while (!end) {
3049 off = extent_map_end(em); 3049 u64 offset_in_extent;
3050 if (off >= max) 3050
3051 end = 1; 3051 /* break if the extent we found is outside the range */
3052 if (em->start >= max || extent_map_end(em) < off)
3053 break;
3054
3055 /*
3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
3052 3062
3053 em_start = em->start; 3063 /*
3054 em_len = em->len; 3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3055 em_end = extent_map_end(em); 3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3056 emflags = em->flags; 3070 emflags = em->flags;
3057 disko = 0; 3071 disko = 0;
3058 flags = 0; 3072 flags = 0;
3059 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
3060 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
3061 end = 1; 3082 end = 1;
3062 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3067 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
3068 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
3069 } else { 3090 } else {
3070 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
3071 } 3092 }
3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3073 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -763,6 +776,27 @@ out:
763} 776}
764 777
765/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
766 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
767 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
768 * modified. 802 * modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
777 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
778 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 813 int err = 0;
814 int faili = 0;
780 u64 start_pos; 815 u64 start_pos;
781 u64 last_pos; 816 u64 last_pos;
782 817
@@ -794,15 +829,24 @@ again:
794 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
795 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
796 if (!pages[i]) { 831 if (!pages[i]) {
797 int c; 832 faili = i - 1;
798 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
799 unlock_page(pages[c]); 834 goto fail;
800 page_cache_release(pages[c]); 835 }
801 } 836
802 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
803 } 846 }
804 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
805 } 848 }
849 err = 0;
806 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
807 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
808 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
842 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
843 } 887 }
844 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
845} 897}
846 898
847static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
851 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
852 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
853 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
854 struct page *pinned[2];
855 struct page **pages = NULL; 906 struct page **pages = NULL;
856 struct iov_iter i; 907 struct iov_iter i;
857 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
872 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
873 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
874 925
875 pinned[0] = NULL;
876 pinned[1] = NULL;
877
878 start_pos = pos; 926 start_pos = pos;
879 927
880 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
962 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
963 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
964 1012
965 /*
966 * there are lots of better ways to do this, but this code
967 * makes sure the first and last page in the file range are
968 * up to date and ready for cow
969 */
970 if ((pos & (PAGE_CACHE_SIZE - 1))) {
971 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
972 if (!PageUptodate(pinned[0])) {
973 ret = btrfs_readpage(NULL, pinned[0]);
974 BUG_ON(ret);
975 wait_on_page_locked(pinned[0]);
976 } else {
977 unlock_page(pinned[0]);
978 }
979 }
980 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
981 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
982 if (!PageUptodate(pinned[1])) {
983 ret = btrfs_readpage(NULL, pinned[1]);
984 BUG_ON(ret);
985 wait_on_page_locked(pinned[1]);
986 } else {
987 unlock_page(pinned[1]);
988 }
989 }
990
991 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
992 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
993 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1024 1046
1025 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1026 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1028 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1029 1063
1030 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1031 if (copied > 0) 1065 if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
1069 err = ret; 1103 err = ret;
1070 1104
1071 kfree(pages); 1105 kfree(pages);
1072 if (pinned[0])
1073 page_cache_release(pinned[0]);
1074 if (pinned[1])
1075 page_cache_release(pinned[1]);
1076 *ppos = pos; 1106 *ppos = pos;
1077 1107
1078 /* 1108 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c5..4a0107e18747 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4806,9 +4806,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4806 int err;
4807 int drop_inode = 0; 4807 int drop_inode = 0;
4808 4808
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4809 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4810 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4811 return -EPERM;
@@ -4821,10 +4818,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4821 goto fail; 4818 goto fail;
4822 4819
4823 /* 4820 /*
4824 * 1 item for inode ref 4821 * 2 items for inode and inode ref
4825 * 2 items for dir items 4822 * 2 items for dir items
4823 * 1 item for parent inode
4826 */ 4824 */
4827 trans = btrfs_start_transaction(root, 3); 4825 trans = btrfs_start_transaction(root, 5);
4828 if (IS_ERR(trans)) { 4826 if (IS_ERR(trans)) {
4829 err = PTR_ERR(trans); 4827 err = PTR_ERR(trans);
4830 goto fail; 4828 goto fail;
@@ -6056,6 +6054,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6056 if (!skip_sum) { 6054 if (!skip_sum) {
6057 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6055 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6058 if (!dip->csums) { 6056 if (!dip->csums) {
6057 kfree(dip);
6059 ret = -ENOMEM; 6058 ret = -ENOMEM;
6060 goto free_ordered; 6059 goto free_ordered;
6061 } 6060 }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 099a58615b90..ebafa65a29b6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -993,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
993{ 993{
994 struct inode *dir; 994 struct inode *dir;
995 995
996 if (nd->flags & LOOKUP_RCU) 996 if (nd && nd->flags & LOOKUP_RCU)
997 return -ECHILD; 997 return -ECHILD;
998 998
999 dir = dentry->d_parent->d_inode; 999 dir = dentry->d_parent->d_inode;
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1228 file = fget_light(fd, &fput_needed); 1200 file = fget_light(fd, &fput_needed);
1229 if (!file) 1201 if (!file)
1230 return -EBADF; 1202 return -EBADF;
1231 ret = compat_readv(file, vec, vlen, &pos); 1203 ret = -ESPIPE;
1204 if (file->f_mode & FMODE_PREAD)
1205 ret = compat_readv(file, vec, vlen, &pos);
1232 fput_light(file, fput_needed); 1206 fput_light(file, fput_needed);
1233 return ret; 1207 return ret;
1234} 1208}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1285 file = fget_light(fd, &fput_needed); 1259 file = fget_light(fd, &fput_needed);
1286 if (!file) 1260 if (!file)
1287 return -EBADF; 1261 return -EBADF;
1288 ret = compat_writev(file, vec, vlen, &pos); 1262 ret = -ESPIPE;
1263 if (file->f_mode & FMODE_PWRITE)
1264 ret = compat_writev(file, vec, vlen, &pos);
1289 fput_light(file, fput_needed); 1265 fput_light(file, fput_needed);
1290 return ret; 1266 return ret;
1291} 1267}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2308} 2284}
2309 2285
2310#endif /* CONFIG_TIMERFD */ 2286#endif /* CONFIG_TIMERFD */
2287
2288#ifdef CONFIG_FHANDLE
2289/*
2290 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
2291 * doesn't set the O_LARGEFILE flag.
2292 */
2293asmlinkage long
2294compat_sys_open_by_handle_at(int mountdirfd,
2295 struct file_handle __user *handle, int flags)
2296{
2297 return do_handle_open(mountdirfd, handle, flags);
2298}
2299#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..a39fe47c466f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
296 __releases(parent->d_lock) 296 __releases(parent->d_lock)
297 __releases(dentry->d_inode->i_lock) 297 __releases(dentry->d_inode->i_lock)
298{ 298{
299 dentry->d_parent = NULL;
300 list_del(&dentry->d_u.d_child); 299 list_del(&dentry->d_u.d_child);
300 /*
301 * Inform try_to_ascend() that we are no longer attached to the
302 * dentry tree
303 */
304 dentry->d_flags |= DCACHE_DISCONNECTED;
301 if (parent) 305 if (parent)
302 spin_unlock(&parent->d_lock); 306 spin_unlock(&parent->d_lock);
303 dentry_iput(dentry); 307 dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
1012} 1016}
1013 1017
1014/* 1018/*
1019 * This tries to ascend one level of parenthood, but
1020 * we can race with renaming, so we need to re-check
1021 * the parenthood after dropping the lock and check
1022 * that the sequence number still matches.
1023 */
1024static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1025{
1026 struct dentry *new = old->d_parent;
1027
1028 rcu_read_lock();
1029 spin_unlock(&old->d_lock);
1030 spin_lock(&new->d_lock);
1031
1032 /*
1033 * might go back up the wrong parent if we have had a rename
1034 * or deletion
1035 */
1036 if (new != old->d_parent ||
1037 (old->d_flags & DCACHE_DISCONNECTED) ||
1038 (!locked && read_seqretry(&rename_lock, seq))) {
1039 spin_unlock(&new->d_lock);
1040 new = NULL;
1041 }
1042 rcu_read_unlock();
1043 return new;
1044}
1045
1046
1047/*
1015 * Search for at least 1 mount point in the dentry's subdirs. 1048 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs 1049 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching. 1050 * list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
1066 * All done at this level ... ascend and resume the search. 1099 * All done at this level ... ascend and resume the search.
1067 */ 1100 */
1068 if (this_parent != parent) { 1101 if (this_parent != parent) {
1069 struct dentry *tmp; 1102 struct dentry *child = this_parent;
1070 struct dentry *child; 1103 this_parent = try_to_ascend(this_parent, locked, seq);
1071 1104 if (!this_parent)
1072 tmp = this_parent->d_parent;
1073 rcu_read_lock();
1074 spin_unlock(&this_parent->d_lock);
1075 child = this_parent;
1076 this_parent = tmp;
1077 spin_lock(&this_parent->d_lock);
1078 /* might go back up the wrong parent if we have had a rename
1079 * or deletion */
1080 if (this_parent != child->d_parent ||
1081 (!locked && read_seqretry(&rename_lock, seq))) {
1082 spin_unlock(&this_parent->d_lock);
1083 rcu_read_unlock();
1084 goto rename_retry; 1105 goto rename_retry;
1085 }
1086 rcu_read_unlock();
1087 next = child->d_u.d_child.next; 1106 next = child->d_u.d_child.next;
1088 goto resume; 1107 goto resume;
1089 } 1108 }
@@ -1181,24 +1200,10 @@ resume:
1181 * All done at this level ... ascend and resume the search. 1200 * All done at this level ... ascend and resume the search.
1182 */ 1201 */
1183 if (this_parent != parent) { 1202 if (this_parent != parent) {
1184 struct dentry *tmp; 1203 struct dentry *child = this_parent;
1185 struct dentry *child; 1204 this_parent = try_to_ascend(this_parent, locked, seq);
1186 1205 if (!this_parent)
1187 tmp = this_parent->d_parent;
1188 rcu_read_lock();
1189 spin_unlock(&this_parent->d_lock);
1190 child = this_parent;
1191 this_parent = tmp;
1192 spin_lock(&this_parent->d_lock);
1193 /* might go back up the wrong parent if we have had a rename
1194 * or deletion */
1195 if (this_parent != child->d_parent ||
1196 (!locked && read_seqretry(&rename_lock, seq))) {
1197 spin_unlock(&this_parent->d_lock);
1198 rcu_read_unlock();
1199 goto rename_retry; 1206 goto rename_retry;
1200 }
1201 rcu_read_unlock();
1202 next = child->d_u.d_child.next; 1207 next = child->d_u.d_child.next;
1203 goto resume; 1208 goto resume;
1204 } 1209 }
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1523} 1528}
1524EXPORT_SYMBOL(d_alloc_root); 1529EXPORT_SYMBOL(d_alloc_root);
1525 1530
1531static struct dentry * __d_find_any_alias(struct inode *inode)
1532{
1533 struct dentry *alias;
1534
1535 if (list_empty(&inode->i_dentry))
1536 return NULL;
1537 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1538 __dget(alias);
1539 return alias;
1540}
1541
1542static struct dentry * d_find_any_alias(struct inode *inode)
1543{
1544 struct dentry *de;
1545
1546 spin_lock(&inode->i_lock);
1547 de = __d_find_any_alias(inode);
1548 spin_unlock(&inode->i_lock);
1549 return de;
1550}
1551
1552
1526/** 1553/**
1527 * d_obtain_alias - find or allocate a dentry for a given inode 1554 * d_obtain_alias - find or allocate a dentry for a given inode
1528 * @inode: inode to allocate the dentry for 1555 * @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1552 if (IS_ERR(inode)) 1579 if (IS_ERR(inode))
1553 return ERR_CAST(inode); 1580 return ERR_CAST(inode);
1554 1581
1555 res = d_find_alias(inode); 1582 res = d_find_any_alias(inode);
1556 if (res) 1583 if (res)
1557 goto out_iput; 1584 goto out_iput;
1558 1585
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1565 1592
1566 1593
1567 spin_lock(&inode->i_lock); 1594 spin_lock(&inode->i_lock);
1568 res = __d_find_alias(inode, 0); 1595 res = __d_find_any_alias(inode);
1569 if (res) { 1596 if (res) {
1570 spin_unlock(&inode->i_lock); 1597 spin_unlock(&inode->i_lock);
1571 dput(tmp); 1598 dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
2920 spin_unlock(&dentry->d_lock); 2947 spin_unlock(&dentry->d_lock);
2921 } 2948 }
2922 if (this_parent != root) { 2949 if (this_parent != root) {
2923 struct dentry *tmp; 2950 struct dentry *child = this_parent;
2924 struct dentry *child;
2925
2926 tmp = this_parent->d_parent;
2927 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { 2951 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2928 this_parent->d_flags |= DCACHE_GENOCIDE; 2952 this_parent->d_flags |= DCACHE_GENOCIDE;
2929 this_parent->d_count--; 2953 this_parent->d_count--;
2930 } 2954 }
2931 rcu_read_lock(); 2955 this_parent = try_to_ascend(this_parent, locked, seq);
2932 spin_unlock(&this_parent->d_lock); 2956 if (!this_parent)
2933 child = this_parent;
2934 this_parent = tmp;
2935 spin_lock(&this_parent->d_lock);
2936 /* might go back up the wrong parent if we have had a rename
2937 * or deletion */
2938 if (this_parent != child->d_parent ||
2939 (!locked && read_seqretry(&rename_lock, seq))) {
2940 spin_unlock(&this_parent->d_lock);
2941 rcu_read_unlock();
2942 goto rename_retry; 2957 goto rename_retry;
2943 }
2944 rcu_read_unlock();
2945 next = child->d_u.d_child.next; 2958 next = child->d_u.d_child.next;
2946 goto resume; 2959 goto resume;
2947 } 2960 }
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..561f69256266 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..9cc19a1dea8e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1936 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1937 sb->dq_op = &ext3_quota_operations;
1938#endif 1938#endif
1939 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1941 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1942 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..5977b356a435 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU) 46 if (nd && nd->flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
56{ 56{
57 if (nd->flags & LOOKUP_RCU) 57 if (nd && nd->flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need to make sure whether the file system
24 * supports decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per the old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow,
56 * but file systems always returned 255. So handle
57 * both values
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_bytes indicates the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With a handle we don't look at the execute bit on
175 * the directory. Ideally we would like CAP_DAC_SEARCH.
176 * But we don't have that
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flags: open flags.
248 *
249 * @mountdirfd indicates the directory file descriptor
250 * of the mount point. The file handle is decoded relative
251 * to the vfsmount pointed to by @mountdirfd. The @flags
252 * value is the same as the open(2) flags.
252 * value is same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..74a9544ac770 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 276 rcu_read_lock();
277 file = fcheck_files(files, fd); 277 file = fcheck_files(files, fd);
278 if (file) { 278 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 279 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 280 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 281 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 282 file = NULL;
283 }
284 } 283 }
285 rcu_read_unlock(); 284 rcu_read_unlock();
286 285
@@ -289,6 +288,25 @@ struct file *fget(unsigned int fd)
289 288
290EXPORT_SYMBOL(fget); 289EXPORT_SYMBOL(fget);
291 290
291struct file *fget_raw(unsigned int fd)
292{
293 struct file *file;
294 struct files_struct *files = current->files;
295
296 rcu_read_lock();
297 file = fcheck_files(files, fd);
298 if (file) {
299 /* File object ref couldn't be taken */
300 if (!atomic_long_inc_not_zero(&file->f_count))
301 file = NULL;
302 }
303 rcu_read_unlock();
304
305 return file;
306}
307
308EXPORT_SYMBOL(fget_raw);
309
292/* 310/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 311 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 312 *
@@ -313,6 +331,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 331 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 332 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 333 file = fcheck_files(files, fd);
334 if (file && (file->f_mode & FMODE_PATH))
335 file = NULL;
336 } else {
337 rcu_read_lock();
338 file = fcheck_files(files, fd);
339 if (file) {
340 if (!(file->f_mode & FMODE_PATH) &&
341 atomic_long_inc_not_zero(&file->f_count))
342 *fput_needed = 1;
343 else
344 /* Didn't get the reference, someone's freed */
345 file = NULL;
346 }
347 rcu_read_unlock();
348 }
349
350 return file;
351}
352
353struct file *fget_raw_light(unsigned int fd, int *fput_needed)
354{
355 struct file *file;
356 struct files_struct *files = current->files;
357
358 *fput_needed = 0;
359 if (atomic_read(&files->count) == 1) {
360 file = fcheck_files(files, fd);
316 } else { 361 } else {
317 rcu_read_lock(); 362 rcu_read_lock();
318 file = fcheck_files(files, fd); 363 file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83543b5ff941..8bd0ef9286c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU) 161 if (nd && nd->flags & LOOKUP_RCU)
162 return -ECHILD; 162 return -ECHILD;
163 163
164 inode = entry->d_inode; 164 inode = entry->d_inode;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd->flags & LOOKUP_RCU) 47 if (nd && nd->flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/internal.h b/fs/internal.h
index 9b976b57d7fe..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,6 +106,19 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..3f04a1804931 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
@@ -1600,7 +1597,7 @@ out:
1600 1597
1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1602{ 1599{
1603 if (nd->flags & LOOKUP_RCU) 1600 if (nd && nd->flags & LOOKUP_RCU)
1604 return -ECHILD; 1601 return -ECHILD;
1605 /* 1602 /*
1606 * This is not negative dentry. Always valid. 1603 * This is not negative dentry. Always valid.
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6b..0a601cae23de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,20 +737,44 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (unlikely(current->total_link_count >= 40)) {
757 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
758 path_put_conditional(link, nd);
759 path_put(&nd->path);
760 return -ELOOP;
761 }
762 cond_resched();
763 current->total_link_count++;
764
768 touch_atime(link->mnt, dentry); 765 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 766 nd_set_link(nd, NULL);
770 767
771 if (link->mnt == nd->path.mnt) 768 if (link->mnt == nd->path.mnt)
772 mntget(link->mnt); 769 mntget(link->mnt);
773 770
771 error = security_inode_follow_link(link->dentry, nd);
772 if (error) {
773 *p = ERR_PTR(error); /* no ->put_link(), please */
774 path_put(&nd->path);
775 return error;
776 }
777
774 nd->last_type = LAST_BIND; 778 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 779 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
776 error = PTR_ERR(*p); 780 error = PTR_ERR(*p);
@@ -780,56 +784,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 784 if (s)
781 error = __vfs_follow_link(nd, s); 785 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 786 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 787 nd->flags |= LOOKUP_JUMPED;
784 if (error) 788 nd->inode = nd->path.dentry->d_inode;
789 if (nd->inode->i_op->follow_link) {
790 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 791 path_put(&nd->path);
792 error = -ELOOP;
793 }
786 } 794 }
787 } 795 }
788 return error; 796 return error;
789} 797}
790 798
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 799static int follow_up_rcu(struct path *path)
834{ 800{
835 struct vfsmount *parent; 801 struct vfsmount *parent;
@@ -1068,7 +1034,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1034
1069 seq = read_seqcount_begin(&parent->d_seq); 1035 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1036 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1037 goto failed;
1072 inode = parent->d_inode; 1038 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1039 nd->path.dentry = parent;
1074 nd->seq = seq; 1040 nd->seq = seq;
@@ -1081,8 +1047,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1047 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1048 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1049 nd->inode = inode;
1084
1085 return 0; 1050 return 0;
1051
1052failed:
1053 nd->flags &= ~LOOKUP_RCU;
1054 if (!(nd->flags & LOOKUP_ROOT))
1055 nd->root.mnt = NULL;
1056 rcu_read_unlock();
1057 br_read_unlock(vfsmount_lock);
1058 return -ECHILD;
1086} 1059}
1087 1060
1088/* 1061/*
@@ -1216,68 +1189,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1189{
1217 struct vfsmount *mnt = nd->path.mnt; 1190 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1191 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1192 int need_reval = 1;
1193 int status = 1;
1220 int err; 1194 int err;
1221 1195
1222 /* 1196 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1197 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1198 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1199 * do the non-racy lookup, below.
1236 */ 1200 */
1237 if (nd->flags & LOOKUP_RCU) { 1201 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1202 unsigned seq;
1239
1240 *inode = nd->inode; 1203 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1204 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1205 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1206 goto unlazy;
1244 return -ECHILD; 1207
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1208 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1209 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1210 return -ECHILD;
1250
1251 nd->seq = seq; 1211 nd->seq = seq;
1212
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1213 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1214 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1215 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1216 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1217 need_reval = 0;
1257 goto fail; 1218 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1219 }
1259 goto done;
1260 } 1220 }
1261 path->mnt = mnt; 1221 path->mnt = mnt;
1262 path->dentry = dentry; 1222 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1223 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1224 return 0;
1265 if (nameidata_drop_rcu(nd)) 1225unlazy:
1266 return -ECHILD; 1226 if (dentry) {
1267 /* fallthru */ 1227 if (nameidata_dentry_drop_rcu(nd, dentry))
1228 return -ECHILD;
1229 } else {
1230 if (nameidata_drop_rcu(nd))
1231 return -ECHILD;
1232 }
1233 } else {
1234 dentry = __d_lookup(parent, name);
1268 } 1235 }
1269 dentry = __d_lookup(parent, name); 1236
1270 if (!dentry) 1237retry:
1271 goto need_lookup; 1238 if (unlikely(!dentry)) {
1272found: 1239 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1240 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1241
1275 if (!dentry) 1242 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1243 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1244 if (likely(!dentry)) {
1278 goto fail; 1245 dentry = d_alloc_and_lookup(parent, name, nd);
1246 if (IS_ERR(dentry)) {
1247 mutex_unlock(&dir->i_mutex);
1248 return PTR_ERR(dentry);
1249 }
1250 /* known good */
1251 need_reval = 0;
1252 status = 1;
1253 }
1254 mutex_unlock(&dir->i_mutex);
1279 } 1255 }
1280done: 1256 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1257 status = d_revalidate(dentry, nd);
1258 if (unlikely(status <= 0)) {
1259 if (status < 0) {
1260 dput(dentry);
1261 return status;
1262 }
1263 if (!d_invalidate(dentry)) {
1264 dput(dentry);
1265 dentry = NULL;
1266 need_reval = 1;
1267 goto retry;
1268 }
1269 }
1270
1281 path->mnt = mnt; 1271 path->mnt = mnt;
1282 path->dentry = dentry; 1272 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1273 err = follow_managed(path, nd->flags);
@@ -1287,39 +1277,113 @@ done:
1287 } 1277 }
1288 *inode = path->dentry->d_inode; 1278 *inode = path->dentry->d_inode;
1289 return 0; 1279 return 0;
1280}
1290 1281
1291need_lookup: 1282static inline int may_lookup(struct nameidata *nd)
1292 dir = parent->d_inode; 1283{
1293 BUG_ON(nd->inode != dir); 1284 if (nd->flags & LOOKUP_RCU) {
1285 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1286 if (err != -ECHILD)
1287 return err;
1288 if (nameidata_drop_rcu(nd))
1289 return -ECHILD;
1290 }
1291 return exec_permission(nd->inode, 0);
1292}
1294 1293
1295 mutex_lock(&dir->i_mutex); 1294static inline int handle_dots(struct nameidata *nd, int type)
1296 /* 1295{
1297 * First re-do the cached lookup just in case it was created 1296 if (type == LAST_DOTDOT) {
1298 * while we waited for the directory semaphore, or the first 1297 if (nd->flags & LOOKUP_RCU) {
1299 * lookup failed due to an unrelated rename. 1298 if (follow_dotdot_rcu(nd))
1300 * 1299 return -ECHILD;
1301 * This could use version numbering or similar to avoid unnecessary 1300 } else
1302 * cache lookups, but then we'd have to do the first lookup in the 1301 follow_dotdot(nd);
1303 * non-racy way. However in the common case here, everything should 1302 }
1304 * be hot in cache, so would it be a big win? 1303 return 0;
1305 */ 1304}
1306 dentry = d_lookup(parent, name); 1305
1307 if (likely(!dentry)) { 1306static void terminate_walk(struct nameidata *nd)
1308 dentry = d_alloc_and_lookup(parent, name, nd); 1307{
1309 mutex_unlock(&dir->i_mutex); 1308 if (!(nd->flags & LOOKUP_RCU)) {
1310 if (IS_ERR(dentry)) 1309 path_put(&nd->path);
1311 goto fail; 1310 } else {
1312 goto done; 1311 nd->flags &= ~LOOKUP_RCU;
1312 if (!(nd->flags & LOOKUP_ROOT))
1313 nd->root.mnt = NULL;
1314 rcu_read_unlock();
1315 br_read_unlock(vfsmount_lock);
1313 } 1316 }
1317}
1318
1319static inline int walk_component(struct nameidata *nd, struct path *path,
1320 struct qstr *name, int type, int follow)
1321{
1322 struct inode *inode;
1323 int err;
1314 /* 1324 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1325 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1326 * to be able to know about the current root directory and
1327 * parent relationships.
1317 */ 1328 */
1318 mutex_unlock(&dir->i_mutex); 1329 if (unlikely(type != LAST_NORM))
1319 goto found; 1330 return handle_dots(nd, type);
1331 err = do_lookup(nd, name, path, &inode);
1332 if (unlikely(err)) {
1333 terminate_walk(nd);
1334 return err;
1335 }
1336 if (!inode) {
1337 path_to_nameidata(path, nd);
1338 terminate_walk(nd);
1339 return -ENOENT;
1340 }
1341 if (unlikely(inode->i_op->follow_link) && follow) {
1342 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1343 return -ECHILD;
1344 BUG_ON(inode != path->dentry->d_inode);
1345 return 1;
1346 }
1347 path_to_nameidata(path, nd);
1348 nd->inode = inode;
1349 return 0;
1350}
1320 1351
1321fail: 1352/*
1322 return PTR_ERR(dentry); 1353 * This limits recursive symlink follows to 8, while
1354 * limiting consecutive symlinks to 40.
1355 *
1356 * Without that kind of total limit, nasty chains of consecutive
1357 * symlinks can cause almost arbitrarily long lookups.
1358 */
1359static inline int nested_symlink(struct path *path, struct nameidata *nd)
1360{
1361 int res;
1362
1363 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1364 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1365 path_put_conditional(path, nd);
1366 path_put(&nd->path);
1367 return -ELOOP;
1368 }
1369
1370 nd->depth++;
1371 current->link_count++;
1372
1373 do {
1374 struct path link = *path;
1375 void *cookie;
1376
1377 res = follow_link(&link, nd, &cookie);
1378 if (!res)
1379 res = walk_component(nd, path, &nd->last,
1380 nd->last_type, LOOKUP_FOLLOW);
1381 put_link(nd, &link, cookie);
1382 } while (res > 0);
1383
1384 current->link_count--;
1385 nd->depth--;
1386 return res;
1323} 1387}
1324 1388
1325/* 1389/*
@@ -1339,30 +1403,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1403 while (*name=='/')
1340 name++; 1404 name++;
1341 if (!*name) 1405 if (!*name)
1342 goto return_reval; 1406 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1407
1347 /* At this point we know we have a real path component. */ 1408 /* At this point we know we have a real path component. */
1348 for(;;) { 1409 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1410 unsigned long hash;
1351 struct qstr this; 1411 struct qstr this;
1352 unsigned int c; 1412 unsigned int c;
1413 int type;
1353 1414
1354 nd->flags |= LOOKUP_CONTINUE; 1415 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1416
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1417 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1418 if (err)
1367 break; 1419 break;
1368 1420
@@ -1378,52 +1430,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1430 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1431 this.hash = end_name_hash(hash);
1380 1432
1433 type = LAST_NORM;
1434 if (this.name[0] == '.') switch (this.len) {
1435 case 2:
1436 if (this.name[1] == '.') {
1437 type = LAST_DOTDOT;
1438 nd->flags |= LOOKUP_JUMPED;
1439 }
1440 break;
1441 case 1:
1442 type = LAST_DOT;
1443 }
1444 if (likely(type == LAST_NORM)) {
1445 struct dentry *parent = nd->path.dentry;
1446 nd->flags &= ~LOOKUP_JUMPED;
1447 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1448 err = parent->d_op->d_hash(parent, nd->inode,
1449 &this);
1450 if (err < 0)
1451 break;
1452 }
1453 }
1454
1381 /* remove trailing slashes? */ 1455 /* remove trailing slashes? */
1382 if (!c) 1456 if (!c)
1383 goto last_component; 1457 goto last_component;
1384 while (*++name == '/'); 1458 while (*++name == '/');
1385 if (!*name) 1459 if (!*name)
1386 goto last_with_slashes; 1460 goto last_component;
1387 1461
1388 /* 1462 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1463 if (err < 0)
1390 * to be able to know about the current root directory and 1464 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1465
1416 if (inode->i_op->follow_link) { 1466 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1467 err = nested_symlink(&next, nd);
1418 if (err) 1468 if (err)
1419 goto return_err; 1469 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1470 }
1428 err = -ENOTDIR; 1471 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1472 if (!nd->inode->i_op->lookup)
@@ -1431,209 +1474,109 @@ exec_again:
1431 continue; 1474 continue;
1432 /* here ends the main loop */ 1475 /* here ends the main loop */
1433 1476
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1477last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1478 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1479 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1480 nd->last = this;
1480 nd->last_type = LAST_NORM; 1481 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1482 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1483 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1484 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1485 return err;
1518} 1486}
1519 1487
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1488static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1489 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 path_get(&nd->path);
1550 nd->flags |= LOOKUP_REVAL;
1551 result = link_path_walk(name, nd);
1552 }
1553
1554 path_put(&save);
1555
1556 return result;
1557}
1558
1559static void path_finish_rcu(struct nameidata *nd)
1560{
1561 if (nd->flags & LOOKUP_RCU) {
1562 /* RCU dangling. Cancel it. */
1563 nd->flags &= ~LOOKUP_RCU;
1564 nd->root.mnt = NULL;
1565 rcu_read_unlock();
1566 br_read_unlock(vfsmount_lock);
1567 }
1568 if (nd->file)
1569 fput(nd->file);
1570}
1571
1572static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1573{ 1490{
1574 int retval = 0; 1491 int retval = 0;
1575 int fput_needed; 1492 int fput_needed;
1576 struct file *file; 1493 struct file *file;
1577 1494
1578 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1495 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1579 nd->flags = flags | LOOKUP_RCU; 1496 nd->flags = flags | LOOKUP_JUMPED;
1580 nd->depth = 0; 1497 nd->depth = 0;
1498 if (flags & LOOKUP_ROOT) {
1499 struct inode *inode = nd->root.dentry->d_inode;
1500 if (*name) {
1501 if (!inode->i_op->lookup)
1502 return -ENOTDIR;
1503 retval = inode_permission(inode, MAY_EXEC);
1504 if (retval)
1505 return retval;
1506 }
1507 nd->path = nd->root;
1508 nd->inode = inode;
1509 if (flags & LOOKUP_RCU) {
1510 br_read_lock(vfsmount_lock);
1511 rcu_read_lock();
1512 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1513 } else {
1514 path_get(&nd->path);
1515 }
1516 return 0;
1517 }
1518
1581 nd->root.mnt = NULL; 1519 nd->root.mnt = NULL;
1582 nd->file = NULL;
1583 1520
1584 if (*name=='/') { 1521 if (*name=='/') {
1585 struct fs_struct *fs = current->fs; 1522 if (flags & LOOKUP_RCU) {
1586 unsigned seq; 1523 br_read_lock(vfsmount_lock);
1587 1524 rcu_read_lock();
1588 br_read_lock(vfsmount_lock); 1525 set_root_rcu(nd);
1589 rcu_read_lock(); 1526 } else {
1590 1527 set_root(nd);
1591 do { 1528 path_get(&nd->root);
1592 seq = read_seqcount_begin(&fs->seq); 1529 }
1593 nd->root = fs->root; 1530 nd->path = nd->root;
1594 nd->path = nd->root;
1595 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1596 } while (read_seqcount_retry(&fs->seq, seq));
1597
1598 } else if (dfd == AT_FDCWD) { 1531 } else if (dfd == AT_FDCWD) {
1599 struct fs_struct *fs = current->fs; 1532 if (flags & LOOKUP_RCU) {
1600 unsigned seq; 1533 struct fs_struct *fs = current->fs;
1601 1534 unsigned seq;
1602 br_read_lock(vfsmount_lock);
1603 rcu_read_lock();
1604 1535
1605 do { 1536 br_read_lock(vfsmount_lock);
1606 seq = read_seqcount_begin(&fs->seq); 1537 rcu_read_lock();
1607 nd->path = fs->pwd;
1608 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1609 } while (read_seqcount_retry(&fs->seq, seq));
1610 1538
1539 do {
1540 seq = read_seqcount_begin(&fs->seq);
1541 nd->path = fs->pwd;
1542 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1543 } while (read_seqcount_retry(&fs->seq, seq));
1544 } else {
1545 get_fs_pwd(current->fs, &nd->path);
1546 }
1611 } else { 1547 } else {
1612 struct dentry *dentry; 1548 struct dentry *dentry;
1613 1549
1614 file = fget_light(dfd, &fput_needed); 1550 file = fget_raw_light(dfd, &fput_needed);
1615 retval = -EBADF; 1551 retval = -EBADF;
1616 if (!file) 1552 if (!file)
1617 goto out_fail; 1553 goto out_fail;
1618 1554
1619 dentry = file->f_path.dentry; 1555 dentry = file->f_path.dentry;
1620 1556
1621 retval = -ENOTDIR; 1557 if (*name) {
1622 if (!S_ISDIR(dentry->d_inode->i_mode)) 1558 retval = -ENOTDIR;
1623 goto fput_fail; 1559 if (!S_ISDIR(dentry->d_inode->i_mode))
1560 goto fput_fail;
1624 1561
1625 retval = file_permission(file, MAY_EXEC); 1562 retval = file_permission(file, MAY_EXEC);
1626 if (retval) 1563 if (retval)
1627 goto fput_fail; 1564 goto fput_fail;
1565 }
1628 1566
1629 nd->path = file->f_path; 1567 nd->path = file->f_path;
1630 if (fput_needed) 1568 if (flags & LOOKUP_RCU) {
1631 nd->file = file; 1569 if (fput_needed)
1632 1570 *fp = file;
1633 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1571 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1634 br_read_lock(vfsmount_lock); 1572 br_read_lock(vfsmount_lock);
1635 rcu_read_lock(); 1573 rcu_read_lock();
1574 } else {
1575 path_get(&file->f_path);
1576 fput_light(file, fput_needed);
1577 }
1636 } 1578 }
1579
1637 nd->inode = nd->path.dentry->d_inode; 1580 nd->inode = nd->path.dentry->d_inode;
1638 return 0; 1581 return 0;
1639 1582
@@ -1643,60 +1586,23 @@ out_fail:
1643 return retval; 1586 return retval;
1644} 1587}
1645 1588
1646static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1589static inline int lookup_last(struct nameidata *nd, struct path *path)
1647{ 1590{
1648 int retval = 0; 1591 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1649 int fput_needed; 1592 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1650 struct file *file;
1651 1593
1652 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1594 nd->flags &= ~LOOKUP_PARENT;
1653 nd->flags = flags; 1595 return walk_component(nd, path, &nd->last, nd->last_type,
1654 nd->depth = 0; 1596 nd->flags & LOOKUP_FOLLOW);
1655 nd->root.mnt = NULL;
1656
1657 if (*name=='/') {
1658 set_root(nd);
1659 nd->path = nd->root;
1660 path_get(&nd->root);
1661 } else if (dfd == AT_FDCWD) {
1662 get_fs_pwd(current->fs, &nd->path);
1663 } else {
1664 struct dentry *dentry;
1665
1666 file = fget_light(dfd, &fput_needed);
1667 retval = -EBADF;
1668 if (!file)
1669 goto out_fail;
1670
1671 dentry = file->f_path.dentry;
1672
1673 retval = -ENOTDIR;
1674 if (!S_ISDIR(dentry->d_inode->i_mode))
1675 goto fput_fail;
1676
1677 retval = file_permission(file, MAY_EXEC);
1678 if (retval)
1679 goto fput_fail;
1680
1681 nd->path = file->f_path;
1682 path_get(&file->f_path);
1683
1684 fput_light(file, fput_needed);
1685 }
1686 nd->inode = nd->path.dentry->d_inode;
1687 return 0;
1688
1689fput_fail:
1690 fput_light(file, fput_needed);
1691out_fail:
1692 return retval;
1693} 1597}
1694 1598
1695/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1599/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1696static int do_path_lookup(int dfd, const char *name, 1600static int path_lookupat(int dfd, const char *name,
1697 unsigned int flags, struct nameidata *nd) 1601 unsigned int flags, struct nameidata *nd)
1698{ 1602{
1699 int retval; 1603 struct file *base = NULL;
1604 struct path path;
1605 int err;
1700 1606
1701 /* 1607 /*
1702 * Path walking is largely split up into 2 different synchronisation 1608 * Path walking is largely split up into 2 different synchronisation
@@ -1712,44 +1618,75 @@ static int do_path_lookup(int dfd, const char *name,
1712 * be handled by restarting a traditional ref-walk (which will always 1618 * be handled by restarting a traditional ref-walk (which will always
1713 * be able to complete). 1619 * be able to complete).
1714 */ 1620 */
1715 retval = path_init_rcu(dfd, name, flags, nd); 1621 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1716 if (unlikely(retval)) 1622
1717 return retval; 1623 if (unlikely(err))
1718 retval = path_walk_rcu(name, nd); 1624 return err;
1719 path_finish_rcu(nd); 1625
1720 if (nd->root.mnt) { 1626 current->total_link_count = 0;
1721 path_put(&nd->root); 1627 err = link_path_walk(name, nd);
1722 nd->root.mnt = NULL; 1628
1629 if (!err && !(flags & LOOKUP_PARENT)) {
1630 err = lookup_last(nd, &path);
1631 while (err > 0) {
1632 void *cookie;
1633 struct path link = path;
1634 nd->flags |= LOOKUP_PARENT;
1635 err = follow_link(&link, nd, &cookie);
1636 if (!err)
1637 err = lookup_last(nd, &path);
1638 put_link(nd, &link, cookie);
1639 }
1723 } 1640 }
1724 1641
1725 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1642 if (nd->flags & LOOKUP_RCU) {
1726 /* slower, locked walk */ 1643 /* went all way through without dropping RCU */
1727 if (retval == -ESTALE) 1644 BUG_ON(err);
1728 flags |= LOOKUP_REVAL; 1645 if (nameidata_drop_rcu_last(nd))
1729 retval = path_init(dfd, name, flags, nd); 1646 err = -ECHILD;
1730 if (unlikely(retval)) 1647 }
1731 return retval; 1648
1732 retval = path_walk(name, nd); 1649 if (!err)
1733 if (nd->root.mnt) { 1650 err = handle_reval_path(nd);
1734 path_put(&nd->root); 1651
1735 nd->root.mnt = NULL; 1652 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1653 if (!nd->inode->i_op->lookup) {
1654 path_put(&nd->path);
1655 return -ENOTDIR;
1736 } 1656 }
1737 } 1657 }
1738 1658
1659 if (base)
1660 fput(base);
1661
1662 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1663 path_put(&nd->root);
1664 nd->root.mnt = NULL;
1665 }
1666 return err;
1667}
1668
1669static int do_path_lookup(int dfd, const char *name,
1670 unsigned int flags, struct nameidata *nd)
1671{
1672 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1673 if (unlikely(retval == -ECHILD))
1674 retval = path_lookupat(dfd, name, flags, nd);
1675 if (unlikely(retval == -ESTALE))
1676 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1677
1739 if (likely(!retval)) { 1678 if (likely(!retval)) {
1740 if (unlikely(!audit_dummy_context())) { 1679 if (unlikely(!audit_dummy_context())) {
1741 if (nd->path.dentry && nd->inode) 1680 if (nd->path.dentry && nd->inode)
1742 audit_inode(name, nd->path.dentry); 1681 audit_inode(name, nd->path.dentry);
1743 } 1682 }
1744 } 1683 }
1745
1746 return retval; 1684 return retval;
1747} 1685}
1748 1686
1749int path_lookup(const char *name, unsigned int flags, 1687int kern_path_parent(const char *name, struct nameidata *nd)
1750 struct nameidata *nd)
1751{ 1688{
1752 return do_path_lookup(AT_FDCWD, name, flags, nd); 1689 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1753} 1690}
1754 1691
1755int kern_path(const char *name, unsigned int flags, struct path *path) 1692int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1773,29 +1710,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1773 const char *name, unsigned int flags, 1710 const char *name, unsigned int flags,
1774 struct nameidata *nd) 1711 struct nameidata *nd)
1775{ 1712{
1776 int retval; 1713 nd->root.dentry = dentry;
1777 1714 nd->root.mnt = mnt;
1778 /* same as do_path_lookup */ 1715 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1779 nd->last_type = LAST_ROOT; 1716 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1780 nd->flags = flags;
1781 nd->depth = 0;
1782
1783 nd->path.dentry = dentry;
1784 nd->path.mnt = mnt;
1785 path_get(&nd->path);
1786 nd->root = nd->path;
1787 path_get(&nd->root);
1788 nd->inode = nd->path.dentry->d_inode;
1789
1790 retval = path_walk(name, nd);
1791 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1792 nd->inode))
1793 audit_inode(name, nd->path.dentry);
1794
1795 path_put(&nd->root);
1796 nd->root.mnt = NULL;
1797
1798 return retval;
1799} 1717}
1800 1718
1801static struct dentry *__lookup_hash(struct qstr *name, 1719static struct dentry *__lookup_hash(struct qstr *name,
@@ -1810,17 +1728,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1810 return ERR_PTR(err); 1728 return ERR_PTR(err);
1811 1729
1812 /* 1730 /*
1813 * See if the low-level filesystem might want
1814 * to use its own hash..
1815 */
1816 if (base->d_flags & DCACHE_OP_HASH) {
1817 err = base->d_op->d_hash(base, inode, name);
1818 dentry = ERR_PTR(err);
1819 if (err < 0)
1820 goto out;
1821 }
1822
1823 /*
1824 * Don't bother with __d_lookup: callers are for creat as 1731 * Don't bother with __d_lookup: callers are for creat as
1825 * well as unlink, so a lot of the time it would cost 1732 * well as unlink, so a lot of the time it would cost
1826 * a double lookup. 1733 * a double lookup.
@@ -1832,7 +1739,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1832 1739
1833 if (!dentry) 1740 if (!dentry)
1834 dentry = d_alloc_and_lookup(base, name, nd); 1741 dentry = d_alloc_and_lookup(base, name, nd);
1835out: 1742
1836 return dentry; 1743 return dentry;
1837} 1744}
1838 1745
@@ -1846,28 +1753,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1846 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1753 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1847} 1754}
1848 1755
1849static int __lookup_one_len(const char *name, struct qstr *this,
1850 struct dentry *base, int len)
1851{
1852 unsigned long hash;
1853 unsigned int c;
1854
1855 this->name = name;
1856 this->len = len;
1857 if (!len)
1858 return -EACCES;
1859
1860 hash = init_name_hash();
1861 while (len--) {
1862 c = *(const unsigned char *)name++;
1863 if (c == '/' || c == '\0')
1864 return -EACCES;
1865 hash = partial_name_hash(c, hash);
1866 }
1867 this->hash = end_name_hash(hash);
1868 return 0;
1869}
1870
1871/** 1756/**
1872 * lookup_one_len - filesystem helper to lookup single pathname component 1757 * lookup_one_len - filesystem helper to lookup single pathname component
1873 * @name: pathname component to lookup 1758 * @name: pathname component to lookup
@@ -1881,14 +1766,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1881 */ 1766 */
1882struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1767struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1883{ 1768{
1884 int err;
1885 struct qstr this; 1769 struct qstr this;
1770 unsigned long hash;
1771 unsigned int c;
1886 1772
1887 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1773 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1888 1774
1889 err = __lookup_one_len(name, &this, base, len); 1775 this.name = name;
1890 if (err) 1776 this.len = len;
1891 return ERR_PTR(err); 1777 if (!len)
1778 return ERR_PTR(-EACCES);
1779
1780 hash = init_name_hash();
1781 while (len--) {
1782 c = *(const unsigned char *)name++;
1783 if (c == '/' || c == '\0')
1784 return ERR_PTR(-EACCES);
1785 hash = partial_name_hash(c, hash);
1786 }
1787 this.hash = end_name_hash(hash);
1788 /*
1789 * See if the low-level filesystem might want
1790 * to use its own hash..
1791 */
1792 if (base->d_flags & DCACHE_OP_HASH) {
1793 int err = base->d_op->d_hash(base, base->d_inode, &this);
1794 if (err < 0)
1795 return ERR_PTR(err);
1796 }
1892 1797
1893 return __lookup_hash(&this, base, NULL); 1798 return __lookup_hash(&this, base, NULL);
1894} 1799}
@@ -1897,7 +1802,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1897 struct path *path) 1802 struct path *path)
1898{ 1803{
1899 struct nameidata nd; 1804 struct nameidata nd;
1900 char *tmp = getname(name); 1805 char *tmp = getname_flags(name, flags);
1901 int err = PTR_ERR(tmp); 1806 int err = PTR_ERR(tmp);
1902 if (!IS_ERR(tmp)) { 1807 if (!IS_ERR(tmp)) {
1903 1808
@@ -2077,12 +1982,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2077 return error; 1982 return error;
2078} 1983}
2079 1984
2080int may_open(struct path *path, int acc_mode, int flag) 1985static int may_open(struct path *path, int acc_mode, int flag)
2081{ 1986{
2082 struct dentry *dentry = path->dentry; 1987 struct dentry *dentry = path->dentry;
2083 struct inode *inode = dentry->d_inode; 1988 struct inode *inode = dentry->d_inode;
2084 int error; 1989 int error;
2085 1990
1991 /* O_PATH? */
1992 if (!acc_mode)
1993 return 0;
1994
2086 if (!inode) 1995 if (!inode)
2087 return -ENOENT; 1996 return -ENOENT;
2088 1997
@@ -2151,34 +2060,6 @@ static int handle_truncate(struct file *filp)
2151} 2060}
2152 2061
2153/* 2062/*
2154 * Be careful about ever adding any more callers of this
2155 * function. Its flags must be in the namei format, not
2156 * what get passed to sys_open().
2157 */
2158static int __open_namei_create(struct nameidata *nd, struct path *path,
2159 int open_flag, int mode)
2160{
2161 int error;
2162 struct dentry *dir = nd->path.dentry;
2163
2164 if (!IS_POSIXACL(dir->d_inode))
2165 mode &= ~current_umask();
2166 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2167 if (error)
2168 goto out_unlock;
2169 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2170out_unlock:
2171 mutex_unlock(&dir->d_inode->i_mutex);
2172 dput(nd->path.dentry);
2173 nd->path.dentry = path->dentry;
2174
2175 if (error)
2176 return error;
2177 /* Don't check for write permission, don't truncate */
2178 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2179}
2180
2181/*
2182 * Note that while the flag value (low two bits) for sys_open means: 2063 * Note that while the flag value (low two bits) for sys_open means:
2183 * 00 - read-only 2064 * 00 - read-only
2184 * 01 - write-only 2065 * 01 - write-only
@@ -2202,126 +2083,115 @@ static inline int open_to_namei_flags(int flag)
2202 return flag; 2083 return flag;
2203} 2084}
2204 2085
2205static int open_will_truncate(int flag, struct inode *inode)
2206{
2207 /*
2208 * We'll never write to the fs underlying
2209 * a device file.
2210 */
2211 if (special_file(inode->i_mode))
2212 return 0;
2213 return (flag & O_TRUNC);
2214}
2215
2216static struct file *finish_open(struct nameidata *nd,
2217 int open_flag, int acc_mode)
2218{
2219 struct file *filp;
2220 int will_truncate;
2221 int error;
2222
2223 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2224 if (will_truncate) {
2225 error = mnt_want_write(nd->path.mnt);
2226 if (error)
2227 goto exit;
2228 }
2229 error = may_open(&nd->path, acc_mode, open_flag);
2230 if (error) {
2231 if (will_truncate)
2232 mnt_drop_write(nd->path.mnt);
2233 goto exit;
2234 }
2235 filp = nameidata_to_filp(nd);
2236 if (!IS_ERR(filp)) {
2237 error = ima_file_check(filp, acc_mode);
2238 if (error) {
2239 fput(filp);
2240 filp = ERR_PTR(error);
2241 }
2242 }
2243 if (!IS_ERR(filp)) {
2244 if (will_truncate) {
2245 error = handle_truncate(filp);
2246 if (error) {
2247 fput(filp);
2248 filp = ERR_PTR(error);
2249 }
2250 }
2251 }
2252 /*
2253 * It is now safe to drop the mnt write
2254 * because the filp has had a write taken
2255 * on its behalf.
2256 */
2257 if (will_truncate)
2258 mnt_drop_write(nd->path.mnt);
2259 path_put(&nd->path);
2260 return filp;
2261
2262exit:
2263 path_put(&nd->path);
2264 return ERR_PTR(error);
2265}
2266
2267/* 2086/*
2268 * Handle O_CREAT case for do_filp_open 2087 * Handle the last step of open()
2269 */ 2088 */
2270static struct file *do_last(struct nameidata *nd, struct path *path, 2089static struct file *do_last(struct nameidata *nd, struct path *path,
2271 int open_flag, int acc_mode, 2090 const struct open_flags *op, const char *pathname)
2272 int mode, const char *pathname)
2273{ 2091{
2274 struct dentry *dir = nd->path.dentry; 2092 struct dentry *dir = nd->path.dentry;
2093 struct dentry *dentry;
2094 int open_flag = op->open_flag;
2095 int will_truncate = open_flag & O_TRUNC;
2096 int want_write = 0;
2097 int acc_mode = op->acc_mode;
2275 struct file *filp; 2098 struct file *filp;
2276 int error = -EISDIR; 2099 int error;
2100
2101 nd->flags &= ~LOOKUP_PARENT;
2102 nd->flags |= op->intent;
2277 2103
2278 switch (nd->last_type) { 2104 switch (nd->last_type) {
2279 case LAST_DOTDOT: 2105 case LAST_DOTDOT:
2280 follow_dotdot(nd);
2281 dir = nd->path.dentry;
2282 case LAST_DOT: 2106 case LAST_DOT:
2283 if (need_reval_dot(dir)) { 2107 error = handle_dots(nd, nd->last_type);
2284 int status = d_revalidate(nd->path.dentry, nd); 2108 if (error)
2285 if (!status) 2109 return ERR_PTR(error);
2286 status = -ESTALE;
2287 if (status < 0) {
2288 error = status;
2289 goto exit;
2290 }
2291 }
2292 /* fallthrough */ 2110 /* fallthrough */
2293 case LAST_ROOT: 2111 case LAST_ROOT:
2294 goto exit; 2112 if (nd->flags & LOOKUP_RCU) {
2113 if (nameidata_drop_rcu_last(nd))
2114 return ERR_PTR(-ECHILD);
2115 }
2116 error = handle_reval_path(nd);
2117 if (error)
2118 goto exit;
2119 audit_inode(pathname, nd->path.dentry);
2120 if (open_flag & O_CREAT) {
2121 error = -EISDIR;
2122 goto exit;
2123 }
2124 goto ok;
2295 case LAST_BIND: 2125 case LAST_BIND:
2126 /* can't be RCU mode here */
2127 error = handle_reval_path(nd);
2128 if (error)
2129 goto exit;
2296 audit_inode(pathname, dir); 2130 audit_inode(pathname, dir);
2297 goto ok; 2131 goto ok;
2298 } 2132 }
2299 2133
2134 if (!(open_flag & O_CREAT)) {
2135 int symlink_ok = 0;
2136 if (nd->last.name[nd->last.len])
2137 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2138 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2139 symlink_ok = 1;
2140 /* we _can_ be in RCU mode here */
2141 error = walk_component(nd, path, &nd->last, LAST_NORM,
2142 !symlink_ok);
2143 if (error < 0)
2144 return ERR_PTR(error);
2145 if (error) /* symlink */
2146 return NULL;
2147 /* sayonara */
2148 if (nd->flags & LOOKUP_RCU) {
2149 if (nameidata_drop_rcu_last(nd))
2150 return ERR_PTR(-ECHILD);
2151 }
2152
2153 error = -ENOTDIR;
2154 if (nd->flags & LOOKUP_DIRECTORY) {
2155 if (!nd->inode->i_op->lookup)
2156 goto exit;
2157 }
2158 audit_inode(pathname, nd->path.dentry);
2159 goto ok;
2160 }
2161
2162 /* create side of things */
2163
2164 if (nd->flags & LOOKUP_RCU) {
2165 if (nameidata_drop_rcu_last(nd))
2166 return ERR_PTR(-ECHILD);
2167 }
2168
2169 audit_inode(pathname, dir);
2170 error = -EISDIR;
2300 /* trailing slashes? */ 2171 /* trailing slashes? */
2301 if (nd->last.name[nd->last.len]) 2172 if (nd->last.name[nd->last.len])
2302 goto exit; 2173 goto exit;
2303 2174
2304 mutex_lock(&dir->d_inode->i_mutex); 2175 mutex_lock(&dir->d_inode->i_mutex);
2305 2176
2306 path->dentry = lookup_hash(nd); 2177 dentry = lookup_hash(nd);
2307 path->mnt = nd->path.mnt; 2178 error = PTR_ERR(dentry);
2308 2179 if (IS_ERR(dentry)) {
2309 error = PTR_ERR(path->dentry);
2310 if (IS_ERR(path->dentry)) {
2311 mutex_unlock(&dir->d_inode->i_mutex); 2180 mutex_unlock(&dir->d_inode->i_mutex);
2312 goto exit; 2181 goto exit;
2313 } 2182 }
2314 2183
2315 if (IS_ERR(nd->intent.open.file)) { 2184 path->dentry = dentry;
2316 error = PTR_ERR(nd->intent.open.file); 2185 path->mnt = nd->path.mnt;
2317 goto exit_mutex_unlock;
2318 }
2319 2186
2320 /* Negative dentry, just create the file */ 2187 /* Negative dentry, just create the file */
2321 if (!path->dentry->d_inode) { 2188 if (!dentry->d_inode) {
2189 int mode = op->mode;
2190 if (!IS_POSIXACL(dir->d_inode))
2191 mode &= ~current_umask();
2322 /* 2192 /*
2323 * This write is needed to ensure that a 2193 * This write is needed to ensure that a
2324 * ro->rw transition does not occur between 2194 * rw->ro transition does not occur between
2325 * the time when the file is created and when 2195 * the time when the file is created and when
2326 * a permanent write count is taken through 2196 * a permanent write count is taken through
2327 * the 'struct file' in nameidata_to_filp(). 2197 * the 'struct file' in nameidata_to_filp().
@@ -2329,22 +2199,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2329 error = mnt_want_write(nd->path.mnt); 2199 error = mnt_want_write(nd->path.mnt);
2330 if (error) 2200 if (error)
2331 goto exit_mutex_unlock; 2201 goto exit_mutex_unlock;
2332 error = __open_namei_create(nd, path, open_flag, mode); 2202 want_write = 1;
2333 if (error) { 2203 /* Don't check for write permission, don't truncate */
2334 mnt_drop_write(nd->path.mnt); 2204 open_flag &= ~O_TRUNC;
2335 goto exit; 2205 will_truncate = 0;
2336 } 2206 acc_mode = MAY_OPEN;
2337 filp = nameidata_to_filp(nd); 2207 error = security_path_mknod(&nd->path, dentry, mode, 0);
2338 mnt_drop_write(nd->path.mnt); 2208 if (error)
2339 path_put(&nd->path); 2209 goto exit_mutex_unlock;
2340 if (!IS_ERR(filp)) { 2210 error = vfs_create(dir->d_inode, dentry, mode, nd);
2341 error = ima_file_check(filp, acc_mode); 2211 if (error)
2342 if (error) { 2212 goto exit_mutex_unlock;
2343 fput(filp); 2213 mutex_unlock(&dir->d_inode->i_mutex);
2344 filp = ERR_PTR(error); 2214 dput(nd->path.dentry);
2345 } 2215 nd->path.dentry = dentry;
2346 } 2216 goto common;
2347 return filp;
2348 } 2217 }
2349 2218
2350 /* 2219 /*
@@ -2374,7 +2243,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2374 if (S_ISDIR(nd->inode->i_mode)) 2243 if (S_ISDIR(nd->inode->i_mode))
2375 goto exit; 2244 goto exit;
2376ok: 2245ok:
2377 filp = finish_open(nd, open_flag, acc_mode); 2246 if (!S_ISREG(nd->inode->i_mode))
2247 will_truncate = 0;
2248
2249 if (will_truncate) {
2250 error = mnt_want_write(nd->path.mnt);
2251 if (error)
2252 goto exit;
2253 want_write = 1;
2254 }
2255common:
2256 error = may_open(&nd->path, acc_mode, open_flag);
2257 if (error)
2258 goto exit;
2259 filp = nameidata_to_filp(nd);
2260 if (!IS_ERR(filp)) {
2261 error = ima_file_check(filp, op->acc_mode);
2262 if (error) {
2263 fput(filp);
2264 filp = ERR_PTR(error);
2265 }
2266 }
2267 if (!IS_ERR(filp)) {
2268 if (will_truncate) {
2269 error = handle_truncate(filp);
2270 if (error) {
2271 fput(filp);
2272 filp = ERR_PTR(error);
2273 }
2274 }
2275 }
2276out:
2277 if (want_write)
2278 mnt_drop_write(nd->path.mnt);
2279 path_put(&nd->path);
2378 return filp; 2280 return filp;
2379 2281
2380exit_mutex_unlock: 2282exit_mutex_unlock:
@@ -2382,197 +2284,103 @@ exit_mutex_unlock:
2382exit_dput: 2284exit_dput:
2383 path_put_conditional(path, nd); 2285 path_put_conditional(path, nd);
2384exit: 2286exit:
2385 path_put(&nd->path); 2287 filp = ERR_PTR(error);
2386 return ERR_PTR(error); 2288 goto out;
2387} 2289}
2388 2290
2389/* 2291static struct file *path_openat(int dfd, const char *pathname,
2390 * Note that the low bits of the passed in "open_flag" 2292 struct nameidata *nd, const struct open_flags *op, int flags)
2391 * are not the same as in the local variable "flag". See
2392 * open_to_namei_flags() for more details.
2393 */
2394struct file *do_filp_open(int dfd, const char *pathname,
2395 int open_flag, int mode, int acc_mode)
2396{ 2293{
2294 struct file *base = NULL;
2397 struct file *filp; 2295 struct file *filp;
2398 struct nameidata nd;
2399 int error;
2400 struct path path; 2296 struct path path;
2401 int count = 0; 2297 int error;
2402 int flag = open_to_namei_flags(open_flag);
2403 int flags;
2404
2405 if (!(open_flag & O_CREAT))
2406 mode = 0;
2407
2408 /* Must never be set by userspace */
2409 open_flag &= ~FMODE_NONOTIFY;
2410
2411 /*
2412 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2413 * check for O_DSYNC if the need any syncing at all we enforce it's
2414 * always set instead of having to deal with possibly weird behaviour
2415 * for malicious applications setting only __O_SYNC.
2416 */
2417 if (open_flag & __O_SYNC)
2418 open_flag |= O_DSYNC;
2419
2420 if (!acc_mode)
2421 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2422
2423 /* O_TRUNC implies we need access checks for write permissions */
2424 if (open_flag & O_TRUNC)
2425 acc_mode |= MAY_WRITE;
2426
2427 /* Allow the LSM permission hook to distinguish append
2428 access from general write access. */
2429 if (open_flag & O_APPEND)
2430 acc_mode |= MAY_APPEND;
2431
2432 flags = LOOKUP_OPEN;
2433 if (open_flag & O_CREAT) {
2434 flags |= LOOKUP_CREATE;
2435 if (open_flag & O_EXCL)
2436 flags |= LOOKUP_EXCL;
2437 }
2438 if (open_flag & O_DIRECTORY)
2439 flags |= LOOKUP_DIRECTORY;
2440 if (!(open_flag & O_NOFOLLOW))
2441 flags |= LOOKUP_FOLLOW;
2442 2298
2443 filp = get_empty_filp(); 2299 filp = get_empty_filp();
2444 if (!filp) 2300 if (!filp)
2445 return ERR_PTR(-ENFILE); 2301 return ERR_PTR(-ENFILE);
2446 2302
2447 filp->f_flags = open_flag; 2303 filp->f_flags = op->open_flag;
2448 nd.intent.open.file = filp; 2304 nd->intent.open.file = filp;
2449 nd.intent.open.flags = flag; 2305 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2450 nd.intent.open.create_mode = mode; 2306 nd->intent.open.create_mode = op->mode;
2451
2452 if (open_flag & O_CREAT)
2453 goto creat;
2454 2307
2455 /* !O_CREAT, simple open */ 2308 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2456 error = do_path_lookup(dfd, pathname, flags, &nd);
2457 if (unlikely(error)) 2309 if (unlikely(error))
2458 goto out_filp; 2310 goto out_filp;
2459 error = -ELOOP;
2460 if (!(nd.flags & LOOKUP_FOLLOW)) {
2461 if (nd.inode->i_op->follow_link)
2462 goto out_path;
2463 }
2464 error = -ENOTDIR;
2465 if (nd.flags & LOOKUP_DIRECTORY) {
2466 if (!nd.inode->i_op->lookup)
2467 goto out_path;
2468 }
2469 audit_inode(pathname, nd.path.dentry);
2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2472 return filp;
2473 2311
2474creat: 2312 current->total_link_count = 0;
2475 /* OK, have to create the file. Find the parent. */ 2313 error = link_path_walk(pathname, nd);
2476 error = path_init_rcu(dfd, pathname,
2477 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2478 if (error)
2479 goto out_filp;
2480 error = path_walk_rcu(pathname, &nd);
2481 path_finish_rcu(&nd);
2482 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2483 /* slower, locked walk */
2484 if (error == -ESTALE) {
2485reval:
2486 flags |= LOOKUP_REVAL;
2487 }
2488 error = path_init(dfd, pathname,
2489 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2490 if (error)
2491 goto out_filp;
2492
2493 error = path_walk_simple(pathname, &nd);
2494 }
2495 if (unlikely(error)) 2314 if (unlikely(error))
2496 goto out_filp; 2315 goto out_filp;
2497 if (unlikely(!audit_dummy_context()))
2498 audit_inode(pathname, nd.path.dentry);
2499 2316
2500 /* 2317 filp = do_last(nd, &path, op, pathname);
2501 * We have the parent and last component.
2502 */
2503 nd.flags = flags;
2504 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2505 while (unlikely(!filp)) { /* trailing symlink */ 2318 while (unlikely(!filp)) { /* trailing symlink */
2506 struct path link = path; 2319 struct path link = path;
2507 struct inode *linki = link.dentry->d_inode;
2508 void *cookie; 2320 void *cookie;
2509 error = -ELOOP; 2321 if (!(nd->flags & LOOKUP_FOLLOW)) {
2510 if (!(nd.flags & LOOKUP_FOLLOW)) 2322 path_put_conditional(&path, nd);
2511 goto exit_dput; 2323 path_put(&nd->path);
2512 if (count++ == 32) 2324 filp = ERR_PTR(-ELOOP);
2513 goto exit_dput; 2325 break;
2514 /*
2515 * This is subtle. Instead of calling do_follow_link() we do
2516 * the thing by hands. The reason is that this way we have zero
2517 * link_count and path_walk() (called from ->follow_link)
2518 * honoring LOOKUP_PARENT. After that we have the parent and
2519 * last component, i.e. we are in the same situation as after
2520 * the first path_walk(). Well, almost - if the last component
2521 * is normal we get its copy stored in nd->last.name and we will
2522 * have to putname() it when we are done. Procfs-like symlinks
2523 * just set LAST_BIND.
2524 */
2525 nd.flags |= LOOKUP_PARENT;
2526 error = security_inode_follow_link(link.dentry, &nd);
2527 if (error)
2528 goto exit_dput;
2529 error = __do_follow_link(&link, &nd, &cookie);
2530 if (unlikely(error)) {
2531 if (!IS_ERR(cookie) && linki->i_op->put_link)
2532 linki->i_op->put_link(link.dentry, &nd, cookie);
2533 /* nd.path had been dropped */
2534 nd.path = link;
2535 goto out_path;
2536 } 2326 }
2537 nd.flags &= ~LOOKUP_PARENT; 2327 nd->flags |= LOOKUP_PARENT;
2538 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2539 if (linki->i_op->put_link) 2329 error = follow_link(&link, nd, &cookie);
2540 linki->i_op->put_link(link.dentry, &nd, cookie); 2330 if (unlikely(error))
2541 path_put(&link); 2331 filp = ERR_PTR(error);
2332 else
2333 filp = do_last(nd, &path, op, pathname);
2334 put_link(nd, &link, cookie);
2542 } 2335 }
2543out: 2336out:
2544 if (nd.root.mnt) 2337 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2545 path_put(&nd.root); 2338 path_put(&nd->root);
2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2339 if (base)
2547 goto reval; 2340 fput(base);
2548 release_open_intent(&nd); 2341 release_open_intent(nd);
2549 return filp; 2342 return filp;
2550 2343
2551exit_dput:
2552 path_put_conditional(&path, &nd);
2553out_path:
2554 path_put(&nd.path);
2555out_filp: 2344out_filp:
2556 filp = ERR_PTR(error); 2345 filp = ERR_PTR(error);
2557 goto out; 2346 goto out;
2558} 2347}
2559 2348
2560/** 2349struct file *do_filp_open(int dfd, const char *pathname,
2561 * filp_open - open file and return file pointer 2350 const struct open_flags *op, int flags)
2562 * 2351{
2563 * @filename: path to open 2352 struct nameidata nd;
2564 * @flags: open flags as per the open(2) second argument 2353 struct file *filp;
2565 * @mode: mode for the new file if O_CREAT is set, else ignored 2354
2566 * 2355 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2567 * This is the helper to open a file from kernelspace if you really 2356 if (unlikely(filp == ERR_PTR(-ECHILD)))
2568 * have to. But in generally you should not do this, so please move 2357 filp = path_openat(dfd, pathname, &nd, op, flags);
2569 * along, nothing to see here.. 2358 if (unlikely(filp == ERR_PTR(-ESTALE)))
2570 */ 2359 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2571struct file *filp_open(const char *filename, int flags, int mode) 2360 return filp;
2361}
2362
2363struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2364 const char *name, const struct open_flags *op, int flags)
2572{ 2365{
2573 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2366 struct nameidata nd;
2367 struct file *file;
2368
2369 nd.root.mnt = mnt;
2370 nd.root.dentry = dentry;
2371
2372 flags |= LOOKUP_ROOT;
2373
2374 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2375 return ERR_PTR(-ELOOP);
2376
2377 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2378 if (unlikely(file == ERR_PTR(-ECHILD)))
2379 file = path_openat(-1, name, &nd, op, flags);
2380 if (unlikely(file == ERR_PTR(-ESTALE)))
2381 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2382 return file;
2574} 2383}
2575EXPORT_SYMBOL(filp_open);
2576 2384
2577/** 2385/**
2578 * lookup_create - lookup a dentry, creating it if it doesn't exist 2386 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3111,7 +2919,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3111 return error; 2919 return error;
3112 2920
3113 mutex_lock(&inode->i_mutex); 2921 mutex_lock(&inode->i_mutex);
3114 error = dir->i_op->link(old_dentry, dir, new_dentry); 2922 /* Make sure we don't allow creating hardlink to an unlinked file */
2923 if (inode->i_nlink == 0)
2924 error = -ENOENT;
2925 else
2926 error = dir->i_op->link(old_dentry, dir, new_dentry);
3115 mutex_unlock(&inode->i_mutex); 2927 mutex_unlock(&inode->i_mutex);
3116 if (!error) 2928 if (!error)
3117 fsnotify_link(dir, inode, new_dentry); 2929 fsnotify_link(dir, inode, new_dentry);
@@ -3133,15 +2945,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3133 struct dentry *new_dentry; 2945 struct dentry *new_dentry;
3134 struct nameidata nd; 2946 struct nameidata nd;
3135 struct path old_path; 2947 struct path old_path;
2948 int how = 0;
3136 int error; 2949 int error;
3137 char *to; 2950 char *to;
3138 2951
3139 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2952 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3140 return -EINVAL; 2953 return -EINVAL;
2954 /*
2955 * To use null names we require CAP_DAC_READ_SEARCH
2956 * This ensures that not everyone will be able to create
2957 * handlink using the passed filedescriptor.
2958 */
2959 if (flags & AT_EMPTY_PATH) {
2960 if (!capable(CAP_DAC_READ_SEARCH))
2961 return -ENOENT;
2962 how = LOOKUP_EMPTY;
2963 }
2964
2965 if (flags & AT_SYMLINK_FOLLOW)
2966 how |= LOOKUP_FOLLOW;
3141 2967
3142 error = user_path_at(olddfd, oldname, 2968 error = user_path_at(olddfd, oldname, how, &old_path);
3143 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3144 &old_path);
3145 if (error) 2969 if (error)
3146 return error; 2970 return error;
3147 2971
@@ -3578,7 +3402,7 @@ EXPORT_SYMBOL(page_readlink);
3578EXPORT_SYMBOL(__page_symlink); 3402EXPORT_SYMBOL(__page_symlink);
3579EXPORT_SYMBOL(page_symlink); 3403EXPORT_SYMBOL(page_symlink);
3580EXPORT_SYMBOL(page_symlink_inode_operations); 3404EXPORT_SYMBOL(page_symlink_inode_operations);
3581EXPORT_SYMBOL(path_lookup); 3405EXPORT_SYMBOL(kern_path_parent);
3582EXPORT_SYMBOL(kern_path); 3406EXPORT_SYMBOL(kern_path);
3583EXPORT_SYMBOL(vfs_path_lookup); 3407EXPORT_SYMBOL(vfs_path_lookup);
3584EXPORT_SYMBOL(inode_permission); 3408EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index d1edf26025dc..dffe6f49ab93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1002 .show = show_vfsmnt
1003}; 1003};
1004 1004
1005static int uuid_is_nil(u8 *uuid)
1006{
1007 int i;
1008 u8 *cp = (u8 *)uuid;
1009
1010 for (i = 0; i < 16; i++) {
1011 if (*cp++)
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1005static int show_mountinfo(struct seq_file *m, void *v) 1017static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1018{
1007 struct proc_mounts *p = m->private; 1019 struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1052 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1053 seq_puts(m, " unbindable");
1042 1054
1055 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1056 /* print the uuid */
1057 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1058
1043 /* Filesystem specific data */ 1059 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1060 seq_puts(m, " - ");
1045 show_type(m, sb); 1061 show_type(m, sb);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..2f8e61816d75 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..1be36cf65bfc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 298#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
301extern void nfs4_schedule_session_recovery(struct nfs4_session *);
302#else
303static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
304{
305}
301#endif /* CONFIG_NFS_V4_1 */ 306#endif /* CONFIG_NFS_V4_1 */
302 307
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 308extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 312extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 313extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 314extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 315extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 316extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 317extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 318extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 319extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 320extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..b73c34375f60 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 219 goto out_err;
220 } 220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 if (!buf) {
223 dprintk("%s: Not enough memory\n", __func__);
224 goto out_err;
225 }
222 buf[rlen] = '\0'; 226 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 227 memcpy(buf, r_addr, rlen);
224 228
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1ff76acc7e98..0a07e353a961 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,7 +51,6 @@
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h> 52#include <linux/xattr.h>
53#include <linux/utsname.h> 53#include <linux/utsname.h>
54#include <linux/mm.h>
55 54
56#include "nfs4_fs.h" 55#include "nfs4_fs.h"
57#include "delegation.h" 56#include "delegation.h"
@@ -257,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
257 case -NFS4ERR_OPENMODE: 256 case -NFS4ERR_OPENMODE:
258 if (state == NULL) 257 if (state == NULL)
259 break; 258 break;
260 nfs4_state_mark_reclaim_nograce(clp, state); 259 nfs4_schedule_stateid_recovery(server, state);
261 goto do_state_recovery; 260 goto wait_on_recovery;
262 case -NFS4ERR_STALE_STATEID: 261 case -NFS4ERR_STALE_STATEID:
263 case -NFS4ERR_STALE_CLIENTID: 262 case -NFS4ERR_STALE_CLIENTID:
264 case -NFS4ERR_EXPIRED: 263 case -NFS4ERR_EXPIRED:
265 goto do_state_recovery; 264 nfs4_schedule_lease_recovery(clp);
265 goto wait_on_recovery;
266#if defined(CONFIG_NFS_V4_1) 266#if defined(CONFIG_NFS_V4_1)
267 case -NFS4ERR_BADSESSION: 267 case -NFS4ERR_BADSESSION:
268 case -NFS4ERR_BADSLOT: 268 case -NFS4ERR_BADSLOT:
@@ -273,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
273 case -NFS4ERR_SEQ_MISORDERED: 273 case -NFS4ERR_SEQ_MISORDERED:
274 dprintk("%s ERROR: %d Reset session\n", __func__, 274 dprintk("%s ERROR: %d Reset session\n", __func__,
275 errorcode); 275 errorcode);
276 nfs4_schedule_state_recovery(clp); 276 nfs4_schedule_session_recovery(clp->cl_session);
277 exception->retry = 1; 277 exception->retry = 1;
278 break; 278 break;
279#endif /* defined(CONFIG_NFS_V4_1) */ 279#endif /* defined(CONFIG_NFS_V4_1) */
@@ -296,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
296 } 296 }
297 /* We failed to handle the error */ 297 /* We failed to handle the error */
298 return nfs4_map_errors(ret); 298 return nfs4_map_errors(ret);
299do_state_recovery: 299wait_on_recovery:
300 nfs4_schedule_state_recovery(clp);
301 ret = nfs4_wait_clnt_recover(clp); 300 ret = nfs4_wait_clnt_recover(clp);
302 if (ret == 0) 301 if (ret == 0)
303 exception->retry = 1; 302 exception->retry = 1;
@@ -436,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
436 clp = res->sr_session->clp; 435 clp = res->sr_session->clp;
437 do_renew_lease(clp, timestamp); 436 do_renew_lease(clp, timestamp);
438 /* Check sequence flags */ 437 /* Check sequence flags */
439 if (atomic_read(&clp->cl_count) > 1) 438 if (res->sr_status_flags != 0)
440 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 439 nfs4_schedule_lease_recovery(clp);
441 break; 440 break;
442 case -NFS4ERR_DELAY: 441 case -NFS4ERR_DELAY:
443 /* The server detected a resend of the RPC call and 442 /* The server detected a resend of the RPC call and
@@ -1256,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1256 case -NFS4ERR_BAD_HIGH_SLOT: 1255 case -NFS4ERR_BAD_HIGH_SLOT:
1257 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1258 case -NFS4ERR_DEADSESSION: 1257 case -NFS4ERR_DEADSESSION:
1259 nfs4_schedule_state_recovery( 1258 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1260 server->nfs_client);
1261 goto out; 1259 goto out;
1262 case -NFS4ERR_STALE_CLIENTID: 1260 case -NFS4ERR_STALE_CLIENTID:
1263 case -NFS4ERR_STALE_STATEID: 1261 case -NFS4ERR_STALE_STATEID:
1264 case -NFS4ERR_EXPIRED: 1262 case -NFS4ERR_EXPIRED:
1265 /* Don't recall a delegation if it was lost */ 1263 /* Don't recall a delegation if it was lost */
1266 nfs4_schedule_state_recovery(server->nfs_client); 1264 nfs4_schedule_lease_recovery(server->nfs_client);
1267 goto out; 1265 goto out;
1268 case -ERESTARTSYS: 1266 case -ERESTARTSYS:
1269 /* 1267 /*
@@ -1272,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1272 */ 1270 */
1273 case -NFS4ERR_ADMIN_REVOKED: 1271 case -NFS4ERR_ADMIN_REVOKED:
1274 case -NFS4ERR_BAD_STATEID: 1272 case -NFS4ERR_BAD_STATEID:
1275 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1273 nfs4_schedule_stateid_recovery(server, state);
1276 case -EKEYEXPIRED: 1274 case -EKEYEXPIRED:
1277 /* 1275 /*
1278 * User RPCSEC_GSS context has expired. 1276 * User RPCSEC_GSS context has expired.
@@ -1588,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1588 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1586 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1589 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1587 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1590 break; 1588 break;
1591 nfs4_schedule_state_recovery(clp); 1589 nfs4_schedule_state_manager(clp);
1592 ret = -EIO; 1590 ret = -EIO;
1593 } 1591 }
1594 return ret; 1592 return ret;
@@ -3179,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3179 if (task->tk_status < 0) { 3177 if (task->tk_status < 0) {
3180 /* Unless we're shutting down, schedule state recovery! */ 3178 /* Unless we're shutting down, schedule state recovery! */
3181 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3179 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3182 nfs4_schedule_state_recovery(clp); 3180 nfs4_schedule_lease_recovery(clp);
3183 return; 3181 return;
3184 } 3182 }
3185 do_renew_lease(clp, timestamp); 3183 do_renew_lease(clp, timestamp);
@@ -3262,7 +3260,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
3262 spages = pages; 3260 spages = pages;
3263 3261
3264 do { 3262 do {
3265 len = min(PAGE_CACHE_SIZE, buflen); 3263 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3266 newpage = alloc_page(GFP_KERNEL); 3264 newpage = alloc_page(GFP_KERNEL);
3267 3265
3268 if (newpage == NULL) 3266 if (newpage == NULL)
@@ -3504,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3504 case -NFS4ERR_OPENMODE: 3502 case -NFS4ERR_OPENMODE:
3505 if (state == NULL) 3503 if (state == NULL)
3506 break; 3504 break;
3507 nfs4_state_mark_reclaim_nograce(clp, state); 3505 nfs4_schedule_stateid_recovery(server, state);
3508 goto do_state_recovery; 3506 goto wait_on_recovery;
3509 case -NFS4ERR_STALE_STATEID: 3507 case -NFS4ERR_STALE_STATEID:
3510 case -NFS4ERR_STALE_CLIENTID: 3508 case -NFS4ERR_STALE_CLIENTID:
3511 case -NFS4ERR_EXPIRED: 3509 case -NFS4ERR_EXPIRED:
3512 goto do_state_recovery; 3510 nfs4_schedule_lease_recovery(clp);
3511 goto wait_on_recovery;
3513#if defined(CONFIG_NFS_V4_1) 3512#if defined(CONFIG_NFS_V4_1)
3514 case -NFS4ERR_BADSESSION: 3513 case -NFS4ERR_BADSESSION:
3515 case -NFS4ERR_BADSLOT: 3514 case -NFS4ERR_BADSLOT:
@@ -3520,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3520 case -NFS4ERR_SEQ_MISORDERED: 3519 case -NFS4ERR_SEQ_MISORDERED:
3521 dprintk("%s ERROR %d, Reset session\n", __func__, 3520 dprintk("%s ERROR %d, Reset session\n", __func__,
3522 task->tk_status); 3521 task->tk_status);
3523 nfs4_schedule_state_recovery(clp); 3522 nfs4_schedule_session_recovery(clp->cl_session);
3524 task->tk_status = 0; 3523 task->tk_status = 0;
3525 return -EAGAIN; 3524 return -EAGAIN;
3526#endif /* CONFIG_NFS_V4_1 */ 3525#endif /* CONFIG_NFS_V4_1 */
@@ -3537,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3537 } 3536 }
3538 task->tk_status = nfs4_map_errors(task->tk_status); 3537 task->tk_status = nfs4_map_errors(task->tk_status);
3539 return 0; 3538 return 0;
3540do_state_recovery: 3539wait_on_recovery:
3541 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3540 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3542 nfs4_schedule_state_recovery(clp);
3543 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3541 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3544 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3542 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3545 task->tk_status = 0; 3543 task->tk_status = 0;
@@ -4150,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
4150 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4148 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4151 data->arg.lock_seqid); 4149 data->arg.lock_seqid);
4152 if (!IS_ERR(task)) 4150 if (!IS_ERR(task))
4153 rpc_put_task(task); 4151 rpc_put_task_async(task);
4154 dprintk("%s: cancelling lock!\n", __func__); 4152 dprintk("%s: cancelling lock!\n", __func__);
4155 } else 4153 } else
4156 nfs_free_seqid(data->arg.lock_seqid); 4154 nfs_free_seqid(data->arg.lock_seqid);
@@ -4174,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4174 4172
4175static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4173static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4176{ 4174{
4177 struct nfs_client *clp = server->nfs_client;
4178 struct nfs4_state *state = lsp->ls_state;
4179
4180 switch (error) { 4175 switch (error) {
4181 case -NFS4ERR_ADMIN_REVOKED: 4176 case -NFS4ERR_ADMIN_REVOKED:
4182 case -NFS4ERR_BAD_STATEID: 4177 case -NFS4ERR_BAD_STATEID:
4183 case -NFS4ERR_EXPIRED: 4178 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4184 if (new_lock_owner != 0 || 4179 if (new_lock_owner != 0 ||
4185 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4180 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4186 nfs4_state_mark_reclaim_nograce(clp, state); 4181 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4187 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4188 break; 4182 break;
4189 case -NFS4ERR_STALE_STATEID: 4183 case -NFS4ERR_STALE_STATEID:
4190 if (new_lock_owner != 0 ||
4191 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4192 nfs4_state_mark_reclaim_reboot(clp, state);
4193 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4184 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4185 case -NFS4ERR_EXPIRED:
4186 nfs4_schedule_lease_recovery(server->nfs_client);
4194 }; 4187 };
4195} 4188}
4196 4189
@@ -4406,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4406 case -NFS4ERR_EXPIRED: 4399 case -NFS4ERR_EXPIRED:
4407 case -NFS4ERR_STALE_CLIENTID: 4400 case -NFS4ERR_STALE_CLIENTID:
4408 case -NFS4ERR_STALE_STATEID: 4401 case -NFS4ERR_STALE_STATEID:
4402 nfs4_schedule_lease_recovery(server->nfs_client);
4403 goto out;
4409 case -NFS4ERR_BADSESSION: 4404 case -NFS4ERR_BADSESSION:
4410 case -NFS4ERR_BADSLOT: 4405 case -NFS4ERR_BADSLOT:
4411 case -NFS4ERR_BAD_HIGH_SLOT: 4406 case -NFS4ERR_BAD_HIGH_SLOT:
4412 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4407 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4413 case -NFS4ERR_DEADSESSION: 4408 case -NFS4ERR_DEADSESSION:
4414 nfs4_schedule_state_recovery(server->nfs_client); 4409 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4415 goto out; 4410 goto out;
4416 case -ERESTARTSYS: 4411 case -ERESTARTSYS:
4417 /* 4412 /*
@@ -4421,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4421 case -NFS4ERR_ADMIN_REVOKED: 4416 case -NFS4ERR_ADMIN_REVOKED:
4422 case -NFS4ERR_BAD_STATEID: 4417 case -NFS4ERR_BAD_STATEID:
4423 case -NFS4ERR_OPENMODE: 4418 case -NFS4ERR_OPENMODE:
4424 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4419 nfs4_schedule_stateid_recovery(server, state);
4425 err = 0; 4420 err = 0;
4426 goto out; 4421 goto out;
4427 case -EKEYEXPIRED: 4422 case -EKEYEXPIRED:
@@ -5028,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5028 int status; 5023 int status;
5029 unsigned *ptr; 5024 unsigned *ptr;
5030 struct nfs4_session *session = clp->cl_session; 5025 struct nfs4_session *session = clp->cl_session;
5026 long timeout = 0;
5027 int err;
5031 5028
5032 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5029 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5033 5030
5034 status = _nfs4_proc_create_session(clp); 5031 do {
5032 status = _nfs4_proc_create_session(clp);
5033 if (status == -NFS4ERR_DELAY) {
5034 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5035 if (err)
5036 status = err;
5037 }
5038 } while (status == -NFS4ERR_DELAY);
5039
5035 if (status) 5040 if (status)
5036 goto out; 5041 goto out;
5037 5042
@@ -5140,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5140 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5145 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5141 return -EAGAIN; 5146 return -EAGAIN;
5142 default: 5147 default:
5143 nfs4_schedule_state_recovery(clp); 5148 nfs4_schedule_lease_recovery(clp);
5144 } 5149 }
5145 return 0; 5150 return 0;
5146} 5151}
@@ -5227,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5227 if (IS_ERR(task)) 5232 if (IS_ERR(task))
5228 ret = PTR_ERR(task); 5233 ret = PTR_ERR(task);
5229 else 5234 else
5230 rpc_put_task(task); 5235 rpc_put_task_async(task);
5231 dprintk("<-- %s status=%d\n", __func__, ret); 5236 dprintk("<-- %s status=%d\n", __func__, ret);
5232 return ret; 5237 return ret;
5233} 5238}
@@ -5243,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5243 goto out; 5248 goto out;
5244 } 5249 }
5245 ret = rpc_wait_for_completion_task(task); 5250 ret = rpc_wait_for_completion_task(task);
5246 if (!ret) 5251 if (!ret) {
5252 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5253
5254 if (task->tk_status == 0)
5255 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5247 ret = task->tk_status; 5256 ret = task->tk_status;
5257 }
5248 rpc_put_task(task); 5258 rpc_put_task(task);
5249out: 5259out:
5250 dprintk("<-- %s status=%d\n", __func__, ret); 5260 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5281,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5281 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5291 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5282 return -EAGAIN; 5292 return -EAGAIN;
5283 default: 5293 default:
5284 nfs4_schedule_state_recovery(clp); 5294 nfs4_schedule_lease_recovery(clp);
5285 } 5295 }
5286 return 0; 5296 return 0;
5287} 5297}
@@ -5349,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5349 status = PTR_ERR(task); 5359 status = PTR_ERR(task);
5350 goto out; 5360 goto out;
5351 } 5361 }
5362 status = nfs4_wait_for_completion_rpc_task(task);
5363 if (status == 0)
5364 status = task->tk_status;
5352 rpc_put_task(task); 5365 rpc_put_task(task);
5353 return 0; 5366 return 0;
5354out: 5367out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..0592288f9f06 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1007}
1008 1008
1009/* 1009/*
1010 * Schedule a state recovery attempt 1010 * Schedule a lease recovery attempt
1011 */ 1011 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1012void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1013{
1014 if (!clp) 1014 if (!clp)
1015 return; 1015 return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1018 nfs4_schedule_state_manager(clp);
1019} 1019}
1020 1020
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1021static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1022{
1023 1023
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1032 return 1;
1033} 1033}
1034 1034
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1035static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1036{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1041 return 1;
1042} 1042}
1043 1043
1044void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1045{
1046 struct nfs_client *clp = server->nfs_client;
1047
1048 nfs4_state_mark_reclaim_nograce(clp, state);
1049 nfs4_schedule_state_manager(clp);
1050}
1051
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1052static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1053{
1046 struct inode *inode = state->inode; 1054 struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1444}
1437 1445
1438#ifdef CONFIG_NFS_V4_1 1446#ifdef CONFIG_NFS_V4_1
1447void nfs4_schedule_session_recovery(struct nfs4_session *session)
1448{
1449 nfs4_schedule_lease_recovery(session->clp);
1450}
1451
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1452void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1453{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1454 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1455 nfs4_schedule_state_manager(clp);
1443} 1456}
1444 1457
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1458static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1460 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1461 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1462 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1463 nfs4_schedule_state_manager(clp);
1451 } 1464 }
1452} 1465}
1453 1466
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1468{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1469 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1470 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1471 nfs4_schedule_state_manager(clp);
1459 } 1472 }
1460} 1473}
1461 1474
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1488{
1476 nfs_expire_all_delegations(clp); 1489 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1490 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1491 nfs4_schedule_state_manager(clp);
1479} 1492}
1480 1493
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1494void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..94d50e86a124 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1660
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1662 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1663 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1665 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1666
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4694 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4695 if (unlikely(!p))
4696 goto out_overflow; 4696 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4697 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4698 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4699 if (unlikely(!p))
4700 goto out_overflow; 4700 goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..6481d537d69d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 180 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 181 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 182 if (!IS_ERR(task))
183 rpc_put_task(task); 183 rpc_put_task_async(task);
184 return 1; 184 return 1;
185} 185}
186 186
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..42b92d7a9cc4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1292 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1293 if (IS_ERR(task))
1294 return PTR_ERR(task); 1294 return PTR_ERR(task);
1295 if (how & FLUSH_SYNC)
1296 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1297 rpc_put_task(task);
1296 return 0; 1298 return 0;
1297} 1299}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f348..02eb4edf0ece 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
432 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
433 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
434 */ 434 */
435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4); 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
436 if (unlikely(p == NULL)) 436 if (unlikely(p == NULL))
437 goto out_overflow; 437 goto out_overflow;
438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8d..7b566ec14e18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2445static struct nfs4_delegation * 2445static struct nfs4_delegation *
2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2447{ 2447{
2448 struct nfs4_delegation *dp = NULL; 2448 struct nfs4_delegation *dp;
2449 2449
2450 spin_lock(&recall_lock); 2450 spin_lock(&recall_lock);
2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
2453 break; 2453 spin_unlock(&recall_lock);
2454 } 2454 return dp;
2455 }
2455 spin_unlock(&recall_lock); 2456 spin_unlock(&recall_lock);
2456 return dp; 2457 return NULL;
2457} 2458}
2458 2459
2459int share_access_to_flags(u32 share_access) 2460int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1275b8655070..615f0a9f0600 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i; 1145 int i, j;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (i = 0; i < dummy; ++i) 1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy); 1219 READ32(dummy);
1220 break; 1220 break;
1221 case RPC_AUTH_GSS: 1221 case RPC_AUTH_GSS:
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 57 struct ocfs2_super *osb;
58 58
59 if (nd->flags & LOOKUP_RCU) 59 if (nd && nd->flags & LOOKUP_RCU)
60 return -ECHILD; 60 return -ECHILD;
61 61
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19ebc5aad391..29623da133cc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4379,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4379 if (IS_ERR(s)) 4379 if (IS_ERR(s))
4380 return PTR_ERR(s); 4380 return PTR_ERR(s);
4381 4381
4382 error = path_lookup(s, LOOKUP_PARENT, nd); 4382 error = kern_path_parent(s, nd);
4383 if (error) 4383 if (error)
4384 putname(s); 4384 putname(s);
4385 else 4385 else
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b5..3cac0bda46df 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
233 233
234 if (!(file->f_mode & FMODE_WRITE)) 234 if (!(file->f_mode & FMODE_WRITE))
235 return -EBADF; 235 return -EBADF;
236
237 /* It's not possible punch hole on append only file */
238 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
239 return -EPERM;
240
241 if (IS_IMMUTABLE(inode))
242 return -EPERM;
243
236 /* 244 /*
237 * Revalidate the write permissions, in case security policy has 245 * Revalidate the write permissions, in case security policy has
238 * changed since the files were opened. 246 * changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
565{ 573{
566 struct path path; 574 struct path path;
567 int error = -EINVAL; 575 int error = -EINVAL;
568 int follow; 576 int lookup_flags;
569 577
570 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
571 goto out; 579 goto out;
572 580
573 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
574 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
575 if (error) 585 if (error)
576 goto out; 586 goto out;
577 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
661 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
662 const struct cred *cred) 672 const struct cred *cred)
663{ 673{
674 static const struct file_operations empty_fops = {};
664 struct inode *inode; 675 struct inode *inode;
665 int error; 676 int error;
666 677
667 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
668 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
669 inode = dentry->d_inode; 684 inode = dentry->d_inode;
670 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
671 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
679 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
680 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
681 f->f_pos = 0; 696 f->f_pos = 0;
682 f->f_op = fops_get(inode->i_fop);
683 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
684 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
685 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
686 if (error) 707 if (error)
687 goto cleanup_all; 708 goto cleanup_all;
@@ -882,15 +903,110 @@ void fd_install(unsigned int fd, struct file *file)
882 903
883EXPORT_SYMBOL(fd_install); 904EXPORT_SYMBOL(fd_install);
884 905
906static inline int build_open_flags(int flags, int mode, struct open_flags *op)
907{
908 int lookup_flags = 0;
909 int acc_mode;
910
911 if (!(flags & O_CREAT))
912 mode = 0;
913 op->mode = mode;
914
915 /* Must never be set by userspace */
916 flags &= ~FMODE_NONOTIFY;
917
918 /*
919 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
920 * check for O_DSYNC if the need any syncing at all we enforce it's
921 * always set instead of having to deal with possibly weird behaviour
922 * for malicious applications setting only __O_SYNC.
923 */
924 if (flags & __O_SYNC)
925 flags |= O_DSYNC;
926
927 /*
928 * If we have O_PATH in the open flag. Then we
929 * cannot have anything other than the below set of flags
930 */
931 if (flags & O_PATH) {
932 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
933 acc_mode = 0;
934 } else {
935 acc_mode = MAY_OPEN | ACC_MODE(flags);
936 }
937
938 op->open_flag = flags;
939
940 /* O_TRUNC implies we need access checks for write permissions */
941 if (flags & O_TRUNC)
942 acc_mode |= MAY_WRITE;
943
944 /* Allow the LSM permission hook to distinguish append
945 access from general write access. */
946 if (flags & O_APPEND)
947 acc_mode |= MAY_APPEND;
948
949 op->acc_mode = acc_mode;
950
951 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
952
953 if (flags & O_CREAT) {
954 op->intent |= LOOKUP_CREATE;
955 if (flags & O_EXCL)
956 op->intent |= LOOKUP_EXCL;
957 }
958
959 if (flags & O_DIRECTORY)
960 lookup_flags |= LOOKUP_DIRECTORY;
961 if (!(flags & O_NOFOLLOW))
962 lookup_flags |= LOOKUP_FOLLOW;
963 return lookup_flags;
964}
965
966/**
967 * filp_open - open file and return file pointer
968 *
969 * @filename: path to open
970 * @flags: open flags as per the open(2) second argument
971 * @mode: mode for the new file if O_CREAT is set, else ignored
972 *
973 * This is the helper to open a file from kernelspace if you really
974 * have to. But in generally you should not do this, so please move
975 * along, nothing to see here..
976 */
977struct file *filp_open(const char *filename, int flags, int mode)
978{
979 struct open_flags op;
980 int lookup = build_open_flags(flags, mode, &op);
981 return do_filp_open(AT_FDCWD, filename, &op, lookup);
982}
983EXPORT_SYMBOL(filp_open);
984
985struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
986 const char *filename, int flags)
987{
988 struct open_flags op;
989 int lookup = build_open_flags(flags, 0, &op);
990 if (flags & O_CREAT)
991 return ERR_PTR(-EINVAL);
992 if (!filename && (flags & O_DIRECTORY))
993 if (!dentry->d_inode->i_op->lookup)
994 return ERR_PTR(-ENOTDIR);
995 return do_file_open_root(dentry, mnt, filename, &op, lookup);
996}
997EXPORT_SYMBOL(file_open_root);
998
885long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 999long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
886{ 1000{
1001 struct open_flags op;
1002 int lookup = build_open_flags(flags, mode, &op);
887 char *tmp = getname(filename); 1003 char *tmp = getname(filename);
888 int fd = PTR_ERR(tmp); 1004 int fd = PTR_ERR(tmp);
889 1005
890 if (!IS_ERR(tmp)) { 1006 if (!IS_ERR(tmp)) {
891 fd = get_unused_fd_flags(flags); 1007 fd = get_unused_fd_flags(flags);
892 if (fd >= 0) { 1008 if (fd >= 0) {
893 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1009 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
894 if (IS_ERR(f)) { 1010 if (IS_ERR(f)) {
895 put_unused_fd(fd); 1011 put_unused_fd(fd);
896 fd = PTR_ERR(f); 1012 fd = PTR_ERR(f);
@@ -960,8 +1076,10 @@ int filp_close(struct file *filp, fl_owner_t id)
960 if (filp->f_op && filp->f_op->flush) 1076 if (filp->f_op && filp->f_op->flush)
961 retval = filp->f_op->flush(filp, id); 1077 retval = filp->f_op->flush(filp, id);
962 1078
963 dnotify_flush(filp, id); 1079 if (likely(!(filp->f_mode & FMODE_PATH))) {
964 locks_remove_posix(filp, id); 1080 dnotify_flush(filp, id);
1081 locks_remove_posix(filp, id);
1082 }
965 fput(filp); 1083 fput(filp);
966 return retval; 1084 return retval;
967} 1085}
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..be03a0b08b47 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 8
14
13int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
14{ 16{
15 int i; 17 int i;
16 int slot = 1; 18 int slot = 1;
19 unsigned int npartitions;
17 Sector sect; 20 Sector sect;
18 unsigned char *data; 21 unsigned char *data;
19 struct disklabel { 22 struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
45 u8 p_fstype; 48 u8 p_fstype;
46 u8 p_frag; 49 u8 p_frag;
47 __le16 p_cpg; 50 __le16 p_cpg;
48 } d_partitions[8]; 51 } d_partitions[MAX_OSF_PARTITIONS];
49 } * label; 52 } * label;
50 struct d_partition * partition; 53 struct d_partition * partition;
51 54
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
63 put_dev_sector(sect); 66 put_dev_sector(sect);
64 return 0; 67 return 0;
65 } 68 }
66 for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { 69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
67 if (slot == state->limit) 75 if (slot == state->limit)
68 break; 76 break;
69 if (le32_to_cpu(partition->p_size)) 77 if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..d49c4b5d2c3e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
2620 &proc_self_inode_operations, NULL, {}), 2620 &proc_self_inode_operations, NULL, {}),
2621}; 2621};
2622 2622
2623/*
2624 * Exceptional case: normally we are not allowed to unhash a busy
2625 * directory. In this case, however, we can do it - no aliasing problems
2626 * due to the way we treat inodes.
2627 */
2628static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2629{
2630 struct inode *inode;
2631 struct task_struct *task;
2632
2633 if (nd->flags & LOOKUP_RCU)
2634 return -ECHILD;
2635
2636 inode = dentry->d_inode;
2637 task = get_proc_task(inode);
2638 if (task) {
2639 put_task_struct(task);
2640 return 1;
2641 }
2642 d_drop(dentry);
2643 return 0;
2644}
2645
2646static const struct dentry_operations proc_base_dentry_operations =
2647{
2648 .d_revalidate = proc_base_revalidate,
2649 .d_delete = pid_delete_dentry,
2650};
2651
2652static struct dentry *proc_base_instantiate(struct inode *dir, 2623static struct dentry *proc_base_instantiate(struct inode *dir,
2653 struct dentry *dentry, struct task_struct *task, const void *ptr) 2624 struct dentry *dentry, struct task_struct *task, const void *ptr)
2654{ 2625{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2685 if (p->fop) 2656 if (p->fop)
2686 inode->i_fop = p->fop; 2657 inode->i_fop = p->fop;
2687 ei->op = p->op; 2658 ei->op = p->op;
2688 d_set_d_op(dentry, &proc_base_dentry_operations);
2689 d_add(dentry, inode); 2659 d_add(dentry, inode);
2690 error = NULL; 2660 error = NULL;
2691out: 2661out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d6a7ca1fdac5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
27static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
30 31
31 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
32 end_writeback(inode); 33 end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
38 de = PROC_I(inode)->pde; 39 de = PROC_I(inode)->pde;
39 if (de) 40 if (de)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 head = PROC_I(inode)->sysctl;
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 if (head) {
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head);
46 }
43} 47}
44 48
45struct vfsmount *proc_mnt; 49struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..8eb2522111c5 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -408,15 +408,18 @@ static int proc_sys_compare(const struct dentry *parent,
408 const struct dentry *dentry, const struct inode *inode, 408 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name) 409 unsigned int len, const char *str, const struct qstr *name)
410{ 410{
411 struct ctl_table_header *head;
411 /* Although proc doesn't have negative dentries, rcu-walk means 412 /* Although proc doesn't have negative dentries, rcu-walk means
412 * that inode here can be NULL */ 413 * that inode here can be NULL */
414 /* AV: can it, indeed? */
413 if (!inode) 415 if (!inode)
414 return 0; 416 return 1;
415 if (name->len != len) 417 if (name->len != len)
416 return 1; 418 return 1;
417 if (memcmp(name->name, str, len)) 419 if (memcmp(name->name, str, len))
418 return 1; 420 return 1;
419 return !sysctl_is_seen(PROC_I(inode)->sysctl); 421 head = rcu_dereference(PROC_I(inode)->sysctl);
422 return !head || !sysctl_is_seen(head);
420} 423}
421 424
422static const struct dentry_operations proc_sys_dentry_operations = { 425static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 68fdf45cc6c9..4b2eb564fdad 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1122,10 +1122,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1122 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1123 return -EMLINK;
1124 } 1124 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1125
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1126 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1127 inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
978 978
979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
983 return -EPERM; 981 return -EPERM;
984} 982}
985 983
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index b7c338d5e9df..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1286,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1286 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1287 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1288 1288
1289 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1290 return 255; 1294 return 255;
1295 }
1291 1296
1292 *lenp = 3; 1297 *lenp = 3;
1293 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 0fc16e3f0bfc..84793c7025e2 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -80,6 +80,10 @@
80#define O_SYNC (__O_SYNC|O_DSYNC) 80#define O_SYNC (__O_SYNC|O_DSYNC)
81#endif 81#endif
82 82
83#ifndef O_PATH
84#define O_PATH 010000000
85#endif
86
83#ifndef O_NDELAY 87#ifndef O_NDELAY
84#define O_NDELAY O_NONBLOCK 88#define O_NDELAY O_NONBLOCK
85#endif 89#endif
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 3c2344f48136..01f227e14254 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -6,7 +6,7 @@
6#include <asm/errno.h> 6#include <asm/errno.h>
7 7
8static inline int 8static inline int
9futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 9futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
10{ 10{
11 int op = (encoded_op >> 28) & 7; 11 int op = (encoded_op >> 28) & 7;
12 int cmp = (encoded_op >> 24) & 15; 12 int cmp = (encoded_op >> 24) & 15;
@@ -16,7 +16,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
16 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 16 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
17 oparg = 1 << oparg; 17 oparg = 1 << oparg;
18 18
19 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 19 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
20 return -EFAULT; 20 return -EFAULT;
21 21
22 pagefault_disable(); 22 pagefault_disable();
@@ -48,7 +48,8 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
48} 48}
49 49
50static inline int 50static inline int
51futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 51futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
52 u32 oldval, u32 newval)
52{ 53{
53 return -ENOSYS; 54 return -ENOSYS;
54} 55}
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index b969770196c2..57af0338d270 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -646,9 +646,13 @@ __SYSCALL(__NR_prlimit64, sys_prlimit64)
646__SYSCALL(__NR_fanotify_init, sys_fanotify_init) 646__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
647#define __NR_fanotify_mark 263 647#define __NR_fanotify_mark 263
648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
649#define __NR_name_to_handle_at 264
650__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
651#define __NR_open_by_handle_at 265
652__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
649 653
650#undef __NR_syscalls 654#undef __NR_syscalls
651#define __NR_syscalls 264 655#define __NR_syscalls 266
652 656
653/* 657/*
654 * All syscalls below here should go away really, 658 * All syscalls below here should go away really,
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 597692f1fc8d..65970b811e22 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -34,7 +34,10 @@ struct debug_obj {
34 34
35/** 35/**
36 * struct debug_obj_descr - object type specific debug description structure 36 * struct debug_obj_descr - object type specific debug description structure
37 *
37 * @name: name of the object typee 38 * @name: name of the object typee
39 * @debug_hint: function returning address, which have associated
40 * kernel symbol, to allow identify the object
38 * @fixup_init: fixup function, which is called when the init check 41 * @fixup_init: fixup function, which is called when the init check
39 * fails 42 * fails
40 * @fixup_activate: fixup function, which is called when the activate check 43 * @fixup_activate: fixup function, which is called when the activate check
@@ -46,7 +49,7 @@ struct debug_obj {
46 */ 49 */
47struct debug_obj_descr { 50struct debug_obj_descr {
48 const char *name; 51 const char *name;
49 52 void *(*debug_hint) (void *addr);
50 int (*fixup_init) (void *addr, enum debug_obj_state state); 53 int (*fixup_init) (void *addr, enum debug_obj_state state);
51 int (*fixup_activate) (void *addr, enum debug_obj_state state); 54 int (*fixup_activate) (void *addr, enum debug_obj_state state);
52 int (*fixup_destroy) (void *addr, enum debug_obj_state state); 55 int (*fixup_destroy) (void *addr, enum debug_obj_state state);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 28028988c862..33a42f24b275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -8,6 +8,9 @@ struct inode;
8struct super_block; 8struct super_block;
9struct vfsmount; 9struct vfsmount;
10 10
11/* limit the handle size to NFSv4 handle size now */
12#define MAX_HANDLE_SZ 128
13
11/* 14/*
12 * The fileid_type identifies how the file within the filesystem is encoded. 15 * The fileid_type identifies how the file within the filesystem is encoded.
13 * In theory this is freely set and parsed by the filesystem, but we try to 16 * In theory this is freely set and parsed by the filesystem, but we try to
@@ -121,8 +124,10 @@ struct fid {
121 * set, the encode_fh() should store sufficient information so that a good 124 * set, the encode_fh() should store sufficient information so that a good
122 * attempt can be made to find not only the file but also it's place in the 125 * attempt can be made to find not only the file but also it's place in the
123 * filesystem. This typically means storing a reference to de->d_parent in 126 * filesystem. This typically means storing a reference to de->d_parent in
124 * the filehandle fragment. encode_fh() should return the number of bytes 127 * the filehandle fragment. encode_fh() should return the fileid_type on
125 * stored or a negative error code such as %-ENOSPC 128 * success and on error returns 255 (if the space needed to encode fh is
129 * greater than @max_len*4 bytes). On error @max_len contains the minimum
130 * size(in 4 byte unit) needed to encode the file handle.
126 * 131 *
127 * fh_to_dentry: 132 * fh_to_dentry:
128 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle 133 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a562fa5fb4e3..f550f894ba15 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -46,6 +46,7 @@
46 unlinking file. */ 46 unlinking file. */
47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ 47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ 48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
49#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
49 50
50#ifdef __KERNEL__ 51#ifdef __KERNEL__
51 52
diff --git a/include/linux/file.h b/include/linux/file.h
index e85baebf6279..21a79958541c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
29 29
30extern struct file *fget(unsigned int fd); 30extern struct file *fget(unsigned int fd);
31extern struct file *fget_light(unsigned int fd, int *fput_needed); 31extern struct file *fget_light(unsigned int fd, int *fput_needed);
32extern struct file *fget_raw(unsigned int fd);
33extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
32extern void set_close_on_exec(unsigned int fd, int flag); 34extern void set_close_on_exec(unsigned int fd, int flag);
33extern void put_filp(struct file *); 35extern void put_filp(struct file *);
34extern int alloc_fd(unsigned start, unsigned flags); 36extern int alloc_fd(unsigned start, unsigned flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e38b50a4b9d2..13df14e2c42e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -102,6 +102,9 @@ struct inodes_stat_t {
102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
104 104
105/* File is opened with O_PATH; almost nothing can be done with it */
106#define FMODE_PATH ((__force fmode_t)0x4000)
107
105/* File was opened by fanotify and shouldn't generate fanotify events */ 108/* File was opened by fanotify and shouldn't generate fanotify events */
106#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 109#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
107 110
@@ -978,6 +981,13 @@ struct file {
978#endif 981#endif
979}; 982};
980 983
984struct file_handle {
985 __u32 handle_bytes;
986 int handle_type;
987 /* file identifier */
988 unsigned char f_handle[0];
989};
990
981#define get_file(x) atomic_long_inc(&(x)->f_count) 991#define get_file(x) atomic_long_inc(&(x)->f_count)
982#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 992#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
983#define file_count(x) atomic_long_read(&(x)->f_count) 993#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1401,6 +1411,7 @@ struct super_block {
1401 wait_queue_head_t s_wait_unfrozen; 1411 wait_queue_head_t s_wait_unfrozen;
1402 1412
1403 char s_id[32]; /* Informational name */ 1413 char s_id[32]; /* Informational name */
1414 u8 s_uuid[16]; /* UUID */
1404 1415
1405 void *s_fs_info; /* Filesystem private info */ 1416 void *s_fs_info; /* Filesystem private info */
1406 fmode_t s_mode; 1417 fmode_t s_mode;
@@ -1874,6 +1885,8 @@ extern void drop_collected_mounts(struct vfsmount *);
1874extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1885extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1875 struct vfsmount *); 1886 struct vfsmount *);
1876extern int vfs_statfs(struct path *, struct kstatfs *); 1887extern int vfs_statfs(struct path *, struct kstatfs *);
1888extern int user_statfs(const char __user *, struct kstatfs *);
1889extern int fd_statfs(int, struct kstatfs *);
1877extern int statfs_by_dentry(struct dentry *, struct kstatfs *); 1890extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
1878extern int freeze_super(struct super_block *super); 1891extern int freeze_super(struct super_block *super);
1879extern int thaw_super(struct super_block *super); 1892extern int thaw_super(struct super_block *super);
@@ -1990,6 +2003,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
1990extern long do_sys_open(int dfd, const char __user *filename, int flags, 2003extern long do_sys_open(int dfd, const char __user *filename, int flags,
1991 int mode); 2004 int mode);
1992extern struct file *filp_open(const char *, int, int); 2005extern struct file *filp_open(const char *, int, int);
2006extern struct file *file_open_root(struct dentry *, struct vfsmount *,
2007 const char *, int);
1993extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 2008extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1994 const struct cred *); 2009 const struct cred *);
1995extern int filp_close(struct file *, fl_owner_t id); 2010extern int filp_close(struct file *, fl_owner_t id);
@@ -2205,10 +2220,6 @@ extern struct file *create_read_pipe(struct file *f, int flags);
2205extern struct file *create_write_pipe(int flags); 2220extern struct file *create_write_pipe(int flags);
2206extern void free_write_pipe(struct file *); 2221extern void free_write_pipe(struct file *);
2207 2222
2208extern struct file *do_filp_open(int dfd, const char *pathname,
2209 int open_flag, int mode, int acc_mode);
2210extern int may_open(struct path *, int, int);
2211
2212extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2223extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2213extern struct file * open_exec(const char *); 2224extern struct file * open_exec(const char *);
2214 2225
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d4253e49..d746da19c6a2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -55,7 +55,7 @@
55 * Used by threaded interrupts which need to keep the 55 * Used by threaded interrupts which need to keep the
56 * irq line disabled until the threaded handler has been run. 56 * irq line disabled until the threaded handler has been run.
57 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend 57 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
58 * 58 * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
59 */ 59 */
60#define IRQF_DISABLED 0x00000020 60#define IRQF_DISABLED 0x00000020
61#define IRQF_SAMPLE_RANDOM 0x00000040 61#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -67,6 +67,7 @@
67#define IRQF_IRQPOLL 0x00001000 67#define IRQF_IRQPOLL 0x00001000
68#define IRQF_ONESHOT 0x00002000 68#define IRQF_ONESHOT 0x00002000
69#define IRQF_NO_SUSPEND 0x00004000 69#define IRQF_NO_SUSPEND 0x00004000
70#define IRQF_FORCE_RESUME 0x00008000
70 71
71#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND) 72#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND)
72 73
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c3..7ff16f7d3ed4 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
64}; 64};
65 65
66#define KTHREAD_WORKER_INIT(worker) { \ 66#define KTHREAD_WORKER_INIT(worker) { \
67 .lock = SPIN_LOCK_UNLOCKED, \ 67 .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
68 .work_list = LIST_HEAD_INIT((worker).work_list), \ 68 .work_list = LIST_HEAD_INIT((worker).work_list), \
69 } 69 }
70 70
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f276d4fa01fc..9c8603872c36 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,6 @@ struct nameidata {
19 struct path path; 19 struct path path;
20 struct qstr last; 20 struct qstr last;
21 struct path root; 21 struct path root;
22 struct file *file;
23 struct inode *inode; /* path.dentry.d_inode */ 22 struct inode *inode; /* path.dentry.d_inode */
24 unsigned int flags; 23 unsigned int flags;
25 unsigned seq; 24 unsigned seq;
@@ -63,6 +62,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
63#define LOOKUP_EXCL 0x0400 62#define LOOKUP_EXCL 0x0400
64#define LOOKUP_RENAME_TARGET 0x0800 63#define LOOKUP_RENAME_TARGET 0x0800
65 64
65#define LOOKUP_JUMPED 0x1000
66#define LOOKUP_ROOT 0x2000
67#define LOOKUP_EMPTY 0x4000
68
66extern int user_path_at(int, const char __user *, unsigned, struct path *); 69extern int user_path_at(int, const char __user *, unsigned, struct path *);
67 70
68#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) 71#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
@@ -72,7 +75,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
72 75
73extern int kern_path(const char *, unsigned, struct path *); 76extern int kern_path(const char *, unsigned, struct path *);
74 77
75extern int path_lookup(const char *, unsigned, struct nameidata *); 78extern int kern_path_parent(const char *, struct nameidata *);
76extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 79extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
77 const char *, unsigned int, struct nameidata *); 80 const char *, unsigned int, struct nameidata *);
78 81
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346b0340..71caf7a5e6c6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2392,6 +2392,9 @@ extern int netdev_notice(const struct net_device *dev, const char *format, ...)
2392extern int netdev_info(const struct net_device *dev, const char *format, ...) 2392extern int netdev_info(const struct net_device *dev, const char *format, ...)
2393 __attribute__ ((format (printf, 2, 3))); 2393 __attribute__ ((format (printf, 2, 3)));
2394 2394
2395#define MODULE_ALIAS_NETDEV(device) \
2396 MODULE_ALIAS("netdev-" device)
2397
2395#if defined(DEBUG) 2398#if defined(DEBUG)
2396#define netdev_dbg(__dev, format, args...) \ 2399#define netdev_dbg(__dev, format, args...) \
2397 netdev_printk(KERN_DEBUG, __dev, format, ##args) 2400 netdev_printk(KERN_DEBUG, __dev, format, ##args)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b197563913bf..3e112de12d8d 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -68,11 +68,7 @@ struct nfs_client {
68 unsigned char cl_id_uniquifier; 68 unsigned char cl_id_uniquifier;
69 u32 cl_cb_ident; /* v4.0 callback identifier */ 69 u32 cl_cb_ident; /* v4.0 callback identifier */
70 const struct nfs4_minor_version_ops *cl_mvops; 70 const struct nfs4_minor_version_ops *cl_mvops;
71#endif /* CONFIG_NFS_V4 */
72 71
73#ifdef CONFIG_NFS_V4_1
74 /* clientid returned from EXCHANGE_ID, used by session operations */
75 u64 cl_ex_clid;
76 /* The sequence id to use for the next CREATE_SESSION */ 72 /* The sequence id to use for the next CREATE_SESSION */
77 u32 cl_seqid; 73 u32 cl_seqid;
78 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 74 /* The flags used for obtaining the clientid during EXCHANGE_ID */
@@ -80,7 +76,7 @@ struct nfs_client {
80 struct nfs4_session *cl_session; /* sharred session */ 76 struct nfs4_session *cl_session; /* sharred session */
81 struct list_head cl_layouts; 77 struct list_head cl_layouts;
82 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ 78 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
83#endif /* CONFIG_NFS_V4_1 */ 79#endif /* CONFIG_NFS_V4 */
84 80
85#ifdef CONFIG_NFS_FSCACHE 81#ifdef CONFIG_NFS_FSCACHE
86 struct fscache_cookie *fscache; /* client index cache cookie */ 82 struct fscache_cookie *fscache; /* client index cache cookie */
@@ -185,7 +181,7 @@ struct nfs_server {
185/* maximum number of slots to use */ 181/* maximum number of slots to use */
186#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE 182#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
187 183
188#if defined(CONFIG_NFS_V4_1) 184#if defined(CONFIG_NFS_V4)
189 185
190/* Sessions */ 186/* Sessions */
191#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long))) 187#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
@@ -225,5 +221,5 @@ struct nfs4_session {
225 struct nfs_client *clp; 221 struct nfs_client *clp;
226}; 222};
227 223
228#endif /* CONFIG_NFS_V4_1 */ 224#endif /* CONFIG_NFS_V4 */
229#endif 225#endif
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 7254eda078e5..c9b9f322c8d8 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -31,15 +31,17 @@
31 * 31 *
32 * Simple ASCII art explanation: 32 * Simple ASCII art explanation:
33 * 33 *
34 * |HEAD | 34 * pl:prio_list (only for plist_node)
35 * | | 35 * nl:node_list
36 * |prio_list.prev|<------------------------------------| 36 * HEAD| NODE(S)
37 * |prio_list.next|<->|pl|<->|pl|<--------------->|pl|<-| 37 * |
38 * |10 | |10| |21| |21| |21| |40| (prio) 38 * ||------------------------------------|
39 * | | | | | | | | | | | | 39 * ||->|pl|<->|pl|<--------------->|pl|<-|
40 * | | | | | | | | | | | | 40 * | |10| |21| |21| |21| |40| (prio)
41 * |node_list.next|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-| 41 * | | | | | | | | | | |
42 * |node_list.prev|<------------------------------------| 42 * | | | | | | | | | | |
43 * |->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
44 * |-------------------------------------------|
43 * 45 *
44 * The nodes on the prio_list list are sorted by priority to simplify 46 * The nodes on the prio_list list are sorted by priority to simplify
45 * the insertion of new nodes. There are no nodes with duplicate 47 * the insertion of new nodes. There are no nodes with duplicate
@@ -78,7 +80,6 @@
78#include <linux/spinlock_types.h> 80#include <linux/spinlock_types.h>
79 81
80struct plist_head { 82struct plist_head {
81 struct list_head prio_list;
82 struct list_head node_list; 83 struct list_head node_list;
83#ifdef CONFIG_DEBUG_PI_LIST 84#ifdef CONFIG_DEBUG_PI_LIST
84 raw_spinlock_t *rawlock; 85 raw_spinlock_t *rawlock;
@@ -88,7 +89,8 @@ struct plist_head {
88 89
89struct plist_node { 90struct plist_node {
90 int prio; 91 int prio;
91 struct plist_head plist; 92 struct list_head prio_list;
93 struct list_head node_list;
92}; 94};
93 95
94#ifdef CONFIG_DEBUG_PI_LIST 96#ifdef CONFIG_DEBUG_PI_LIST
@@ -100,7 +102,6 @@ struct plist_node {
100#endif 102#endif
101 103
102#define _PLIST_HEAD_INIT(head) \ 104#define _PLIST_HEAD_INIT(head) \
103 .prio_list = LIST_HEAD_INIT((head).prio_list), \
104 .node_list = LIST_HEAD_INIT((head).node_list) 105 .node_list = LIST_HEAD_INIT((head).node_list)
105 106
106/** 107/**
@@ -133,7 +134,8 @@ struct plist_node {
133#define PLIST_NODE_INIT(node, __prio) \ 134#define PLIST_NODE_INIT(node, __prio) \
134{ \ 135{ \
135 .prio = (__prio), \ 136 .prio = (__prio), \
136 .plist = { _PLIST_HEAD_INIT((node).plist) }, \ 137 .prio_list = LIST_HEAD_INIT((node).prio_list), \
138 .node_list = LIST_HEAD_INIT((node).node_list), \
137} 139}
138 140
139/** 141/**
@@ -144,7 +146,6 @@ struct plist_node {
144static inline void 146static inline void
145plist_head_init(struct plist_head *head, spinlock_t *lock) 147plist_head_init(struct plist_head *head, spinlock_t *lock)
146{ 148{
147 INIT_LIST_HEAD(&head->prio_list);
148 INIT_LIST_HEAD(&head->node_list); 149 INIT_LIST_HEAD(&head->node_list);
149#ifdef CONFIG_DEBUG_PI_LIST 150#ifdef CONFIG_DEBUG_PI_LIST
150 head->spinlock = lock; 151 head->spinlock = lock;
@@ -160,7 +161,6 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
160static inline void 161static inline void
161plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock) 162plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
162{ 163{
163 INIT_LIST_HEAD(&head->prio_list);
164 INIT_LIST_HEAD(&head->node_list); 164 INIT_LIST_HEAD(&head->node_list);
165#ifdef CONFIG_DEBUG_PI_LIST 165#ifdef CONFIG_DEBUG_PI_LIST
166 head->rawlock = lock; 166 head->rawlock = lock;
@@ -176,7 +176,8 @@ plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
176static inline void plist_node_init(struct plist_node *node, int prio) 176static inline void plist_node_init(struct plist_node *node, int prio)
177{ 177{
178 node->prio = prio; 178 node->prio = prio;
179 plist_head_init(&node->plist, NULL); 179 INIT_LIST_HEAD(&node->prio_list);
180 INIT_LIST_HEAD(&node->node_list);
180} 181}
181 182
182extern void plist_add(struct plist_node *node, struct plist_head *head); 183extern void plist_add(struct plist_node *node, struct plist_head *head);
@@ -188,7 +189,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
188 * @head: the head for your list 189 * @head: the head for your list
189 */ 190 */
190#define plist_for_each(pos, head) \ 191#define plist_for_each(pos, head) \
191 list_for_each_entry(pos, &(head)->node_list, plist.node_list) 192 list_for_each_entry(pos, &(head)->node_list, node_list)
192 193
193/** 194/**
194 * plist_for_each_safe - iterate safely over a plist of given type 195 * plist_for_each_safe - iterate safely over a plist of given type
@@ -199,7 +200,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
199 * Iterate over a plist of given type, safe against removal of list entry. 200 * Iterate over a plist of given type, safe against removal of list entry.
200 */ 201 */
201#define plist_for_each_safe(pos, n, head) \ 202#define plist_for_each_safe(pos, n, head) \
202 list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list) 203 list_for_each_entry_safe(pos, n, &(head)->node_list, node_list)
203 204
204/** 205/**
205 * plist_for_each_entry - iterate over list of given type 206 * plist_for_each_entry - iterate over list of given type
@@ -208,7 +209,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
208 * @mem: the name of the list_struct within the struct 209 * @mem: the name of the list_struct within the struct
209 */ 210 */
210#define plist_for_each_entry(pos, head, mem) \ 211#define plist_for_each_entry(pos, head, mem) \
211 list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list) 212 list_for_each_entry(pos, &(head)->node_list, mem.node_list)
212 213
213/** 214/**
214 * plist_for_each_entry_safe - iterate safely over list of given type 215 * plist_for_each_entry_safe - iterate safely over list of given type
@@ -220,7 +221,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
220 * Iterate over list of given type, safe against removal of list entry. 221 * Iterate over list of given type, safe against removal of list entry.
221 */ 222 */
222#define plist_for_each_entry_safe(pos, n, head, m) \ 223#define plist_for_each_entry_safe(pos, n, head, m) \
223 list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list) 224 list_for_each_entry_safe(pos, n, &(head)->node_list, m.node_list)
224 225
225/** 226/**
226 * plist_head_empty - return !0 if a plist_head is empty 227 * plist_head_empty - return !0 if a plist_head is empty
@@ -237,7 +238,7 @@ static inline int plist_head_empty(const struct plist_head *head)
237 */ 238 */
238static inline int plist_node_empty(const struct plist_node *node) 239static inline int plist_node_empty(const struct plist_node *node)
239{ 240{
240 return plist_head_empty(&node->plist); 241 return list_empty(&node->node_list);
241} 242}
242 243
243/* All functions below assume the plist_head is not empty. */ 244/* All functions below assume the plist_head is not empty. */
@@ -285,7 +286,7 @@ static inline int plist_node_empty(const struct plist_node *node)
285static inline struct plist_node *plist_first(const struct plist_head *head) 286static inline struct plist_node *plist_first(const struct plist_head *head)
286{ 287{
287 return list_entry(head->node_list.next, 288 return list_entry(head->node_list.next,
288 struct plist_node, plist.node_list); 289 struct plist_node, node_list);
289} 290}
290 291
291/** 292/**
@@ -297,7 +298,7 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
297static inline struct plist_node *plist_last(const struct plist_head *head) 298static inline struct plist_node *plist_last(const struct plist_head *head)
298{ 299{
299 return list_entry(head->node_list.prev, 300 return list_entry(head->node_list.prev,
300 struct plist_node, plist.node_list); 301 struct plist_node, node_list);
301} 302}
302 303
303#endif 304#endif
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8e..cc0072e93e36 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
43 RW_DEP_MAP_INIT(lockname) } 43 RW_DEP_MAP_INIT(lockname) }
44#endif 44#endif
45 45
46/*
47 * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
48 * deprecated.
49 *
50 * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
51 */
52#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
53
54#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) 46#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
55 47
56#endif /* __LINUX_RWLOCK_TYPES_H */ 48#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc2527970..34701241b673 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
12#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead" 12#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
13#endif 13#endif
14 14
15#include <linux/spinlock.h>
16#include <linux/list.h>
17
18#ifdef __KERNEL__ 15#ifdef __KERNEL__
19
20#include <linux/types.h>
21
22struct rwsem_waiter;
23
24/* 16/*
25 * the rw-semaphore definition 17 * the rw-semaphore definition
26 * - if activity is 0 then there are no active readers or writers 18 * - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
37#endif 29#endif
38}; 30};
39 31
40#ifdef CONFIG_DEBUG_LOCK_ALLOC 32#define RWSEM_UNLOCKED_VALUE 0x00000000
41# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
42#else
43# define __RWSEM_DEP_MAP_INIT(lockname)
44#endif
45
46#define __RWSEM_INITIALIZER(name) \
47{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
48 __RWSEM_DEP_MAP_INIT(name) }
49
50#define DECLARE_RWSEM(name) \
51 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
52
53extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
54 struct lock_class_key *key);
55
56#define init_rwsem(sem) \
57do { \
58 static struct lock_class_key __key; \
59 \
60 __init_rwsem((sem), #sem, &__key); \
61} while (0)
62 33
63extern void __down_read(struct rw_semaphore *sem); 34extern void __down_read(struct rw_semaphore *sem);
64extern int __down_read_trylock(struct rw_semaphore *sem); 35extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca7..a8afe9cd000c 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16
14#include <asm/system.h> 17#include <asm/system.h>
15#include <asm/atomic.h> 18#include <asm/atomic.h>
16 19
@@ -19,9 +22,57 @@ struct rw_semaphore;
19#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK 22#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
20#include <linux/rwsem-spinlock.h> /* use a generic implementation */ 23#include <linux/rwsem-spinlock.h> /* use a generic implementation */
21#else 24#else
22#include <asm/rwsem.h> /* use an arch-specific implementation */ 25/* All arch specific implementations share the same struct */
26struct rw_semaphore {
27 long count;
28 spinlock_t wait_lock;
29 struct list_head wait_list;
30#ifdef CONFIG_DEBUG_LOCK_ALLOC
31 struct lockdep_map dep_map;
32#endif
33};
34
35extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
36extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
37extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
38extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
39
40/* Include the arch specific part */
41#include <asm/rwsem.h>
42
43/* In all implementations count != 0 means locked */
44static inline int rwsem_is_locked(struct rw_semaphore *sem)
45{
46 return sem->count != 0;
47}
48
49#endif
50
51/* Common initializer macros and functions */
52
53#ifdef CONFIG_DEBUG_LOCK_ALLOC
54# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
55#else
56# define __RWSEM_DEP_MAP_INIT(lockname)
23#endif 57#endif
24 58
59#define __RWSEM_INITIALIZER(name) \
60 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
61 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
62
63#define DECLARE_RWSEM(name) \
64 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
65
66extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
67 struct lock_class_key *key);
68
69#define init_rwsem(sem) \
70do { \
71 static struct lock_class_key __key; \
72 \
73 __init_rwsem((sem), #sem, &__key); \
74} while (0)
75
25/* 76/*
26 * lock for reading 77 * lock for reading
27 */ 78 */
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720d..73548eb13a5d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
81#define __SPIN_LOCK_UNLOCKED(lockname) \ 81#define __SPIN_LOCK_UNLOCKED(lockname) \
82 (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) 82 (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
83 83
84/*
85 * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
86 * deprecated.
87 * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
88 * appropriate.
89 */
90#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
91
92#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) 84#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
93 85
94#include <linux/rwlock_types.h> 86#include <linux/rwlock_types.h>
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 88513fd8e208..d81db8012c63 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -212,6 +212,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
212struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, 212struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
213 const struct rpc_call_ops *ops); 213 const struct rpc_call_ops *ops);
214void rpc_put_task(struct rpc_task *); 214void rpc_put_task(struct rpc_task *);
215void rpc_put_task_async(struct rpc_task *);
215void rpc_exit_task(struct rpc_task *); 216void rpc_exit_task(struct rpc_task *);
216void rpc_exit(struct rpc_task *, int); 217void rpc_exit(struct rpc_task *, int);
217void rpc_release_calldata(const struct rpc_call_ops *, void *); 218void rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a17fcea2ca58..4acc6b6088b0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -62,6 +62,7 @@ struct robust_list_head;
62struct getcpu_cache; 62struct getcpu_cache;
63struct old_linux_dirent; 63struct old_linux_dirent;
64struct perf_event_attr; 64struct perf_event_attr;
65struct file_handle;
65 66
66#include <linux/types.h> 67#include <linux/types.h>
67#include <linux/aio_abi.h> 68#include <linux/aio_abi.h>
@@ -834,5 +835,10 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
834 unsigned long prot, unsigned long flags, 835 unsigned long prot, unsigned long flags,
835 unsigned long fd, unsigned long pgoff); 836 unsigned long fd, unsigned long pgoff);
836asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); 837asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
837 838asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
839 struct file_handle __user *handle,
840 int __user *mnt_id, int flag);
841asmlinkage long sys_open_by_handle_at(int mountdirfd,
842 struct file_handle __user *handle,
843 int flags);
838#endif 844#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb64f3b8..11684d9e6bd2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -930,6 +930,7 @@ enum
930 930
931#ifdef __KERNEL__ 931#ifdef __KERNEL__
932#include <linux/list.h> 932#include <linux/list.h>
933#include <linux/rcupdate.h>
933 934
934/* For the /proc/sys support */ 935/* For the /proc/sys support */
935struct ctl_table; 936struct ctl_table;
@@ -1037,10 +1038,15 @@ struct ctl_table_root {
1037 struct ctl_table trees. */ 1038 struct ctl_table trees. */
1038struct ctl_table_header 1039struct ctl_table_header
1039{ 1040{
1040 struct ctl_table *ctl_table; 1041 union {
1041 struct list_head ctl_entry; 1042 struct {
1042 int used; 1043 struct ctl_table *ctl_table;
1043 int count; 1044 struct list_head ctl_entry;
1045 int used;
1046 int count;
1047 };
1048 struct rcu_head rcu;
1049 };
1044 struct completion *unregistering; 1050 struct completion *unregistering;
1045 struct ctl_table *ctl_table_arg; 1051 struct ctl_table *ctl_table_arg;
1046 struct ctl_table_root *root; 1052 struct ctl_table_root *root;
diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
index 246940511579..2e8ec51f0615 100644
--- a/include/target/target_core_transport.h
+++ b/include/target/target_core_transport.h
@@ -135,6 +135,8 @@ extern void transport_complete_task(struct se_task *, int);
135extern void transport_add_task_to_execute_queue(struct se_task *, 135extern void transport_add_task_to_execute_queue(struct se_task *,
136 struct se_task *, 136 struct se_task *,
137 struct se_device *); 137 struct se_device *);
138extern void transport_remove_task_from_execute_queue(struct se_task *,
139 struct se_device *);
138unsigned char *transport_dump_cmd_direction(struct se_cmd *); 140unsigned char *transport_dump_cmd_direction(struct se_cmd *);
139extern void transport_dump_dev_state(struct se_device *, char *, int *); 141extern void transport_dump_dev_state(struct se_device *, char *, int *);
140extern void transport_dump_dev_info(struct se_device *, struct se_lun *, 142extern void transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/include/xen/events.h b/include/xen/events.h
index 00f53ddcc062..962da2ced5b4 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -75,11 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
75int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); 75int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
76 76
77#ifdef CONFIG_PCI_MSI 77#ifdef CONFIG_PCI_MSI
78/* Allocate an irq and a pirq to be used with MSIs. */ 78int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
79#define XEN_ALLOC_PIRQ (1 << 0) 79int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
80#define XEN_ALLOC_IRQ (1 << 1) 80 int pirq, int vector, const char *name);
81void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
82int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
83#endif 81#endif
84 82
85/* De-allocates the above mentioned physical interrupt. */ 83/* De-allocates the above mentioned physical interrupt. */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index c2d1fa4dc1ee..61e523af3c46 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
51 */ 51 */
52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
53 53
54struct blkif_request { 54struct blkif_request_rw {
55 uint8_t operation; /* BLKIF_OP_??? */
56 uint8_t nr_segments; /* number of segments */
57 blkif_vdev_t handle; /* only for read/write requests */
58 uint64_t id; /* private guest value, echoed in resp */
59 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 55 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
60 struct blkif_request_segment { 56 struct blkif_request_segment {
61 grant_ref_t gref; /* reference to I/O buffer frame */ 57 grant_ref_t gref; /* reference to I/O buffer frame */
@@ -65,6 +61,16 @@ struct blkif_request {
65 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 61 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
66}; 62};
67 63
64struct blkif_request {
65 uint8_t operation; /* BLKIF_OP_??? */
66 uint8_t nr_segments; /* number of segments */
67 blkif_vdev_t handle; /* only for read/write requests */
68 uint64_t id; /* private guest value, echoed in resp */
69 union {
70 struct blkif_request_rw rw;
71 } u;
72};
73
68struct blkif_response { 74struct blkif_response {
69 uint64_t id; /* copied from request */ 75 uint64_t id; /* copied from request */
70 uint8_t operation; /* copied from request */ 76 uint8_t operation; /* copied from request */
@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
91#define VDISK_REMOVABLE 0x2 97#define VDISK_REMOVABLE 0x2
92#define VDISK_READONLY 0x4 98#define VDISK_READONLY 0x4
93 99
100/* Xen-defined major numbers for virtual disks, they look strangely
101 * familiar */
102#define XEN_IDE0_MAJOR 3
103#define XEN_IDE1_MAJOR 22
104#define XEN_SCSI_DISK0_MAJOR 8
105#define XEN_SCSI_DISK1_MAJOR 65
106#define XEN_SCSI_DISK2_MAJOR 66
107#define XEN_SCSI_DISK3_MAJOR 67
108#define XEN_SCSI_DISK4_MAJOR 68
109#define XEN_SCSI_DISK5_MAJOR 69
110#define XEN_SCSI_DISK6_MAJOR 70
111#define XEN_SCSI_DISK7_MAJOR 71
112#define XEN_SCSI_DISK8_MAJOR 128
113#define XEN_SCSI_DISK9_MAJOR 129
114#define XEN_SCSI_DISK10_MAJOR 130
115#define XEN_SCSI_DISK11_MAJOR 131
116#define XEN_SCSI_DISK12_MAJOR 132
117#define XEN_SCSI_DISK13_MAJOR 133
118#define XEN_SCSI_DISK14_MAJOR 134
119#define XEN_SCSI_DISK15_MAJOR 135
120
94#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ 121#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e2f1bc..b33257bc7e83 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -30,7 +30,7 @@
30#define __HYPERVISOR_stack_switch 3 30#define __HYPERVISOR_stack_switch 3
31#define __HYPERVISOR_set_callbacks 4 31#define __HYPERVISOR_set_callbacks 4
32#define __HYPERVISOR_fpu_taskswitch 5 32#define __HYPERVISOR_fpu_taskswitch 5
33#define __HYPERVISOR_sched_op 6 33#define __HYPERVISOR_sched_op_compat 6
34#define __HYPERVISOR_dom0_op 7 34#define __HYPERVISOR_dom0_op 7
35#define __HYPERVISOR_set_debugreg 8 35#define __HYPERVISOR_set_debugreg 8
36#define __HYPERVISOR_get_debugreg 9 36#define __HYPERVISOR_get_debugreg 9
@@ -52,7 +52,7 @@
52#define __HYPERVISOR_mmuext_op 26 52#define __HYPERVISOR_mmuext_op 26
53#define __HYPERVISOR_acm_op 27 53#define __HYPERVISOR_acm_op 27
54#define __HYPERVISOR_nmi_op 28 54#define __HYPERVISOR_nmi_op 28
55#define __HYPERVISOR_sched_op_new 29 55#define __HYPERVISOR_sched_op 29
56#define __HYPERVISOR_callback_op 30 56#define __HYPERVISOR_callback_op 30
57#define __HYPERVISOR_xenoprof_op 31 57#define __HYPERVISOR_xenoprof_op 31
58#define __HYPERVISOR_event_channel_op 32 58#define __HYPERVISOR_event_channel_op 32
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 98b92154a264..03c85d7387fb 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,9 +5,9 @@
5 5
6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); 6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
7 7
8void xen_pre_suspend(void); 8void xen_arch_pre_suspend(void);
9void xen_post_suspend(int suspend_cancelled); 9void xen_arch_post_suspend(int suspend_cancelled);
10void xen_hvm_post_suspend(int suspend_cancelled); 10void xen_arch_hvm_post_suspend(int suspend_cancelled);
11 11
12void xen_mm_pin_all(void); 12void xen_mm_pin_all(void);
13void xen_mm_unpin_all(void); 13void xen_mm_unpin_all(void);
diff --git a/init/Kconfig b/init/Kconfig
index 4c4edf2ec4a9..5721d27af626 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3
287 for processing it. A preliminary version of these tools is available 287 for processing it. A preliminary version of these tools is available
288 at <http://www.gnu.org/software/acct/>. 288 at <http://www.gnu.org/software/acct/>.
289 289
290config FHANDLE
291 bool "open by fhandle syscalls"
292 select EXPORTFS
293 help
294 If you say Y here, a user level program will be able to map
295 file names to handle and then later use the handle for
296 different file system operations. This is useful in implementing
297 userspace file servers, which now track files using handles instead
298 of names. The handle would remain the same even if file names
299 get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
300 syscalls.
301
290config TASKSTATS 302config TASKSTATS
291 bool "Export task/process statistics through netlink (EXPERIMENTAL)" 303 bool "Export task/process statistics through netlink (EXPERIMENTAL)"
292 depends on NET 304 depends on NET
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c7866460..e683869365d9 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
144} 144}
145 145
146/* Initialize a parent watch entry. */ 146/* Initialize a parent watch entry. */
147static struct audit_parent *audit_init_parent(struct nameidata *ndp) 147static struct audit_parent *audit_init_parent(struct path *path)
148{ 148{
149 struct inode *inode = ndp->path.dentry->d_inode; 149 struct inode *inode = path->dentry->d_inode;
150 struct audit_parent *parent; 150 struct audit_parent *parent;
151 int ret; 151 int ret;
152 152
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
353} 353}
354 354
355/* Get path information necessary for adding watches. */ 355/* Get path information necessary for adding watches. */
356static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) 356static int audit_get_nd(struct audit_watch *watch, struct path *parent)
357{ 357{
358 struct nameidata *ndparent, *ndwatch; 358 struct nameidata nd;
359 struct dentry *d;
359 int err; 360 int err;
360 361
361 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); 362 err = kern_path_parent(watch->path, &nd);
362 if (unlikely(!ndparent)) 363 if (err)
363 return -ENOMEM; 364 return err;
364 365
365 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); 366 if (nd.last_type != LAST_NORM) {
366 if (unlikely(!ndwatch)) { 367 path_put(&nd.path);
367 kfree(ndparent); 368 return -EINVAL;
368 return -ENOMEM;
369 } 369 }
370 370
371 err = path_lookup(path, LOOKUP_PARENT, ndparent); 371 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
372 if (err) { 372 d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
373 kfree(ndparent); 373 if (IS_ERR(d)) {
374 kfree(ndwatch); 374 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
375 return err; 375 path_put(&nd.path);
376 return PTR_ERR(d);
376 } 377 }
377 378 if (d->d_inode) {
378 err = path_lookup(path, 0, ndwatch); 379 /* update watch filter fields */
379 if (err) { 380 watch->dev = d->d_inode->i_sb->s_dev;
380 kfree(ndwatch); 381 watch->ino = d->d_inode->i_ino;
381 ndwatch = NULL;
382 } 382 }
383 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
383 384
384 *ndp = ndparent; 385 *parent = nd.path;
385 *ndw = ndwatch; 386 dput(d);
386
387 return 0; 387 return 0;
388} 388}
389 389
390/* Release resources used for watch path information. */
391static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
392{
393 if (ndp) {
394 path_put(&ndp->path);
395 kfree(ndp);
396 }
397 if (ndw) {
398 path_put(&ndw->path);
399 kfree(ndw);
400 }
401}
402
403/* Associate the given rule with an existing parent. 390/* Associate the given rule with an existing parent.
404 * Caller must hold audit_filter_mutex. */ 391 * Caller must hold audit_filter_mutex. */
405static void audit_add_to_parent(struct audit_krule *krule, 392static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
440{ 427{
441 struct audit_watch *watch = krule->watch; 428 struct audit_watch *watch = krule->watch;
442 struct audit_parent *parent; 429 struct audit_parent *parent;
443 struct nameidata *ndp = NULL, *ndw = NULL; 430 struct path parent_path;
444 int h, ret = 0; 431 int h, ret = 0;
445 432
446 mutex_unlock(&audit_filter_mutex); 433 mutex_unlock(&audit_filter_mutex);
447 434
448 /* Avoid calling path_lookup under audit_filter_mutex. */ 435 /* Avoid calling path_lookup under audit_filter_mutex. */
449 ret = audit_get_nd(watch->path, &ndp, &ndw); 436 ret = audit_get_nd(watch, &parent_path);
450 if (ret) {
451 /* caller expects mutex locked */
452 mutex_lock(&audit_filter_mutex);
453 goto error;
454 }
455 437
438 /* caller expects mutex locked */
456 mutex_lock(&audit_filter_mutex); 439 mutex_lock(&audit_filter_mutex);
457 440
458 /* update watch filter fields */ 441 if (ret)
459 if (ndw) { 442 return ret;
460 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
461 watch->ino = ndw->path.dentry->d_inode->i_ino;
462 }
463 443
464 /* either find an old parent or attach a new one */ 444 /* either find an old parent or attach a new one */
465 parent = audit_find_parent(ndp->path.dentry->d_inode); 445 parent = audit_find_parent(parent_path.dentry->d_inode);
466 if (!parent) { 446 if (!parent) {
467 parent = audit_init_parent(ndp); 447 parent = audit_init_parent(&parent_path);
468 if (IS_ERR(parent)) { 448 if (IS_ERR(parent)) {
469 ret = PTR_ERR(parent); 449 ret = PTR_ERR(parent);
470 goto error; 450 goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
479 h = audit_hash_ino((u32)watch->ino); 459 h = audit_hash_ino((u32)watch->ino);
480 *list = &audit_inode_hash[h]; 460 *list = &audit_inode_hash[h];
481error: 461error:
482 audit_put_nd(ndp, ndw); /* NULL args OK */ 462 path_put(&parent_path);
483 return ret; 463 return ret;
484
485} 464}
486 465
487void audit_remove_watch_rule(struct audit_krule *krule) 466void audit_remove_watch_rule(struct audit_krule *krule)
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a9d6dd53a6c..2343c132c5a7 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
35static struct thread_group_cred init_tgcred = { 35static struct thread_group_cred init_tgcred = {
36 .usage = ATOMIC_INIT(2), 36 .usage = ATOMIC_INIT(2),
37 .tgid = 0, 37 .tgid = 0,
38 .lock = SPIN_LOCK_UNLOCKED, 38 .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
39}; 39};
40#endif 40#endif
41 41
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd6..bda415715382 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
381 return NULL; 381 return NULL;
382} 382}
383 383
384static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 384static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
385 u32 uval, u32 newval)
385{ 386{
386 u32 curval; 387 int ret;
387 388
388 pagefault_disable(); 389 pagefault_disable();
389 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); 390 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
390 pagefault_enable(); 391 pagefault_enable();
391 392
392 return curval; 393 return ret;
393} 394}
394 395
395static int get_futex_value_locked(u32 *dest, u32 __user *from) 396static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
674 struct task_struct *task, int set_waiters) 675 struct task_struct *task, int set_waiters)
675{ 676{
676 int lock_taken, ret, ownerdied = 0; 677 int lock_taken, ret, ownerdied = 0;
677 u32 uval, newval, curval; 678 u32 uval, newval, curval, vpid = task_pid_vnr(task);
678 679
679retry: 680retry:
680 ret = lock_taken = 0; 681 ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
684 * (by doing a 0 -> TID atomic cmpxchg), while holding all 685 * (by doing a 0 -> TID atomic cmpxchg), while holding all
685 * the locks. It will most likely not succeed. 686 * the locks. It will most likely not succeed.
686 */ 687 */
687 newval = task_pid_vnr(task); 688 newval = vpid;
688 if (set_waiters) 689 if (set_waiters)
689 newval |= FUTEX_WAITERS; 690 newval |= FUTEX_WAITERS;
690 691
691 curval = cmpxchg_futex_value_locked(uaddr, 0, newval); 692 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
692
693 if (unlikely(curval == -EFAULT))
694 return -EFAULT; 693 return -EFAULT;
695 694
696 /* 695 /*
697 * Detect deadlocks. 696 * Detect deadlocks.
698 */ 697 */
699 if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) 698 if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
700 return -EDEADLK; 699 return -EDEADLK;
701 700
702 /* 701 /*
@@ -723,14 +722,12 @@ retry:
723 */ 722 */
724 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { 723 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
725 /* Keep the OWNER_DIED bit */ 724 /* Keep the OWNER_DIED bit */
726 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); 725 newval = (curval & ~FUTEX_TID_MASK) | vpid;
727 ownerdied = 0; 726 ownerdied = 0;
728 lock_taken = 1; 727 lock_taken = 1;
729 } 728 }
730 729
731 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 730 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
732
733 if (unlikely(curval == -EFAULT))
734 return -EFAULT; 731 return -EFAULT;
735 if (unlikely(curval != uval)) 732 if (unlikely(curval != uval))
736 goto retry; 733 goto retry;
@@ -775,6 +772,24 @@ retry:
775 return ret; 772 return ret;
776} 773}
777 774
775/**
776 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
777 * @q: The futex_q to unqueue
778 *
779 * The q->lock_ptr must not be NULL and must be held by the caller.
780 */
781static void __unqueue_futex(struct futex_q *q)
782{
783 struct futex_hash_bucket *hb;
784
785 if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
786 || plist_node_empty(&q->list)))
787 return;
788
789 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
790 plist_del(&q->list, &hb->chain);
791}
792
778/* 793/*
779 * The hash bucket lock must be held when this is called. 794 * The hash bucket lock must be held when this is called.
780 * Afterwards, the futex_q must not be accessed. 795 * Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
792 */ 807 */
793 get_task_struct(p); 808 get_task_struct(p);
794 809
795 plist_del(&q->list, &q->list.plist); 810 __unqueue_futex(q);
796 /* 811 /*
797 * The waiting task can free the futex_q as soon as 812 * The waiting task can free the futex_q as soon as
798 * q->lock_ptr = NULL is written, without taking any locks. A 813 * q->lock_ptr = NULL is written, without taking any locks. A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
843 858
844 newval = FUTEX_WAITERS | task_pid_vnr(new_owner); 859 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
845 860
846 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 861 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
847
848 if (curval == -EFAULT)
849 ret = -EFAULT; 862 ret = -EFAULT;
850 else if (curval != uval) 863 else if (curval != uval)
851 ret = -EINVAL; 864 ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
880 * There is no waiter, so we unlock the futex. The owner died 893 * There is no waiter, so we unlock the futex. The owner died
881 * bit has not to be preserved here. We are the owner: 894 * bit has not to be preserved here. We are the owner:
882 */ 895 */
883 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); 896 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
884 897 return -EFAULT;
885 if (oldval == -EFAULT)
886 return oldval;
887 if (oldval != uval) 898 if (oldval != uval)
888 return -EAGAIN; 899 return -EAGAIN;
889 900
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1071 plist_del(&q->list, &hb1->chain); 1082 plist_del(&q->list, &hb1->chain);
1072 plist_add(&q->list, &hb2->chain); 1083 plist_add(&q->list, &hb2->chain);
1073 q->lock_ptr = &hb2->lock; 1084 q->lock_ptr = &hb2->lock;
1074#ifdef CONFIG_DEBUG_PI_LIST
1075 q->list.plist.spinlock = &hb2->lock;
1076#endif
1077 } 1085 }
1078 get_futex_key_refs(key2); 1086 get_futex_key_refs(key2);
1079 q->key = *key2; 1087 q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1100 get_futex_key_refs(key); 1108 get_futex_key_refs(key);
1101 q->key = *key; 1109 q->key = *key;
1102 1110
1103 WARN_ON(plist_node_empty(&q->list)); 1111 __unqueue_futex(q);
1104 plist_del(&q->list, &q->list.plist);
1105 1112
1106 WARN_ON(!q->rt_waiter); 1113 WARN_ON(!q->rt_waiter);
1107 q->rt_waiter = NULL; 1114 q->rt_waiter = NULL;
1108 1115
1109 q->lock_ptr = &hb->lock; 1116 q->lock_ptr = &hb->lock;
1110#ifdef CONFIG_DEBUG_PI_LIST
1111 q->list.plist.spinlock = &hb->lock;
1112#endif
1113 1117
1114 wake_up_state(q->task, TASK_NORMAL); 1118 wake_up_state(q->task, TASK_NORMAL);
1115} 1119}
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1457 prio = min(current->normal_prio, MAX_RT_PRIO); 1461 prio = min(current->normal_prio, MAX_RT_PRIO);
1458 1462
1459 plist_node_init(&q->list, prio); 1463 plist_node_init(&q->list, prio);
1460#ifdef CONFIG_DEBUG_PI_LIST
1461 q->list.plist.spinlock = &hb->lock;
1462#endif
1463 plist_add(&q->list, &hb->chain); 1464 plist_add(&q->list, &hb->chain);
1464 q->task = current; 1465 q->task = current;
1465 spin_unlock(&hb->lock); 1466 spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
1504 spin_unlock(lock_ptr); 1505 spin_unlock(lock_ptr);
1505 goto retry; 1506 goto retry;
1506 } 1507 }
1507 WARN_ON(plist_node_empty(&q->list)); 1508 __unqueue_futex(q);
1508 plist_del(&q->list, &q->list.plist);
1509 1509
1510 BUG_ON(q->pi_state); 1510 BUG_ON(q->pi_state);
1511 1511
@@ -1525,8 +1525,7 @@ retry:
1525static void unqueue_me_pi(struct futex_q *q) 1525static void unqueue_me_pi(struct futex_q *q)
1526 __releases(q->lock_ptr) 1526 __releases(q->lock_ptr)
1527{ 1527{
1528 WARN_ON(plist_node_empty(&q->list)); 1528 __unqueue_futex(q);
1529 plist_del(&q->list, &q->list.plist);
1530 1529
1531 BUG_ON(!q->pi_state); 1530 BUG_ON(!q->pi_state);
1532 free_pi_state(q->pi_state); 1531 free_pi_state(q->pi_state);
@@ -1556,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1556 1555
1557 /* 1556 /*
1558 * We are here either because we stole the rtmutex from the 1557 * We are here either because we stole the rtmutex from the
1559 * pending owner or we are the pending owner which failed to 1558 * previous highest priority waiter or we are the highest priority
1560 * get the rtmutex. We have to replace the pending owner TID 1559 * waiter but failed to get the rtmutex the first time.
1561 * in the user space variable. This must be atomic as we have 1560 * We have to replace the newowner TID in the user space variable.
1562 * to preserve the owner died bit here. 1561 * This must be atomic as we have to preserve the owner died bit here.
1563 * 1562 *
1564 * Note: We write the user space value _before_ changing the pi_state 1563 * Note: We write the user space value _before_ changing the pi_state
1565 * because we can fault here. Imagine swapped out pages or a fork 1564 * because we can fault here. Imagine swapped out pages or a fork
@@ -1578,9 +1577,7 @@ retry:
1578 while (1) { 1577 while (1) {
1579 newval = (uval & FUTEX_OWNER_DIED) | newtid; 1578 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1580 1579
1581 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 1580 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1582
1583 if (curval == -EFAULT)
1584 goto handle_fault; 1581 goto handle_fault;
1585 if (curval == uval) 1582 if (curval == uval)
1586 break; 1583 break;
@@ -1608,8 +1605,8 @@ retry:
1608 1605
1609 /* 1606 /*
1610 * To handle the page fault we need to drop the hash bucket 1607 * To handle the page fault we need to drop the hash bucket
1611 * lock here. That gives the other task (either the pending 1608 * lock here. That gives the other task (either the highest priority
1612 * owner itself or the task which stole the rtmutex) the 1609 * waiter itself or the task which stole the rtmutex) the
1613 * chance to try the fixup of the pi_state. So once we are 1610 * chance to try the fixup of the pi_state. So once we are
1614 * back from handling the fault we need to check the pi_state 1611 * back from handling the fault we need to check the pi_state
1615 * after reacquiring the hash bucket lock and before trying to 1612 * after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1685 /* 1682 /*
1686 * pi_state is incorrect, some other task did a lock steal and 1683 * pi_state is incorrect, some other task did a lock steal and
1687 * we returned due to timeout or signal without taking the 1684 * we returned due to timeout or signal without taking the
1688 * rt_mutex. Too late. We can access the rt_mutex_owner without 1685 * rt_mutex. Too late.
1689 * locking, as the other task is now blocked on the hash bucket
1690 * lock. Fix the state up.
1691 */ 1686 */
1687 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1692 owner = rt_mutex_owner(&q->pi_state->pi_mutex); 1688 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1689 if (!owner)
1690 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1691 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1693 ret = fixup_pi_state_owner(uaddr, q, owner); 1692 ret = fixup_pi_state_owner(uaddr, q, owner);
1694 goto out; 1693 goto out;
1695 } 1694 }
1696 1695
1697 /* 1696 /*
1698 * Paranoia check. If we did not take the lock, then we should not be 1697 * Paranoia check. If we did not take the lock, then we should not be
1699 * the owner, nor the pending owner, of the rt_mutex. 1698 * the owner of the rt_mutex.
1700 */ 1699 */
1701 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) 1700 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1702 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " 1701 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
@@ -1781,13 +1780,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1781 * 1780 *
1782 * The basic logical guarantee of a futex is that it blocks ONLY 1781 * The basic logical guarantee of a futex is that it blocks ONLY
1783 * if cond(var) is known to be true at the time of blocking, for 1782 * if cond(var) is known to be true at the time of blocking, for
1784 * any cond. If we queued after testing *uaddr, that would open 1783 * any cond. If we locked the hash-bucket after testing *uaddr, that
1785 * a race condition where we could block indefinitely with 1784 * would open a race condition where we could block indefinitely with
1786 * cond(var) false, which would violate the guarantee. 1785 * cond(var) false, which would violate the guarantee.
1787 * 1786 *
1788 * A consequence is that futex_wait() can return zero and absorb 1787 * On the other hand, we insert q and release the hash-bucket only
1789 * a wakeup when *uaddr != val on entry to the syscall. This is 1788 * after testing *uaddr. This guarantees that futex_wait() will NOT
1790 * rare, but normal. 1789 * absorb a wakeup if *uaddr does not match the desired values
1790 * while the syscall executes.
1791 */ 1791 */
1792retry: 1792retry:
1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); 1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2046,9 +2046,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2046{ 2046{
2047 struct futex_hash_bucket *hb; 2047 struct futex_hash_bucket *hb;
2048 struct futex_q *this, *next; 2048 struct futex_q *this, *next;
2049 u32 uval;
2050 struct plist_head *head; 2049 struct plist_head *head;
2051 union futex_key key = FUTEX_KEY_INIT; 2050 union futex_key key = FUTEX_KEY_INIT;
2051 u32 uval, vpid = task_pid_vnr(current);
2052 int ret; 2052 int ret;
2053 2053
2054retry: 2054retry:
@@ -2057,7 +2057,7 @@ retry:
2057 /* 2057 /*
2058 * We release only a lock we actually own: 2058 * We release only a lock we actually own:
2059 */ 2059 */
2060 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2060 if ((uval & FUTEX_TID_MASK) != vpid)
2061 return -EPERM; 2061 return -EPERM;
2062 2062
2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); 2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2072,17 +2072,14 @@ retry:
2072 * again. If it succeeds then we can return without waking 2072 * again. If it succeeds then we can return without waking
2073 * anyone else up: 2073 * anyone else up:
2074 */ 2074 */
2075 if (!(uval & FUTEX_OWNER_DIED)) 2075 if (!(uval & FUTEX_OWNER_DIED) &&
2076 uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0); 2076 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2077
2078
2079 if (unlikely(uval == -EFAULT))
2080 goto pi_faulted; 2077 goto pi_faulted;
2081 /* 2078 /*
2082 * Rare case: we managed to release the lock atomically, 2079 * Rare case: we managed to release the lock atomically,
2083 * no need to wake anyone else up: 2080 * no need to wake anyone else up:
2084 */ 2081 */
2085 if (unlikely(uval == task_pid_vnr(current))) 2082 if (unlikely(uval == vpid))
2086 goto out_unlock; 2083 goto out_unlock;
2087 2084
2088 /* 2085 /*
@@ -2167,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2167 * We were woken prior to requeue by a timeout or a signal. 2164 * We were woken prior to requeue by a timeout or a signal.
2168 * Unqueue the futex_q and determine which it was. 2165 * Unqueue the futex_q and determine which it was.
2169 */ 2166 */
2170 plist_del(&q->list, &q->list.plist); 2167 plist_del(&q->list, &hb->chain);
2171 2168
2172 /* Handle spurious wakeups gracefully */ 2169 /* Handle spurious wakeups gracefully */
2173 ret = -EWOULDBLOCK; 2170 ret = -EWOULDBLOCK;
@@ -2463,11 +2460,20 @@ retry:
2463 * userspace. 2460 * userspace.
2464 */ 2461 */
2465 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 2462 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2466 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); 2463 /*
2467 2464 * We are not holding a lock here, but we want to have
2468 if (nval == -EFAULT) 2465 * the pagefault_disable/enable() protection because
2469 return -1; 2466 * we want to handle the fault gracefully. If the
2470 2467 * access fails we try to fault in the futex with R/W
2468 * verification via get_user_pages. get_user() above
2469 * does not guarantee R/W access. If that fails we
2470 * give up and leave the futex locked.
2471 */
2472 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2473 if (fault_in_user_writeable(uaddr))
2474 return -1;
2475 goto retry;
2476 }
2471 if (nval != uval) 2477 if (nval != uval)
2472 goto retry; 2478 goto retry;
2473 2479
@@ -2678,8 +2684,7 @@ static int __init futex_init(void)
2678 * implementation, the non-functional ones will return 2684 * implementation, the non-functional ones will return
2679 * -ENOSYS. 2685 * -ENOSYS.
2680 */ 2686 */
2681 curval = cmpxchg_futex_value_locked(NULL, 0, 0); 2687 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2682 if (curval == -EFAULT)
2683 futex_cmpxchg_enabled = 1; 2688 futex_cmpxchg_enabled = 1;
2684 2689
2685 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2690 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c048615..e38f5a073d01 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -334,6 +334,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
334 334
335static struct debug_obj_descr hrtimer_debug_descr; 335static struct debug_obj_descr hrtimer_debug_descr;
336 336
337static void *hrtimer_debug_hint(void *addr)
338{
339 return ((struct hrtimer *) addr)->function;
340}
341
337/* 342/*
338 * fixup_init is called when: 343 * fixup_init is called when:
339 * - an active object is initialized 344 * - an active object is initialized
@@ -393,6 +398,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
393 398
394static struct debug_obj_descr hrtimer_debug_descr = { 399static struct debug_obj_descr hrtimer_debug_descr = {
395 .name = "hrtimer", 400 .name = "hrtimer",
401 .debug_hint = hrtimer_debug_hint,
396 .fixup_init = hrtimer_fixup_init, 402 .fixup_init = hrtimer_fixup_init,
397 .fixup_activate = hrtimer_fixup_activate, 403 .fixup_activate = hrtimer_fixup_activate,
398 .fixup_free = hrtimer_fixup_free, 404 .fixup_free = hrtimer_fixup_free,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9033c1c70828..2782bacdf494 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq);
282 282
283void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) 283void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
284{ 284{
285 if (resume) 285 if (resume) {
286 if (!(desc->status & IRQ_SUSPENDED)) {
287 if (!desc->action)
288 return;
289 if (!(desc->action->flags & IRQF_FORCE_RESUME))
290 return;
291 /* Pretend that it got disabled ! */
292 desc->depth++;
293 }
286 desc->status &= ~IRQ_SUSPENDED; 294 desc->status &= ~IRQ_SUSPENDED;
295 }
287 296
288 switch (desc->depth) { 297 switch (desc->depth) {
289 case 0: 298 case 0:
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b03..d6bfb89cce91 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
53 for_each_irq_desc(irq, desc) { 53 for_each_irq_desc(irq, desc) {
54 unsigned long flags; 54 unsigned long flags;
55 55
56 if (!(desc->status & IRQ_SUSPENDED))
57 continue;
58
59 raw_spin_lock_irqsave(&desc->lock, flags); 56 raw_spin_lock_irqsave(&desc->lock, flags);
60 __enable_irq(desc, irq, true); 57 __enable_irq(desc, irq, true);
61 raw_spin_unlock_irqrestore(&desc->lock, flags); 58 raw_spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c8..3c7cbc2c33be 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
215 put_pid(waiter->deadlock_task_pid); 215 put_pid(waiter->deadlock_task_pid);
216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); 216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
218 TRACE_WARN_ON(waiter->task);
219 memset(waiter, 0x22, sizeof(*waiter)); 218 memset(waiter, 0x22, sizeof(*waiter));
220} 219}
221 220
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef1..5c9ccd380966 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
9#include <linux/kthread.h> 9#include <linux/kthread.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/smp_lock.h>
13#include <linux/spinlock.h> 12#include <linux/spinlock.h>
14#include <linux/sysdev.h> 13#include <linux/sysdev.h>
15#include <linux/timer.h> 14#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
27 int opcode; 26 int opcode;
28 int opdata; 27 int opdata;
29 int mutexes[MAX_RT_TEST_MUTEXES]; 28 int mutexes[MAX_RT_TEST_MUTEXES];
30 int bkl;
31 int event; 29 int event;
32 struct sys_device sysdev; 30 struct sys_device sysdev;
33}; 31};
@@ -46,9 +44,8 @@ enum test_opcodes {
46 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ 44 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
47 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ 45 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
48 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ 46 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
49 RTTEST_LOCKBKL, /* 9 Lock BKL */ 47 /* 9, 10 - reserved for BKL commemoration */
50 RTTEST_UNLOCKBKL, /* 10 Unlock BKL */ 48 RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
51 RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
52 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ 49 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
53 RTTEST_RESET = 99, /* 99 Reset all pending operations */ 50 RTTEST_RESET = 99, /* 99 Reset all pending operations */
54}; 51};
@@ -74,13 +71,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
74 td->mutexes[i] = 0; 71 td->mutexes[i] = 0;
75 } 72 }
76 } 73 }
77
78 if (!lockwakeup && td->bkl == 4) {
79#ifdef CONFIG_LOCK_KERNEL
80 unlock_kernel();
81#endif
82 td->bkl = 0;
83 }
84 return 0; 74 return 0;
85 75
86 case RTTEST_RESETEVENT: 76 case RTTEST_RESETEVENT:
@@ -131,25 +121,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
131 td->mutexes[id] = 0; 121 td->mutexes[id] = 0;
132 return 0; 122 return 0;
133 123
134 case RTTEST_LOCKBKL:
135 if (td->bkl)
136 return 0;
137 td->bkl = 1;
138#ifdef CONFIG_LOCK_KERNEL
139 lock_kernel();
140#endif
141 td->bkl = 4;
142 return 0;
143
144 case RTTEST_UNLOCKBKL:
145 if (td->bkl != 4)
146 break;
147#ifdef CONFIG_LOCK_KERNEL
148 unlock_kernel();
149#endif
150 td->bkl = 0;
151 return 0;
152
153 default: 124 default:
154 break; 125 break;
155 } 126 }
@@ -196,7 +167,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
196 td->event = atomic_add_return(1, &rttest_event); 167 td->event = atomic_add_return(1, &rttest_event);
197 break; 168 break;
198 169
199 case RTTEST_LOCKBKL:
200 default: 170 default:
201 break; 171 break;
202 } 172 }
@@ -229,8 +199,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
229 td->event = atomic_add_return(1, &rttest_event); 199 td->event = atomic_add_return(1, &rttest_event);
230 return; 200 return;
231 201
232 case RTTEST_LOCKBKL:
233 return;
234 default: 202 default:
235 return; 203 return;
236 } 204 }
@@ -380,11 +348,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
380 spin_lock(&rttest_lock); 348 spin_lock(&rttest_lock);
381 349
382 curr += sprintf(curr, 350 curr += sprintf(curr,
383 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:", 351 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
384 td->opcode, td->event, tsk->state, 352 td->opcode, td->event, tsk->state,
385 (MAX_RT_PRIO - 1) - tsk->prio, 353 (MAX_RT_PRIO - 1) - tsk->prio,
386 (MAX_RT_PRIO - 1) - tsk->normal_prio, 354 (MAX_RT_PRIO - 1) - tsk->normal_prio,
387 tsk->pi_blocked_on, td->bkl); 355 tsk->pi_blocked_on);
388 356
389 for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) 357 for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
390 curr += sprintf(curr, "%d", td->mutexes[i]); 358 curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..ab449117aaf2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
20/* 20/*
21 * lock->owner state tracking: 21 * lock->owner state tracking:
22 * 22 *
23 * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1 23 * lock->owner holds the task_struct pointer of the owner. Bit 0
24 * are used to keep track of the "owner is pending" and "lock has 24 * is used to keep track of the "lock has waiters" state.
25 * waiters" state.
26 * 25 *
27 * owner bit1 bit0 26 * owner bit0
28 * NULL 0 0 lock is free (fast acquire possible) 27 * NULL 0 lock is free (fast acquire possible)
29 * NULL 0 1 invalid state 28 * NULL 1 lock is free and has waiters and the top waiter
30 * NULL 1 0 Transitional State* 29 * is going to take the lock*
31 * NULL 1 1 invalid state 30 * taskpointer 0 lock is held (fast release possible)
32 * taskpointer 0 0 lock is held (fast release possible) 31 * taskpointer 1 lock is held and has waiters**
33 * taskpointer 0 1 task is pending owner
34 * taskpointer 1 0 lock is held and has waiters
35 * taskpointer 1 1 task is pending owner and lock has more waiters
36 *
37 * Pending ownership is assigned to the top (highest priority)
38 * waiter of the lock, when the lock is released. The thread is woken
39 * up and can now take the lock. Until the lock is taken (bit 0
40 * cleared) a competing higher priority thread can steal the lock
41 * which puts the woken up thread back on the waiters list.
42 * 32 *
43 * The fast atomic compare exchange based acquire and release is only 33 * The fast atomic compare exchange based acquire and release is only
44 * possible when bit 0 and 1 of lock->owner are 0. 34 * possible when bit 0 of lock->owner is 0.
35 *
36 * (*) It also can be a transitional state when grabbing the lock
37 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
38 * we need to set the bit0 before looking at the lock, and the owner may be
39 * NULL in this small time, hence this can be a transitional state.
45 * 40 *
46 * (*) There's a small time where the owner can be NULL and the 41 * (**) There is a small time when bit 0 is set but there are no
47 * "lock has waiters" bit is set. This can happen when grabbing the lock. 42 * waiters. This can happen when grabbing the lock in the slow path.
48 * To prevent a cmpxchg of the owner releasing the lock, we need to set this 43 * To prevent a cmpxchg of the owner releasing the lock, we need to
49 * bit before looking at the lock, hence the reason this is a transitional 44 * set this bit before looking at the lock.
50 * state.
51 */ 45 */
52 46
53static void 47static void
54rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, 48rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
55 unsigned long mask)
56{ 49{
57 unsigned long val = (unsigned long)owner | mask; 50 unsigned long val = (unsigned long)owner;
58 51
59 if (rt_mutex_has_waiters(lock)) 52 if (rt_mutex_has_waiters(lock))
60 val |= RT_MUTEX_HAS_WAITERS; 53 val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
203 * reached or the state of the chain has changed while we 196 * reached or the state of the chain has changed while we
204 * dropped the locks. 197 * dropped the locks.
205 */ 198 */
206 if (!waiter || !waiter->task) 199 if (!waiter)
207 goto out_unlock_pi; 200 goto out_unlock_pi;
208 201
209 /* 202 /*
210 * Check the orig_waiter state. After we dropped the locks, 203 * Check the orig_waiter state. After we dropped the locks,
211 * the previous owner of the lock might have released the lock 204 * the previous owner of the lock might have released the lock.
212 * and made us the pending owner:
213 */ 205 */
214 if (orig_waiter && !orig_waiter->task) 206 if (orig_waiter && !rt_mutex_owner(orig_lock))
215 goto out_unlock_pi; 207 goto out_unlock_pi;
216 208
217 /* 209 /*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
254 246
255 /* Release the task */ 247 /* Release the task */
256 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 248 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
249 if (!rt_mutex_owner(lock)) {
250 /*
251 * If the requeue above changed the top waiter, then we need
252 * to wake the new top waiter up to try to get the lock.
253 */
254
255 if (top_waiter != rt_mutex_top_waiter(lock))
256 wake_up_process(rt_mutex_top_waiter(lock)->task);
257 raw_spin_unlock(&lock->wait_lock);
258 goto out_put_task;
259 }
257 put_task_struct(task); 260 put_task_struct(task);
258 261
259 /* Grab the next task */ 262 /* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
296} 299}
297 300
298/* 301/*
299 * Optimization: check if we can steal the lock from the
300 * assigned pending owner [which might not have taken the
301 * lock yet]:
302 */
303static inline int try_to_steal_lock(struct rt_mutex *lock,
304 struct task_struct *task)
305{
306 struct task_struct *pendowner = rt_mutex_owner(lock);
307 struct rt_mutex_waiter *next;
308 unsigned long flags;
309
310 if (!rt_mutex_owner_pending(lock))
311 return 0;
312
313 if (pendowner == task)
314 return 1;
315
316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) {
318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0;
320 }
321
322 /*
323 * Check if a waiter is enqueued on the pending owners
324 * pi_waiters list. Remove it and readjust pending owners
325 * priority.
326 */
327 if (likely(!rt_mutex_has_waiters(lock))) {
328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1;
330 }
331
332 /* No chain handling, pending owner is not blocked on anything: */
333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner);
336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337
338 /*
339 * We are going to steal the lock and a waiter was
340 * enqueued on the pending owners pi_waiters queue. So
341 * we have to enqueue this waiter into
342 * task->pi_waiters list. This covers the case,
343 * where task is boosted because it holds another
344 * lock and gets unboosted because the booster is
345 * interrupted, so we would delay a waiter with higher
346 * priority as task->normal_prio.
347 *
348 * Note: in the rare case of a SCHED_OTHER task changing
349 * its priority and thus stealing the lock, next->task
350 * might be task:
351 */
352 if (likely(next->task != task)) {
353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task);
356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 }
358 return 1;
359}
360
361/*
362 * Try to take an rt-mutex 302 * Try to take an rt-mutex
363 * 303 *
364 * This fails
365 * - when the lock has a real owner
366 * - when a different pending owner exists and has higher priority than current
367 *
368 * Must be called with lock->wait_lock held. 304 * Must be called with lock->wait_lock held.
305 *
306 * @lock: the lock to be acquired.
307 * @task: the task which wants to acquire the lock
308 * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
369 */ 309 */
370static int try_to_take_rt_mutex(struct rt_mutex *lock) 310static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
311 struct rt_mutex_waiter *waiter)
371{ 312{
372 /* 313 /*
373 * We have to be careful here if the atomic speedups are 314 * We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
390 */ 331 */
391 mark_rt_mutex_waiters(lock); 332 mark_rt_mutex_waiters(lock);
392 333
393 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) 334 if (rt_mutex_owner(lock))
394 return 0; 335 return 0;
395 336
337 /*
338 * It will get the lock because of one of these conditions:
339 * 1) there is no waiter
340 * 2) higher priority than waiters
341 * 3) it is top waiter
342 */
343 if (rt_mutex_has_waiters(lock)) {
344 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
345 if (!waiter || waiter != rt_mutex_top_waiter(lock))
346 return 0;
347 }
348 }
349
350 if (waiter || rt_mutex_has_waiters(lock)) {
351 unsigned long flags;
352 struct rt_mutex_waiter *top;
353
354 raw_spin_lock_irqsave(&task->pi_lock, flags);
355
356 /* remove the queued waiter. */
357 if (waiter) {
358 plist_del(&waiter->list_entry, &lock->wait_list);
359 task->pi_blocked_on = NULL;
360 }
361
362 /*
363 * We have to enqueue the top waiter(if it exists) into
364 * task->pi_waiters list.
365 */
366 if (rt_mutex_has_waiters(lock)) {
367 top = rt_mutex_top_waiter(lock);
368 top->pi_list_entry.prio = top->list_entry.prio;
369 plist_add(&top->pi_list_entry, &task->pi_waiters);
370 }
371 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
372 }
373
396 /* We got the lock. */ 374 /* We got the lock. */
397 debug_rt_mutex_lock(lock); 375 debug_rt_mutex_lock(lock);
398 376
399 rt_mutex_set_owner(lock, current, 0); 377 rt_mutex_set_owner(lock, task);
400 378
401 rt_mutex_deadlock_account_lock(lock, current); 379 rt_mutex_deadlock_account_lock(lock, task);
402 380
403 return 1; 381 return 1;
404} 382}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
436 414
437 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 415 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 416
417 if (!owner)
418 return 0;
419
439 if (waiter == rt_mutex_top_waiter(lock)) { 420 if (waiter == rt_mutex_top_waiter(lock)) {
440 raw_spin_lock_irqsave(&owner->pi_lock, flags); 421 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 422 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
472/* 453/*
473 * Wake up the next waiter on the lock. 454 * Wake up the next waiter on the lock.
474 * 455 *
475 * Remove the top waiter from the current tasks waiter list and from 456 * Remove the top waiter from the current tasks waiter list and wake it up.
476 * the lock waiter list. Set it as pending owner. Then wake it up.
477 * 457 *
478 * Called with lock->wait_lock held. 458 * Called with lock->wait_lock held.
479 */ 459 */
480static void wakeup_next_waiter(struct rt_mutex *lock) 460static void wakeup_next_waiter(struct rt_mutex *lock)
481{ 461{
482 struct rt_mutex_waiter *waiter; 462 struct rt_mutex_waiter *waiter;
483 struct task_struct *pendowner;
484 unsigned long flags; 463 unsigned long flags;
485 464
486 raw_spin_lock_irqsave(&current->pi_lock, flags); 465 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 466
488 waiter = rt_mutex_top_waiter(lock); 467 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list);
490 468
491 /* 469 /*
492 * Remove it from current->pi_waiters. We do not adjust a 470 * Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
495 * lock->wait_lock. 473 * lock->wait_lock.
496 */ 474 */
497 plist_del(&waiter->pi_list_entry, &current->pi_waiters); 475 plist_del(&waiter->pi_list_entry, &current->pi_waiters);
498 pendowner = waiter->task;
499 waiter->task = NULL;
500 476
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 477 rt_mutex_set_owner(lock, NULL);
502 478
503 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 479 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 480
505 /* 481 wake_up_process(waiter->task);
506 * Clear the pi_blocked_on variable and enqueue a possible
507 * waiter into the pi_waiters list of the pending owner. This
508 * prevents that in case the pending owner gets unboosted a
509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner.
511 */
512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513
514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter);
516 WARN_ON(pendowner->pi_blocked_on->lock != lock);
517
518 pendowner->pi_blocked_on = NULL;
519
520 if (rt_mutex_has_waiters(lock)) {
521 struct rt_mutex_waiter *next;
522
523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 }
526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527
528 wake_up_process(pendowner);
529} 482}
530 483
531/* 484/*
532 * Remove a waiter from a lock 485 * Remove a waiter from a lock and give up
533 * 486 *
534 * Must be called with lock->wait_lock held 487 * Must be called with lock->wait_lock held and
488 * have just failed to try_to_take_rt_mutex().
535 */ 489 */
536static void remove_waiter(struct rt_mutex *lock, 490static void remove_waiter(struct rt_mutex *lock,
537 struct rt_mutex_waiter *waiter) 491 struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
543 497
544 raw_spin_lock_irqsave(&current->pi_lock, flags); 498 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 499 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 500 current->pi_blocked_on = NULL;
548 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 501 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 502
550 if (first && owner != current) { 503 if (!owner)
504 return;
505
506 if (first) {
551 507
552 raw_spin_lock_irqsave(&owner->pi_lock, flags); 508 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 509
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
614 * or TASK_UNINTERRUPTIBLE) 570 * or TASK_UNINTERRUPTIBLE)
615 * @timeout: the pre-initialized and started timer, or NULL for none 571 * @timeout: the pre-initialized and started timer, or NULL for none
616 * @waiter: the pre-initialized rt_mutex_waiter 572 * @waiter: the pre-initialized rt_mutex_waiter
617 * @detect_deadlock: passed to task_blocks_on_rt_mutex
618 * 573 *
619 * lock->wait_lock must be held by the caller. 574 * lock->wait_lock must be held by the caller.
620 */ 575 */
621static int __sched 576static int __sched
622__rt_mutex_slowlock(struct rt_mutex *lock, int state, 577__rt_mutex_slowlock(struct rt_mutex *lock, int state,
623 struct hrtimer_sleeper *timeout, 578 struct hrtimer_sleeper *timeout,
624 struct rt_mutex_waiter *waiter, 579 struct rt_mutex_waiter *waiter)
625 int detect_deadlock)
626{ 580{
627 int ret = 0; 581 int ret = 0;
628 582
629 for (;;) { 583 for (;;) {
630 /* Try to acquire the lock: */ 584 /* Try to acquire the lock: */
631 if (try_to_take_rt_mutex(lock)) 585 if (try_to_take_rt_mutex(lock, current, waiter))
632 break; 586 break;
633 587
634 /* 588 /*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
645 break; 599 break;
646 } 600 }
647 601
648 /*
649 * waiter->task is NULL the first time we come here and
650 * when we have been woken up by the previous owner
651 * but the lock got stolen by a higher prio task.
652 */
653 if (!waiter->task) {
654 ret = task_blocks_on_rt_mutex(lock, waiter, current,
655 detect_deadlock);
656 /*
657 * If we got woken up by the owner then start loop
658 * all over without going into schedule to try
659 * to get the lock now:
660 */
661 if (unlikely(!waiter->task)) {
662 /*
663 * Reset the return value. We might
664 * have returned with -EDEADLK and the
665 * owner released the lock while we
666 * were walking the pi chain.
667 */
668 ret = 0;
669 continue;
670 }
671 if (unlikely(ret))
672 break;
673 }
674
675 raw_spin_unlock(&lock->wait_lock); 602 raw_spin_unlock(&lock->wait_lock);
676 603
677 debug_rt_mutex_print_deadlock(waiter); 604 debug_rt_mutex_print_deadlock(waiter);
678 605
679 if (waiter->task) 606 schedule_rt_mutex(lock);
680 schedule_rt_mutex(lock);
681 607
682 raw_spin_lock(&lock->wait_lock); 608 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 609 set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
698 int ret = 0; 624 int ret = 0;
699 625
700 debug_rt_mutex_init_waiter(&waiter); 626 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL;
702 627
703 raw_spin_lock(&lock->wait_lock); 628 raw_spin_lock(&lock->wait_lock);
704 629
705 /* Try to acquire the lock again: */ 630 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 631 if (try_to_take_rt_mutex(lock, current, NULL)) {
707 raw_spin_unlock(&lock->wait_lock); 632 raw_spin_unlock(&lock->wait_lock);
708 return 0; 633 return 0;
709 } 634 }
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
717 timeout->task = NULL; 642 timeout->task = NULL;
718 } 643 }
719 644
720 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, 645 ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
721 detect_deadlock); 646
647 if (likely(!ret))
648 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
722 649
723 set_current_state(TASK_RUNNING); 650 set_current_state(TASK_RUNNING);
724 651
725 if (unlikely(waiter.task)) 652 if (unlikely(ret))
726 remove_waiter(lock, &waiter); 653 remove_waiter(lock, &waiter);
727 654
728 /* 655 /*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
737 if (unlikely(timeout)) 664 if (unlikely(timeout))
738 hrtimer_cancel(&timeout->timer); 665 hrtimer_cancel(&timeout->timer);
739 666
740 /*
741 * Readjust priority, when we did not get the lock. We might
742 * have been the pending owner and boosted. Since we did not
743 * take the lock, the PI boost has to go.
744 */
745 if (unlikely(ret))
746 rt_mutex_adjust_prio(current);
747
748 debug_rt_mutex_free_waiter(&waiter); 667 debug_rt_mutex_free_waiter(&waiter);
749 668
750 return ret; 669 return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
762 681
763 if (likely(rt_mutex_owner(lock) != current)) { 682 if (likely(rt_mutex_owner(lock) != current)) {
764 683
765 ret = try_to_take_rt_mutex(lock); 684 ret = try_to_take_rt_mutex(lock, current, NULL);
766 /* 685 /*
767 * try_to_take_rt_mutex() sets the lock waiters 686 * try_to_take_rt_mutex() sets the lock waiters
768 * bit unconditionally. Clean this up. 687 * bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
992{ 911{
993 __rt_mutex_init(lock, NULL); 912 __rt_mutex_init(lock, NULL);
994 debug_rt_mutex_proxy_lock(lock, proxy_owner); 913 debug_rt_mutex_proxy_lock(lock, proxy_owner);
995 rt_mutex_set_owner(lock, proxy_owner, 0); 914 rt_mutex_set_owner(lock, proxy_owner);
996 rt_mutex_deadlock_account_lock(lock, proxy_owner); 915 rt_mutex_deadlock_account_lock(lock, proxy_owner);
997} 916}
998 917
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1008 struct task_struct *proxy_owner) 927 struct task_struct *proxy_owner)
1009{ 928{
1010 debug_rt_mutex_proxy_unlock(lock); 929 debug_rt_mutex_proxy_unlock(lock);
1011 rt_mutex_set_owner(lock, NULL, 0); 930 rt_mutex_set_owner(lock, NULL);
1012 rt_mutex_deadlock_account_unlock(proxy_owner); 931 rt_mutex_deadlock_account_unlock(proxy_owner);
1013} 932}
1014 933
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1034 953
1035 raw_spin_lock(&lock->wait_lock); 954 raw_spin_lock(&lock->wait_lock);
1036 955
1037 mark_rt_mutex_waiters(lock); 956 if (try_to_take_rt_mutex(lock, task, NULL)) {
1038
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0);
1043 raw_spin_unlock(&lock->wait_lock); 957 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 958 return 1;
1046 } 959 }
1047 960
1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 961 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1049 962
1050 if (ret && !waiter->task) { 963 if (ret && !rt_mutex_owner(lock)) {
1051 /* 964 /*
1052 * Reset the return value. We might have 965 * Reset the return value. We might have
1053 * returned with -EDEADLK and the owner 966 * returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 969 */
1057 ret = 0; 970 ret = 0;
1058 } 971 }
972
973 if (unlikely(ret))
974 remove_waiter(lock, waiter);
975
1059 raw_spin_unlock(&lock->wait_lock); 976 raw_spin_unlock(&lock->wait_lock);
1060 977
1061 debug_rt_mutex_print_deadlock(waiter); 978 debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1110 1027
1111 set_current_state(TASK_INTERRUPTIBLE); 1028 set_current_state(TASK_INTERRUPTIBLE);
1112 1029
1113 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, 1030 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1114 detect_deadlock);
1115 1031
1116 set_current_state(TASK_RUNNING); 1032 set_current_state(TASK_RUNNING);
1117 1033
1118 if (unlikely(waiter->task)) 1034 if (unlikely(ret))
1119 remove_waiter(lock, waiter); 1035 remove_waiter(lock, waiter);
1120 1036
1121 /* 1037 /*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1126 1042
1127 raw_spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1128 1044
1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been
1131 * the pending owner and boosted. Since we did not take the lock, the
1132 * PI boost has to go.
1133 */
1134 if (unlikely(ret))
1135 rt_mutex_adjust_prio(current);
1136
1137 return ret; 1045 return ret;
1138} 1046}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866af..53a66c85261b 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
91/* 91/*
92 * lock->owner state tracking: 92 * lock->owner state tracking:
93 */ 93 */
94#define RT_MUTEX_OWNER_PENDING 1UL 94#define RT_MUTEX_HAS_WAITERS 1UL
95#define RT_MUTEX_HAS_WAITERS 2UL 95#define RT_MUTEX_OWNER_MASKALL 1UL
96#define RT_MUTEX_OWNER_MASKALL 3UL
97 96
98static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) 97static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
99{ 98{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
101 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); 100 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
102} 101}
103 102
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{
106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108}
109
110static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
111{
112 return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
113}
114
115/* 103/*
116 * PI-futex support (proxy locking functions, etc.): 104 * PI-futex support (proxy locking functions, etc.):
117 */ 105 */
diff --git a/kernel/sched.c b/kernel/sched.c
index 79e611cd83dd..57a18e8d28c8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4189,6 +4189,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
4189{ 4189{
4190 __wake_up_common(q, mode, 1, 0, key); 4190 __wake_up_common(q, mode, 1, 0, key);
4191} 4191}
4192EXPORT_SYMBOL_GPL(__wake_up_locked_key);
4192 4193
4193/** 4194/**
4194 * __wake_up_sync_key - wake up threads blocked on a waitqueue. 4195 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..01f75a5f17af 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
210 210
211static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 211static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
212{ 212{
213 int this_cpu = smp_processor_id();
214 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; 213 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
215 struct sched_rt_entity *rt_se; 214 struct sched_rt_entity *rt_se;
216 215
217 rt_se = rt_rq->tg->rt_se[this_cpu]; 216 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
217
218 rt_se = rt_rq->tg->rt_se[cpu];
218 219
219 if (rt_rq->rt_nr_running) { 220 if (rt_rq->rt_nr_running) {
220 if (rt_se && !on_rt_rq(rt_se)) 221 if (rt_se && !on_rt_rq(rt_se))
@@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
226 227
227static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 228static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
228{ 229{
229 int this_cpu = smp_processor_id();
230 struct sched_rt_entity *rt_se; 230 struct sched_rt_entity *rt_se;
231 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
231 232
232 rt_se = rt_rq->tg->rt_se[this_cpu]; 233 rt_se = rt_rq->tg->rt_se[cpu];
233 234
234 if (rt_se && on_rt_rq(rt_se)) 235 if (rt_se && on_rt_rq(rt_se))
235 dequeue_rt_entity(rt_se); 236 dequeue_rt_entity(rt_se);
@@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
565 if (rt_rq->rt_time || rt_rq->rt_nr_running) 566 if (rt_rq->rt_time || rt_rq->rt_nr_running)
566 idle = 0; 567 idle = 0;
567 raw_spin_unlock(&rt_rq->rt_runtime_lock); 568 raw_spin_unlock(&rt_rq->rt_runtime_lock);
568 } else if (rt_rq->rt_nr_running) 569 } else if (rt_rq->rt_nr_running) {
569 idle = 0; 570 idle = 0;
571 if (!rt_rq_throttled(rt_rq))
572 enqueue = 1;
573 }
570 574
571 if (enqueue) 575 if (enqueue)
572 sched_rt_rq_enqueue(rt_rq); 576 sched_rt_rq_enqueue(rt_rq);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c7..25cc41cd8f33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
186/* fanotify! */ 186/* fanotify! */
187cond_syscall(sys_fanotify_init); 187cond_syscall(sys_fanotify_init);
188cond_syscall(sys_fanotify_mark); 188cond_syscall(sys_fanotify_mark);
189
190/* open by handle */
191cond_syscall(sys_name_to_handle_at);
192cond_syscall(sys_open_by_handle_at);
193cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index daef911cbadb..19b9d85e06cc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -194,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
194static struct ctl_table root_table[]; 194static struct ctl_table root_table[];
195static struct ctl_table_root sysctl_table_root; 195static struct ctl_table_root sysctl_table_root;
196static struct ctl_table_header root_table_header = { 196static struct ctl_table_header root_table_header = {
197 .count = 1, 197 {{.count = 1,
198 .ctl_table = root_table, 198 .ctl_table = root_table,
199 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), 199 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
200 .root = &sysctl_table_root, 200 .root = &sysctl_table_root,
201 .set = &sysctl_table_root.default_set, 201 .set = &sysctl_table_root.default_set,
202}; 202};
@@ -1567,11 +1567,16 @@ void sysctl_head_get(struct ctl_table_header *head)
1567 spin_unlock(&sysctl_lock); 1567 spin_unlock(&sysctl_lock);
1568} 1568}
1569 1569
1570static void free_head(struct rcu_head *rcu)
1571{
1572 kfree(container_of(rcu, struct ctl_table_header, rcu));
1573}
1574
1570void sysctl_head_put(struct ctl_table_header *head) 1575void sysctl_head_put(struct ctl_table_header *head)
1571{ 1576{
1572 spin_lock(&sysctl_lock); 1577 spin_lock(&sysctl_lock);
1573 if (!--head->count) 1578 if (!--head->count)
1574 kfree(head); 1579 call_rcu(&head->rcu, free_head);
1575 spin_unlock(&sysctl_lock); 1580 spin_unlock(&sysctl_lock);
1576} 1581}
1577 1582
@@ -1948,10 +1953,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
1948 start_unregistering(header); 1953 start_unregistering(header);
1949 if (!--header->parent->count) { 1954 if (!--header->parent->count) {
1950 WARN_ON(1); 1955 WARN_ON(1);
1951 kfree(header->parent); 1956 call_rcu(&header->parent->rcu, free_head);
1952 } 1957 }
1953 if (!--header->count) 1958 if (!--header->count)
1954 kfree(header); 1959 call_rcu(&header->rcu, free_head);
1955 spin_unlock(&sysctl_lock); 1960 spin_unlock(&sysctl_lock);
1956} 1961}
1957 1962
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9a..3b8e028b9601 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) 1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1322{ 1322{
1323 const struct bin_table *table = NULL; 1323 const struct bin_table *table = NULL;
1324 struct nameidata nd;
1325 struct vfsmount *mnt; 1324 struct vfsmount *mnt;
1326 struct file *file; 1325 struct file *file;
1327 ssize_t result; 1326 ssize_t result;
1328 char *pathname; 1327 char *pathname;
1329 int flags; 1328 int flags;
1330 int acc_mode;
1331 1329
1332 pathname = sysctl_getname(name, nlen, &table); 1330 pathname = sysctl_getname(name, nlen, &table);
1333 result = PTR_ERR(pathname); 1331 result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1337 /* How should the sysctl be accessed? */ 1335 /* How should the sysctl be accessed? */
1338 if (oldval && oldlen && newval && newlen) { 1336 if (oldval && oldlen && newval && newlen) {
1339 flags = O_RDWR; 1337 flags = O_RDWR;
1340 acc_mode = MAY_READ | MAY_WRITE;
1341 } else if (newval && newlen) { 1338 } else if (newval && newlen) {
1342 flags = O_WRONLY; 1339 flags = O_WRONLY;
1343 acc_mode = MAY_WRITE;
1344 } else if (oldval && oldlen) { 1340 } else if (oldval && oldlen) {
1345 flags = O_RDONLY; 1341 flags = O_RDONLY;
1346 acc_mode = MAY_READ;
1347 } else { 1342 } else {
1348 result = 0; 1343 result = 0;
1349 goto out_putname; 1344 goto out_putname;
1350 } 1345 }
1351 1346
1352 mnt = current->nsproxy->pid_ns->proc_mnt; 1347 mnt = current->nsproxy->pid_ns->proc_mnt;
1353 result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); 1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
1354 if (result)
1355 goto out_putname;
1356
1357 result = may_open(&nd.path, acc_mode, flags);
1358 if (result)
1359 goto out_putpath;
1360
1361 file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
1362 result = PTR_ERR(file); 1349 result = PTR_ERR(file);
1363 if (IS_ERR(file)) 1350 if (IS_ERR(file))
1364 goto out_putname; 1351 goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
1370 putname(pathname); 1357 putname(pathname);
1371out: 1358out:
1372 return result; 1359 return result;
1373
1374out_putpath:
1375 path_put(&nd.path);
1376 goto out_putname;
1377} 1360}
1378 1361
1379 1362
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d245..3503c17ac1d3 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
404 404
405static struct debug_obj_descr timer_debug_descr; 405static struct debug_obj_descr timer_debug_descr;
406 406
407static void *timer_debug_hint(void *addr)
408{
409 return ((struct timer_list *) addr)->function;
410}
411
407/* 412/*
408 * fixup_init is called when: 413 * fixup_init is called when:
409 * - an active object is initialized 414 * - an active object is initialized
@@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
477 482
478static struct debug_obj_descr timer_debug_descr = { 483static struct debug_obj_descr timer_debug_descr = {
479 .name = "timer_list", 484 .name = "timer_list",
485 .debug_hint = timer_debug_hint,
480 .fixup_init = timer_fixup_init, 486 .fixup_init = timer_fixup_init,
481 .fixup_activate = timer_fixup_activate, 487 .fixup_activate = timer_fixup_activate,
482 .fixup_free = timer_fixup_free, 488 .fixup_free = timer_fixup_free,
@@ -964,6 +970,25 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
964 * add_timer_on(). Upon exit the timer is not queued and the handler is 970 * add_timer_on(). Upon exit the timer is not queued and the handler is
965 * not running on any CPU. 971 * not running on any CPU.
966 * 972 *
973 * Note: You must not hold locks that are held in interrupt context
974 * while calling this function. Even if the lock has nothing to do
975 * with the timer in question. Here's why:
976 *
977 * CPU0 CPU1
978 * ---- ----
979 * <SOFTIRQ>
980 * call_timer_fn();
981 * base->running_timer = mytimer;
982 * spin_lock_irq(somelock);
983 * <IRQ>
984 * spin_lock(somelock);
985 * del_timer_sync(mytimer);
986 * while (base->running_timer == mytimer);
987 *
988 * Now del_timer_sync() will never return and never release somelock.
989 * The interrupt on the other CPU is waiting to grab somelock but
990 * it has interrupted the softirq that CPU0 is waiting to finish.
991 *
967 * The function returns whether it has deactivated a pending timer or not. 992 * The function returns whether it has deactivated a pending timer or not.
968 */ 993 */
969int del_timer_sync(struct timer_list *timer) 994int del_timer_sync(struct timer_list *timer)
@@ -971,6 +996,10 @@ int del_timer_sync(struct timer_list *timer)
971#ifdef CONFIG_LOCKDEP 996#ifdef CONFIG_LOCKDEP
972 unsigned long flags; 997 unsigned long flags;
973 998
999 /*
1000 * If lockdep gives a backtrace here, please reference
1001 * the synchronization rules above.
1002 */
974 local_irq_save(flags); 1003 local_irq_save(flags);
975 lock_map_acquire(&timer->lockdep_map); 1004 lock_map_acquire(&timer->lockdep_map);
976 lock_map_release(&timer->lockdep_map); 1005 lock_map_release(&timer->lockdep_map);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee6578b578ad..b5fe4c00eb3c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -316,6 +316,11 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
316 316
317static struct debug_obj_descr work_debug_descr; 317static struct debug_obj_descr work_debug_descr;
318 318
319static void *work_debug_hint(void *addr)
320{
321 return ((struct work_struct *) addr)->func;
322}
323
319/* 324/*
320 * fixup_init is called when: 325 * fixup_init is called when:
321 * - an active object is initialized 326 * - an active object is initialized
@@ -387,6 +392,7 @@ static int work_fixup_free(void *addr, enum debug_obj_state state)
387 392
388static struct debug_obj_descr work_debug_descr = { 393static struct debug_obj_descr work_debug_descr = {
389 .name = "work_struct", 394 .name = "work_struct",
395 .debug_hint = work_debug_hint,
390 .fixup_init = work_fixup_init, 396 .fixup_init = work_fixup_init,
391 .fixup_activate = work_fixup_activate, 397 .fixup_activate = work_fixup_activate,
392 .fixup_free = work_fixup_free, 398 .fixup_free = work_fixup_free,
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index deebcc57d4e6..9d86e45086f5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr)
249 249
250static void debug_print_object(struct debug_obj *obj, char *msg) 250static void debug_print_object(struct debug_obj *obj, char *msg)
251{ 251{
252 struct debug_obj_descr *descr = obj->descr;
252 static int limit; 253 static int limit;
253 254
254 if (limit < 5 && obj->descr != descr_test) { 255 if (limit < 5 && descr != descr_test) {
256 void *hint = descr->debug_hint ?
257 descr->debug_hint(obj->object) : NULL;
255 limit++; 258 limit++;
256 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " 259 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
257 "object type: %s\n", 260 "object type: %s hint: %pS\n",
258 msg, obj_states[obj->state], obj->astate, 261 msg, obj_states[obj->state], obj->astate,
259 obj->descr->name); 262 descr->name, hint);
260 } 263 }
261 debug_objects_warnings++; 264 debug_objects_warnings++;
262} 265}
diff --git a/lib/plist.c b/lib/plist.c
index 1471988d9190..0ae7e6431726 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -28,6 +28,8 @@
28 28
29#ifdef CONFIG_DEBUG_PI_LIST 29#ifdef CONFIG_DEBUG_PI_LIST
30 30
31static struct plist_head test_head;
32
31static void plist_check_prev_next(struct list_head *t, struct list_head *p, 33static void plist_check_prev_next(struct list_head *t, struct list_head *p,
32 struct list_head *n) 34 struct list_head *n)
33{ 35{
@@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top)
54 56
55static void plist_check_head(struct plist_head *head) 57static void plist_check_head(struct plist_head *head)
56{ 58{
57 WARN_ON(!head->rawlock && !head->spinlock); 59 WARN_ON(head != &test_head && !head->rawlock && !head->spinlock);
58 if (head->rawlock) 60 if (head->rawlock)
59 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); 61 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
60 if (head->spinlock) 62 if (head->spinlock)
61 WARN_ON_SMP(!spin_is_locked(head->spinlock)); 63 WARN_ON_SMP(!spin_is_locked(head->spinlock));
62 plist_check_list(&head->prio_list); 64 if (!plist_head_empty(head))
65 plist_check_list(&plist_first(head)->prio_list);
63 plist_check_list(&head->node_list); 66 plist_check_list(&head->node_list);
64} 67}
65 68
@@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head)
75 */ 78 */
76void plist_add(struct plist_node *node, struct plist_head *head) 79void plist_add(struct plist_node *node, struct plist_head *head)
77{ 80{
78 struct plist_node *iter; 81 struct plist_node *first, *iter, *prev = NULL;
82 struct list_head *node_next = &head->node_list;
79 83
80 plist_check_head(head); 84 plist_check_head(head);
81 WARN_ON(!plist_node_empty(node)); 85 WARN_ON(!plist_node_empty(node));
86 WARN_ON(!list_empty(&node->prio_list));
87
88 if (plist_head_empty(head))
89 goto ins_node;
82 90
83 list_for_each_entry(iter, &head->prio_list, plist.prio_list) { 91 first = iter = plist_first(head);
84 if (node->prio < iter->prio) 92
85 goto lt_prio; 93 do {
86 else if (node->prio == iter->prio) { 94 if (node->prio < iter->prio) {
87 iter = list_entry(iter->plist.prio_list.next, 95 node_next = &iter->node_list;
88 struct plist_node, plist.prio_list); 96 break;
89 goto eq_prio;
90 } 97 }
91 }
92 98
93lt_prio: 99 prev = iter;
94 list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); 100 iter = list_entry(iter->prio_list.next,
95eq_prio: 101 struct plist_node, prio_list);
96 list_add_tail(&node->plist.node_list, &iter->plist.node_list); 102 } while (iter != first);
103
104 if (!prev || prev->prio != node->prio)
105 list_add_tail(&node->prio_list, &iter->prio_list);
106ins_node:
107 list_add_tail(&node->node_list, node_next);
97 108
98 plist_check_head(head); 109 plist_check_head(head);
99} 110}
@@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head)
108{ 119{
109 plist_check_head(head); 120 plist_check_head(head);
110 121
111 if (!list_empty(&node->plist.prio_list)) { 122 if (!list_empty(&node->prio_list)) {
112 struct plist_node *next = plist_first(&node->plist); 123 if (node->node_list.next != &head->node_list) {
124 struct plist_node *next;
125
126 next = list_entry(node->node_list.next,
127 struct plist_node, node_list);
113 128
114 list_move_tail(&next->plist.prio_list, &node->plist.prio_list); 129 /* add the next plist_node into prio_list */
115 list_del_init(&node->plist.prio_list); 130 if (list_empty(&next->prio_list))
131 list_add(&next->prio_list, &node->prio_list);
132 }
133 list_del_init(&node->prio_list);
116 } 134 }
117 135
118 list_del_init(&node->plist.node_list); 136 list_del_init(&node->node_list);
119 137
120 plist_check_head(head); 138 plist_check_head(head);
121} 139}
140
141#ifdef CONFIG_DEBUG_PI_LIST
142#include <linux/sched.h>
143#include <linux/module.h>
144#include <linux/init.h>
145
146static struct plist_node __initdata test_node[241];
147
148static void __init plist_test_check(int nr_expect)
149{
150 struct plist_node *first, *prio_pos, *node_pos;
151
152 if (plist_head_empty(&test_head)) {
153 BUG_ON(nr_expect != 0);
154 return;
155 }
156
157 prio_pos = first = plist_first(&test_head);
158 plist_for_each(node_pos, &test_head) {
159 if (nr_expect-- < 0)
160 break;
161 if (node_pos == first)
162 continue;
163 if (node_pos->prio == prio_pos->prio) {
164 BUG_ON(!list_empty(&node_pos->prio_list));
165 continue;
166 }
167
168 BUG_ON(prio_pos->prio > node_pos->prio);
169 BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
170 prio_pos = node_pos;
171 }
172
173 BUG_ON(nr_expect != 0);
174 BUG_ON(prio_pos->prio_list.next != &first->prio_list);
175}
176
177static int __init plist_test(void)
178{
179 int nr_expect = 0, i, loop;
180 unsigned int r = local_clock();
181
182 printk(KERN_INFO "start plist test\n");
183 plist_head_init(&test_head, NULL);
184 for (i = 0; i < ARRAY_SIZE(test_node); i++)
185 plist_node_init(test_node + i, 0);
186
187 for (loop = 0; loop < 1000; loop++) {
188 r = r * 193939 % 47629;
189 i = r % ARRAY_SIZE(test_node);
190 if (plist_node_empty(test_node + i)) {
191 r = r * 193939 % 47629;
192 test_node[i].prio = r % 99;
193 plist_add(test_node + i, &test_head);
194 nr_expect++;
195 } else {
196 plist_del(test_node + i, &test_head);
197 nr_expect--;
198 }
199 plist_test_check(nr_expect);
200 }
201
202 for (i = 0; i < ARRAY_SIZE(test_node); i++) {
203 if (plist_node_empty(test_node + i))
204 continue;
205 plist_del(test_node + i, &test_head);
206 nr_expect--;
207 plist_test_check(nr_expect);
208 }
209
210 printk(KERN_INFO "end plist test\n");
211 return 0;
212}
213
214module_init(plist_test);
215
216#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf3..aa7c3052261f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
222/* 222/*
223 * wait for the read lock to be granted 223 * wait for the read lock to be granted
224 */ 224 */
225asmregparm struct rw_semaphore __sched * 225struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
226rwsem_down_read_failed(struct rw_semaphore *sem)
227{ 226{
228 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, 227 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
229 -RWSEM_ACTIVE_READ_BIAS); 228 -RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
232/* 231/*
233 * wait for the write lock to be granted 232 * wait for the write lock to be granted
234 */ 233 */
235asmregparm struct rw_semaphore __sched * 234struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
236rwsem_down_write_failed(struct rw_semaphore *sem)
237{ 235{
238 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, 236 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
239 -RWSEM_ACTIVE_WRITE_BIAS); 237 -RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
243 * handle waking up a waiter on the semaphore 241 * handle waking up a waiter on the semaphore
244 * - up_read/up_write has decremented the active part of count if we come here 242 * - up_read/up_write has decremented the active part of count if we come here
245 */ 243 */
246asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) 244struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
247{ 245{
248 unsigned long flags; 246 unsigned long flags;
249 247
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
263 * - caller incremented waiting part of count and discovered it still negative 261 * - caller incremented waiting part of count and discovered it still negative
264 * - just wake up any readers at the front of the queue 262 * - just wake up any readers at the front of the queue
265 */ 263 */
266asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) 264struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
267{ 265{
268 unsigned long flags; 266 unsigned long flags;
269 267
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index dbe99a5f2073..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1762,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
1762#ifndef CONFIG_NUMA 1762#ifndef CONFIG_NUMA
1763 VM_BUG_ON(!*hpage); 1763 VM_BUG_ON(!*hpage);
1764 new_page = *hpage; 1764 new_page = *hpage;
1765 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1766 up_read(&mm->mmap_sem);
1767 return;
1768 }
1765#else 1769#else
1766 VM_BUG_ON(*hpage); 1770 VM_BUG_ON(*hpage);
1767 /* 1771 /*
@@ -1781,12 +1785,12 @@ static void collapse_huge_page(struct mm_struct *mm,
1781 *hpage = ERR_PTR(-ENOMEM); 1785 *hpage = ERR_PTR(-ENOMEM);
1782 return; 1786 return;
1783 } 1787 }
1784#endif
1785 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 1788 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1786 up_read(&mm->mmap_sem); 1789 up_read(&mm->mmap_sem);
1787 put_page(new_page); 1790 put_page(new_page);
1788 return; 1791 return;
1789 } 1792 }
1793#endif
1790 1794
1791 /* after allocating the hugepage upgrade to mmap_sem write mode */ 1795 /* after allocating the hugepage upgrade to mmap_sem write mode */
1792 up_read(&mm->mmap_sem); 1796 up_read(&mm->mmap_sem);
diff --git a/mm/rmap.c b/mm/rmap.c
index f21f4a1d6a1c..941bf82e8961 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
497 struct mm_struct *mm = vma->vm_mm; 497 struct mm_struct *mm = vma->vm_mm;
498 int referenced = 0; 498 int referenced = 0;
499 499
500 /*
501 * Don't want to elevate referenced for mlocked page that gets this far,
502 * in order that it progresses to try_to_unmap and is moved to the
503 * unevictable list.
504 */
505 if (vma->vm_flags & VM_LOCKED) {
506 *mapcount = 0; /* break early from loop */
507 *vm_flags |= VM_LOCKED;
508 goto out;
509 }
510
511 /* Pretend the page is referenced if the task has the
512 swap token and is in the middle of a page fault. */
513 if (mm != current->mm && has_swap_token(mm) &&
514 rwsem_is_locked(&mm->mmap_sem))
515 referenced++;
516
517 if (unlikely(PageTransHuge(page))) { 500 if (unlikely(PageTransHuge(page))) {
518 pmd_t *pmd; 501 pmd_t *pmd;
519 502
520 spin_lock(&mm->page_table_lock); 503 spin_lock(&mm->page_table_lock);
504 /*
505 * rmap might return false positives; we must filter
506 * these out using page_check_address_pmd().
507 */
521 pmd = page_check_address_pmd(page, mm, address, 508 pmd = page_check_address_pmd(page, mm, address,
522 PAGE_CHECK_ADDRESS_PMD_FLAG); 509 PAGE_CHECK_ADDRESS_PMD_FLAG);
523 if (pmd && !pmd_trans_splitting(*pmd) && 510 if (!pmd) {
524 pmdp_clear_flush_young_notify(vma, address, pmd)) 511 spin_unlock(&mm->page_table_lock);
512 goto out;
513 }
514
515 if (vma->vm_flags & VM_LOCKED) {
516 spin_unlock(&mm->page_table_lock);
517 *mapcount = 0; /* break early from loop */
518 *vm_flags |= VM_LOCKED;
519 goto out;
520 }
521
522 /* go ahead even if the pmd is pmd_trans_splitting() */
523 if (pmdp_clear_flush_young_notify(vma, address, pmd))
525 referenced++; 524 referenced++;
526 spin_unlock(&mm->page_table_lock); 525 spin_unlock(&mm->page_table_lock);
527 } else { 526 } else {
528 pte_t *pte; 527 pte_t *pte;
529 spinlock_t *ptl; 528 spinlock_t *ptl;
530 529
530 /*
531 * rmap might return false positives; we must filter
532 * these out using page_check_address().
533 */
531 pte = page_check_address(page, mm, address, &ptl, 0); 534 pte = page_check_address(page, mm, address, &ptl, 0);
532 if (!pte) 535 if (!pte)
533 goto out; 536 goto out;
534 537
538 if (vma->vm_flags & VM_LOCKED) {
539 pte_unmap_unlock(pte, ptl);
540 *mapcount = 0; /* break early from loop */
541 *vm_flags |= VM_LOCKED;
542 goto out;
543 }
544
535 if (ptep_clear_flush_young_notify(vma, address, pte)) { 545 if (ptep_clear_flush_young_notify(vma, address, pte)) {
536 /* 546 /*
537 * Don't treat a reference through a sequentially read 547 * Don't treat a reference through a sequentially read
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
546 pte_unmap_unlock(pte, ptl); 556 pte_unmap_unlock(pte, ptl);
547 } 557 }
548 558
559 /* Pretend the page is referenced if the task has the
560 swap token and is in the middle of a page fault. */
561 if (mm != current->mm && has_swap_token(mm) &&
562 rwsem_is_locked(&mm->mmap_sem))
563 referenced++;
564
549 (*mapcount)--; 565 (*mapcount)--;
550 566
551 if (referenced) 567 if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c990602..3437b65d6d6e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2144,8 +2144,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2144{ 2144{
2145 struct inode *inode = dentry->d_inode; 2145 struct inode *inode = dentry->d_inode;
2146 2146
2147 if (*len < 3) 2147 if (*len < 3) {
2148 *len = 3;
2148 return 255; 2149 return 255;
2150 }
2149 2151
2150 if (inode_unhashed(inode)) { 2152 if (inode_unhashed(inode)) {
2151 /* Unfortunately insert_inode_hash is not idempotent, 2153 /* Unfortunately insert_inode_hash is not idempotent,
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c53..a51d9465e628 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
19obj-$(CONFIG_INET) += ipv4/ 19obj-$(CONFIG_INET) += ipv4/
20obj-$(CONFIG_XFRM) += xfrm/ 20obj-$(CONFIG_XFRM) += xfrm/
21obj-$(CONFIG_UNIX) += unix/ 21obj-$(CONFIG_UNIX) += unix/
22ifneq ($(CONFIG_IPV6),) 22obj-$(CONFIG_NET) += ipv6/
23obj-y += ipv6/
24endif
25obj-$(CONFIG_PACKET) += packet/ 23obj-$(CONFIG_PACKET) += packet/
26obj-$(CONFIG_NET_KEY) += key/ 24obj-$(CONFIG_NET_KEY) += key/
27obj-$(CONFIG_BRIDGE) += bridge/ 25obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb4..6dee7bf648a9 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
6 tristate "802.1d Ethernet Bridging" 6 tristate "802.1d Ethernet Bridging"
7 select LLC 7 select LLC
8 select STP 8 select STP
9 depends on IPV6 || IPV6=n
9 ---help--- 10 ---help---
10 If you say Y here, then your Linux box will be able to act as an 11 If you say Y here, then your Linux box will be able to act as an
11 Ethernet bridge, which means that the different Ethernet segments it 12 Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc2..6561021d22d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
1114void dev_load(struct net *net, const char *name) 1114void dev_load(struct net *net, const char *name)
1115{ 1115{
1116 struct net_device *dev; 1116 struct net_device *dev;
1117 int no_module;
1117 1118
1118 rcu_read_lock(); 1119 rcu_read_lock();
1119 dev = dev_get_by_name_rcu(net, name); 1120 dev = dev_get_by_name_rcu(net, name);
1120 rcu_read_unlock(); 1121 rcu_read_unlock();
1121 1122
1122 if (!dev && capable(CAP_NET_ADMIN)) 1123 no_module = !dev;
1123 request_module("%s", name); 1124 if (no_module && capable(CAP_NET_ADMIN))
1125 no_module = request_module("netdev-%s", name);
1126 if (no_module && capable(CAP_SYS_MODULE)) {
1127 if (!request_module("%s", name))
1128 pr_err("Loading kernel module for a network device "
1129"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
1130"instead\n", name);
1131 }
1124} 1132}
1125EXPORT_SYMBOL(dev_load); 1133EXPORT_SYMBOL(dev_load);
1126 1134
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461f..b5bada92f637 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
3321 pkt_dev->started_at); 3321 pkt_dev->started_at);
3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); 3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
3323 3323
3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", 3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
3325 (unsigned long long)ktime_to_us(elapsed), 3325 (unsigned long long)ktime_to_us(elapsed),
3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), 3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
3327 (unsigned long long)ktime_to_us(idle), 3327 (unsigned long long)ktime_to_us(idle),
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
95 int fd = fdp[i]; 95 int fd = fdp[i];
96 struct file *file; 96 struct file *file;
97 97
98 if (fd < 0 || !(file = fget(fd))) 98 if (fd < 0 || !(file = fget_raw(fd)))
99 return -EBADF; 99 return -EBADF;
100 *fpp++ = file; 100 *fpp++ = file;
101 fpl->count++; 101 fpl->count++;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce929..036652c8166d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
670 ifap = &ifa->ifa_next) { 670 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr == 672 sin_orig.sin_addr.s_addr ==
673 ifa->ifa_address) { 673 ifa->ifa_local) {
674 break; /* found */ 674 break; /* found */
675 } 675 }
676 } 676 }
@@ -1040,8 +1040,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
1040 return; 1040 return;
1041 1041
1042 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1042 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043 ifa->ifa_address, dev, 1043 ifa->ifa_local, dev,
1044 ifa->ifa_address, NULL, 1044 ifa->ifa_local, NULL,
1045 dev->dev_addr, NULL); 1045 dev->dev_addr, NULL);
1046} 1046}
1047 1047
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6613edfac28c..d1d0e2c256fc 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1765,4 +1765,4 @@ module_exit(ipgre_fini);
1765MODULE_LICENSE("GPL"); 1765MODULE_LICENSE("GPL");
1766MODULE_ALIAS_RTNL_LINK("gre"); 1766MODULE_ALIAS_RTNL_LINK("gre");
1767MODULE_ALIAS_RTNL_LINK("gretap"); 1767MODULE_ALIAS_RTNL_LINK("gretap");
1768MODULE_ALIAS("gre0"); 1768MODULE_ALIAS_NETDEV("gre0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54a..a5f58e7cbb26 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,4 +913,4 @@ static void __exit ipip_fini(void)
913module_init(ipip_init); 913module_init(ipip_init);
914module_exit(ipip_fini); 914module_exit(ipip_fini);
915MODULE_LICENSE("GPL"); 915MODULE_LICENSE("GPL");
916MODULE_ALIAS("tunl0"); 916MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697bd..e528a42a52be 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
57MODULE_AUTHOR("Ville Nuorvala"); 57MODULE_AUTHOR("Ville Nuorvala");
58MODULE_DESCRIPTION("IPv6 tunneling device"); 58MODULE_DESCRIPTION("IPv6 tunneling device");
59MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
60MODULE_ALIAS_NETDEV("ip6tnl0");
60 61
61#ifdef IP6_TNL_DEBUG 62#ifdef IP6_TNL_DEBUG
62#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) 63#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 904312e25a3c..e7db7014e89f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -739,8 +739,10 @@ restart:
739 739
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
742 else 742 else if (!(rt->dst.flags & DST_HOST))
743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
744 else
745 goto out2;
744 746
745 dst_release(&rt->dst); 747 dst_release(&rt->dst);
746 rt = nrt ? : net->ipv6.ip6_null_entry; 748 rt = nrt ? : net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a547..d2c16e10f650 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1290,4 +1290,4 @@ static int __init sit_init(void)
1290module_init(sit_init); 1290module_init(sit_init);
1291module_exit(sit_cleanup); 1291module_exit(sit_cleanup);
1292MODULE_LICENSE("GPL"); 1292MODULE_LICENSE("GPL");
1293MODULE_ALIAS("sit0"); 1293MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421bc..c47a511f203d 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
551 if (conn->c_loopback 551 if (conn->c_loopback
552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
554 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 554 scat = &rm->data.op_sg[sg];
555 ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
556 ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
557 return ret;
555 } 558 }
556 559
557 /* FIXME we may overallocate here */ 560 /* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b17..bca6761a3ca2 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 struct scatterlist *sgp = &rm->data.op_sg[sg];
65 int ret = sizeof(struct rds_header) +
66 be32_to_cpu(rm->m_inc.i_hdr.h_len);
67
64 /* Do not send cong updates to loopback */ 68 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 69 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 70 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 71 ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
72 goto out;
68 } 73 }
69 74
70 BUG_ON(hdr_off || sg || off); 75 BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
80 NULL); 85 NULL);
81 86
82 rds_inc_put(&rm->m_inc); 87 rds_inc_put(&rm->m_inc);
83 88out:
84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 89 return ret;
85} 90}
86 91
87/* 92/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164e..59e599498e37 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
252 252
253/* 253/*
254 * Mark an RPC call as having completed by clearing the 'active' bit 254 * Mark an RPC call as having completed by clearing the 'active' bit
255 * and then waking up all tasks that were sleeping.
255 */ 256 */
256static void rpc_mark_complete_task(struct rpc_task *task) 257static int rpc_complete_task(struct rpc_task *task)
257{ 258{
258 smp_mb__before_clear_bit(); 259 void *m = &task->tk_runstate;
260 wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
261 struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
262 unsigned long flags;
263 int ret;
264
265 spin_lock_irqsave(&wq->lock, flags);
259 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 266 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
260 smp_mb__after_clear_bit(); 267 ret = atomic_dec_and_test(&task->tk_count);
261 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); 268 if (waitqueue_active(wq))
269 __wake_up_locked_key(wq, TASK_NORMAL, &k);
270 spin_unlock_irqrestore(&wq->lock, flags);
271 return ret;
262} 272}
263 273
264/* 274/*
265 * Allow callers to wait for completion of an RPC call 275 * Allow callers to wait for completion of an RPC call
276 *
277 * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
278 * to enforce taking of the wq->lock and hence avoid races with
279 * rpc_complete_task().
266 */ 280 */
267int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) 281int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
268{ 282{
269 if (action == NULL) 283 if (action == NULL)
270 action = rpc_wait_bit_killable; 284 action = rpc_wait_bit_killable;
271 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, 285 return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
272 action, TASK_KILLABLE); 286 action, TASK_KILLABLE);
273} 287}
274EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); 288EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work)
857 rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); 871 rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
858} 872}
859 873
860void rpc_put_task(struct rpc_task *task) 874static void rpc_release_resources_task(struct rpc_task *task)
861{ 875{
862 if (!atomic_dec_and_test(&task->tk_count))
863 return;
864 /* Release resources */
865 if (task->tk_rqstp) 876 if (task->tk_rqstp)
866 xprt_release(task); 877 xprt_release(task);
867 if (task->tk_msg.rpc_cred) 878 if (task->tk_msg.rpc_cred)
868 put_rpccred(task->tk_msg.rpc_cred); 879 put_rpccred(task->tk_msg.rpc_cred);
869 rpc_task_release_client(task); 880 rpc_task_release_client(task);
870 if (task->tk_workqueue != NULL) { 881}
882
883static void rpc_final_put_task(struct rpc_task *task,
884 struct workqueue_struct *q)
885{
886 if (q != NULL) {
871 INIT_WORK(&task->u.tk_work, rpc_async_release); 887 INIT_WORK(&task->u.tk_work, rpc_async_release);
872 queue_work(task->tk_workqueue, &task->u.tk_work); 888 queue_work(q, &task->u.tk_work);
873 } else 889 } else
874 rpc_free_task(task); 890 rpc_free_task(task);
875} 891}
892
893static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
894{
895 if (atomic_dec_and_test(&task->tk_count)) {
896 rpc_release_resources_task(task);
897 rpc_final_put_task(task, q);
898 }
899}
900
901void rpc_put_task(struct rpc_task *task)
902{
903 rpc_do_put_task(task, NULL);
904}
876EXPORT_SYMBOL_GPL(rpc_put_task); 905EXPORT_SYMBOL_GPL(rpc_put_task);
877 906
907void rpc_put_task_async(struct rpc_task *task)
908{
909 rpc_do_put_task(task, task->tk_workqueue);
910}
911EXPORT_SYMBOL_GPL(rpc_put_task_async);
912
878static void rpc_release_task(struct rpc_task *task) 913static void rpc_release_task(struct rpc_task *task)
879{ 914{
880 dprintk("RPC: %5u release task\n", task->tk_pid); 915 dprintk("RPC: %5u release task\n", task->tk_pid);
881 916
882 BUG_ON (RPC_IS_QUEUED(task)); 917 BUG_ON (RPC_IS_QUEUED(task));
883 918
884 /* Wake up anyone who is waiting for task completion */ 919 rpc_release_resources_task(task);
885 rpc_mark_complete_task(task);
886 920
887 rpc_put_task(task); 921 /*
922 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
923 * so it should be safe to use task->tk_count as a test for whether
924 * or not any other processes still hold references to our rpc_task.
925 */
926 if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
927 /* Wake up anyone who may be waiting for task completion */
928 if (!rpc_complete_task(task))
929 return;
930 } else {
931 if (!atomic_dec_and_test(&task->tk_count))
932 return;
933 }
934 rpc_final_put_task(task, task->tk_workqueue);
888} 935}
889 936
890int rpciod_up(void) 937int rpciod_up(void)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912a..1a10dcd999ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1335 p, 0, length, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1337 put_page(p); 1337 put_page(p);
1338 svc_rdma_put_context(ctxt, 1);
1338 return; 1339 return;
1339 } 1340 }
1340 atomic_inc(&xprt->sc_dma_used); 1341 atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a57960..be96d429b475 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1631 } 1631 }
1632 xs_reclassify_socket(family, sock); 1632 xs_reclassify_socket(family, sock);
1633 1633
1634 if (xs_bind(transport, sock)) { 1634 err = xs_bind(transport, sock);
1635 if (err) {
1635 sock_release(sock); 1636 sock_release(sock);
1636 goto out; 1637 goto out;
1637 } 1638 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d286204..ba5b8c208498 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
850 * Get the parent directory, calculate the hash for last 850 * Get the parent directory, calculate the hash for last
851 * component. 851 * component.
852 */ 852 */
853 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 853 err = kern_path_parent(sunaddr->sun_path, &nd);
854 if (err) 854 if (err)
855 goto out_mknod_parent; 855 goto out_mknod_parent;
856 856
@@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1724 1724
1725 msg->msg_namelen = 0; 1725 msg->msg_namelen = 0;
1726 1726
1727 mutex_lock(&u->readlock); 1727 err = mutex_lock_interruptible(&u->readlock);
1728 if (err) {
1729 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1730 goto out;
1731 }
1728 1732
1729 skb = skb_recv_datagram(sk, flags, noblock, &err); 1733 skb = skb_recv_datagram(sk, flags, noblock, &err);
1730 if (!skb) { 1734 if (!skb) {
@@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1864 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1868 memset(&tmp_scm, 0, sizeof(tmp_scm));
1865 } 1869 }
1866 1870
1867 mutex_lock(&u->readlock); 1871 err = mutex_lock_interruptible(&u->readlock);
1872 if (err) {
1873 err = sock_intr_errno(timeo);
1874 goto out;
1875 }
1868 1876
1869 do { 1877 do {
1870 int chunk; 1878 int chunk;
@@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1895 1903
1896 timeo = unix_stream_data_wait(sk, timeo); 1904 timeo = unix_stream_data_wait(sk, timeo);
1897 1905
1898 if (signal_pending(current)) { 1906 if (signal_pending(current)
1907 || mutex_lock_interruptible(&u->readlock)) {
1899 err = sock_intr_errno(timeo); 1908 err = sock_intr_errno(timeo);
1900 goto out; 1909 goto out;
1901 } 1910 }
1902 mutex_lock(&u->readlock); 1911
1903 continue; 1912 continue;
1904 unlock: 1913 unlock:
1905 unix_state_unlock(sk); 1914 unix_state_unlock(sk);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
104 /* 104 /*
105 * Socket ? 105 * Socket ?
106 */ 106 */
107 if (S_ISSOCK(inode->i_mode)) { 107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 108 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 109 struct sock *s = sock->sk;
110 110
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 6c94c6ce2925..291228e25984 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -309,6 +309,11 @@ static void do_config_file(const char *filename)
309 close(fd); 309 close(fd);
310} 310}
311 311
312/*
313 * Important: The below generated source_foo.o and deps_foo.o variable
314 * assignments are parsed not only by make, but also by the rather simple
315 * parser in scripts/mod/sumversion.c.
316 */
312static void parse_dep_file(void *map, size_t len) 317static void parse_dep_file(void *map, size_t len)
313{ 318{
314 char *m = map; 319 char *m = map;
@@ -323,7 +328,6 @@ static void parse_dep_file(void *map, size_t len)
323 exit(1); 328 exit(1);
324 } 329 }
325 memcpy(s, m, p-m); s[p-m] = 0; 330 memcpy(s, m, p-m); s[p-m] = 0;
326 printf("deps_%s := \\\n", target);
327 m = p+1; 331 m = p+1;
328 332
329 clear_config(); 333 clear_config();
@@ -343,12 +347,15 @@ static void parse_dep_file(void *map, size_t len)
343 strrcmp(s, "arch/um/include/uml-config.h") && 347 strrcmp(s, "arch/um/include/uml-config.h") &&
344 strrcmp(s, ".ver")) { 348 strrcmp(s, ".ver")) {
345 /* 349 /*
346 * Do not output the first dependency (the 350 * Do not list the source file as dependency, so that
347 * source file), so that kbuild is not confused 351 * kbuild is not confused if a .c file is rewritten
348 * if a .c file is rewritten into .S or vice 352 * into .S or vice versa. Storing it in source_* is
349 * versa. 353 * needed for modpost to compute srcversions.
350 */ 354 */
351 if (!first) 355 if (first) {
356 printf("source_%s := %s\n\n", target, s);
357 printf("deps_%s := \\\n", target);
358 } else
352 printf(" %s \\\n", s); 359 printf(" %s \\\n", s);
353 do_config_file(s); 360 do_config_file(s);
354 } 361 }
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9a..58848e3e392c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
2654 WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); 2654 WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
2655 } 2655 }
2656 2656
2657# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
2658 if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
2659 ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
2660 }
2661
2662# warn about #if 0 2657# warn about #if 0
2663 if ($line =~ /^.\s*\#\s*if\s+0\b/) { 2658 if ($line =~ /^.\s*\#\s*if\s+0\b/) {
2664 CHK("if this code is redundant consider removing it\n" . 2659 CHK("if this code is redundant consider removing it\n" .
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index ecf9c7dc1825..9dfcd6d988da 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -300,8 +300,8 @@ static int is_static_library(const char *objfile)
300 return 0; 300 return 0;
301} 301}
302 302
303/* We have dir/file.o. Open dir/.file.o.cmd, look for deps_ line to 303/* We have dir/file.o. Open dir/.file.o.cmd, look for source_ and deps_ line
304 * figure out source file. */ 304 * to figure out source files. */
305static int parse_source_files(const char *objfile, struct md4_ctx *md) 305static int parse_source_files(const char *objfile, struct md4_ctx *md)
306{ 306{
307 char *cmd, *file, *line, *dir; 307 char *cmd, *file, *line, *dir;
@@ -340,6 +340,21 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
340 */ 340 */
341 while ((line = get_next_line(&pos, file, flen)) != NULL) { 341 while ((line = get_next_line(&pos, file, flen)) != NULL) {
342 char* p = line; 342 char* p = line;
343
344 if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
345 p = strrchr(line, ' ');
346 if (!p) {
347 warn("malformed line: %s\n", line);
348 goto out_file;
349 }
350 p++;
351 if (!parse_file(p, md)) {
352 warn("could not open %s: %s\n",
353 p, strerror(errno));
354 goto out_file;
355 }
356 continue;
357 }
343 if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) { 358 if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) {
344 check_files = 1; 359 check_files = 1;
345 continue; 360 continue;
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 44423b4dcb82..8c81d76959ee 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py
@@ -33,8 +33,6 @@ cmd_opcodes = {
33 "lockintnowait" : "6", 33 "lockintnowait" : "6",
34 "lockcont" : "7", 34 "lockcont" : "7",
35 "unlock" : "8", 35 "unlock" : "8",
36 "lockbkl" : "9",
37 "unlockbkl" : "10",
38 "signal" : "11", 36 "signal" : "11",
39 "resetevent" : "98", 37 "resetevent" : "98",
40 "reset" : "99", 38 "reset" : "99",
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
index 8821f27cc8be..3710c8b2090d 100644
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
index cde1f189a02b..b4cc95975adb 100644
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ b/scripts/rt-tester/t2-l1-pi.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
index 3ab0bfc49950..1b57376cc1f7 100644
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ b/scripts/rt-tester/t2-l1-signal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
index f4b5d5d6215f..68b10629b6f4 100644
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
index 63440ca2cce9..8e6c8b11ae56 100644
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-1rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
index e5816fe67df3..69c2212fc520 100644
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-2rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
index 718b82b5d3bb..9b0f1eb26a88 100644
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-3rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
index c6e213563498..39ec74ab06ee 100644
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ b/scripts/rt-tester/t3-l1-pi-signal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
index f53749d59d79..e03db7e010fa 100644
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ b/scripts/rt-tester/t3-l1-pi-steal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
index cdc3e4fd7bac..7b59100d3e48 100644
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ b/scripts/rt-tester/t3-l2-pi.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
index baa14137f473..2f0e049d6443 100644
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ b/scripts/rt-tester/t4-l2-pi-deboost.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
index e6ec0c81b54d..04f4034ff895 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
index ca64f8bbf4bc..a48a6ee29ddc 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
index 4bbc3442703f..8dfb0a0da673 100644
--- a/sound/soc/codecs/wm8978.c
+++ b/sound/soc/codecs/wm8978.c
@@ -145,18 +145,18 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
145 SOC_SINGLE("DAC Playback Limiter Threshold", 145 SOC_SINGLE("DAC Playback Limiter Threshold",
146 WM8978_DAC_LIMITER_2, 4, 7, 0), 146 WM8978_DAC_LIMITER_2, 4, 7, 0),
147 SOC_SINGLE("DAC Playback Limiter Boost", 147 SOC_SINGLE("DAC Playback Limiter Boost",
148 WM8978_DAC_LIMITER_2, 0, 15, 0), 148 WM8978_DAC_LIMITER_2, 0, 12, 0),
149 149
150 SOC_ENUM("ALC Enable Switch", alc1), 150 SOC_ENUM("ALC Enable Switch", alc1),
151 SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0), 151 SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0),
152 SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0), 152 SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0),
153 153
154 SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 7, 0), 154 SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 10, 0),
155 SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0), 155 SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0),
156 156
157 SOC_ENUM("ALC Capture Mode", alc3), 157 SOC_ENUM("ALC Capture Mode", alc3),
158 SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 15, 0), 158 SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 10, 0),
159 SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 15, 0), 159 SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 10, 0),
160 160
161 SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0), 161 SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0),
162 SOC_SINGLE("ALC Capture Noise Gate Threshold", 162 SOC_SINGLE("ALC Capture Noise Gate Threshold",
@@ -211,8 +211,10 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
211 WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1), 211 WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1),
212 212
213 /* DAC / ADC oversampling */ 213 /* DAC / ADC oversampling */
214 SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, 8, 1, 0), 214 SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL,
215 SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, 8, 1, 0), 215 5, 1, 0),
216 SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL,
217 5, 1, 0),
216}; 218};
217 219
218/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */ 220/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 4afbe3b2e443..c6c958ee5d59 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -1418,7 +1418,7 @@ SND_SOC_DAPM_DAC_E("DAC1R", NULL, SND_SOC_NOPM, 0, 0,
1418 1418
1419static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = { 1419static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = {
1420SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0), 1420SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
1421SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0), 1421SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
1422SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0), 1422SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
1423SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0), 1423SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
1424}; 1424};
@@ -3325,6 +3325,12 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
3325 case WM8958: 3325 case WM8958:
3326 snd_soc_add_controls(codec, wm8958_snd_controls, 3326 snd_soc_add_controls(codec, wm8958_snd_controls,
3327 ARRAY_SIZE(wm8958_snd_controls)); 3327 ARRAY_SIZE(wm8958_snd_controls));
3328 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
3329 ARRAY_SIZE(wm8994_lateclk_widgets));
3330 snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
3331 ARRAY_SIZE(wm8994_adc_widgets));
3332 snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
3333 ARRAY_SIZE(wm8994_dac_widgets));
3328 snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets, 3334 snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
3329 ARRAY_SIZE(wm8958_dapm_widgets)); 3335 ARRAY_SIZE(wm8958_dapm_widgets));
3330 break; 3336 break;
@@ -3350,6 +3356,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
3350 } 3356 }
3351 break; 3357 break;
3352 case WM8958: 3358 case WM8958:
3359 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
3360 ARRAY_SIZE(wm8994_lateclk_intercon));
3353 snd_soc_dapm_add_routes(dapm, wm8958_intercon, 3361 snd_soc_dapm_add_routes(dapm, wm8958_intercon,
3354 ARRAY_SIZE(wm8958_intercon)); 3362 ARRAY_SIZE(wm8958_intercon));
3355 break; 3363 break;
diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c
index 161750443ebc..73dde4a1adc3 100644
--- a/sound/soc/omap/am3517evm.c
+++ b/sound/soc/omap/am3517evm.c
@@ -139,7 +139,7 @@ static struct snd_soc_dai_link am3517evm_dai = {
139 .cpu_dai_name ="omap-mcbsp-dai.0", 139 .cpu_dai_name ="omap-mcbsp-dai.0",
140 .codec_dai_name = "tlv320aic23-hifi", 140 .codec_dai_name = "tlv320aic23-hifi",
141 .platform_name = "omap-pcm-audio", 141 .platform_name = "omap-pcm-audio",
142 .codec_name = "tlv320aic23-codec", 142 .codec_name = "tlv320aic23-codec.2-001a",
143 .init = am3517evm_aic23_init, 143 .init = am3517evm_aic23_init,
144 .ops = &am3517evm_ops, 144 .ops = &am3517evm_ops,
145}; 145};
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 25e54230cc6a..1790f83ee665 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -941,7 +941,7 @@ static void dapm_seq_run(struct snd_soc_dapm_context *dapm,
941 } 941 }
942 942
943 if (!list_empty(&pending)) 943 if (!list_empty(&pending))
944 dapm_seq_run_coalesced(dapm, &pending); 944 dapm_seq_run_coalesced(cur_dapm, &pending);
945} 945}
946 946
947static void dapm_widget_update(struct snd_soc_dapm_context *dapm) 947static void dapm_widget_update(struct snd_soc_dapm_context *dapm)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 5a72d421e211..e5230c0ef95b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -189,11 +189,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
189 const char *name, bool is_kallsyms) 189 const char *name, bool is_kallsyms)
190{ 190{
191 const size_t size = PATH_MAX; 191 const size_t size = PATH_MAX;
192 char *realname = realpath(name, NULL), 192 char *realname, *filename = malloc(size),
193 *filename = malloc(size),
194 *linkname = malloc(size), *targetname; 193 *linkname = malloc(size), *targetname;
195 int len, err = -1; 194 int len, err = -1;
196 195
196 if (is_kallsyms)
197 realname = (char *)name;
198 else
199 realname = realpath(name, NULL);
200
197 if (realname == NULL || filename == NULL || linkname == NULL) 201 if (realname == NULL || filename == NULL || linkname == NULL)
198 goto out_free; 202 goto out_free;
199 203
@@ -225,7 +229,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
225 if (symlink(targetname, linkname) == 0) 229 if (symlink(targetname, linkname) == 0)
226 err = 0; 230 err = 0;
227out_free: 231out_free:
228 free(realname); 232 if (!is_kallsyms)
233 free(realname);
229 free(filename); 234 free(filename);
230 free(linkname); 235 free(linkname);
231 return err; 236 return err;