author		Greg Kroah-Hartman <gregkh@suse.de>	2011-08-08 00:08:28 -0400
committer	Greg Kroah-Hartman <gregkh@suse.de>	2011-08-08 00:08:41 -0400
commit		b6741d1fe947f829bd4303397fd888e1d4b66bae (patch)
tree		a41c8cf58ddfd82d3083b666479a82df018297f7
parent		acab460b0f618e1e9663eea8c52ad5edd552ed1d (diff)
parent		322a8b034003c0d46d39af85bf24fee27b902f48 (diff)

Merge 3.1-rc1 into usb-linus

Gives us a good starting point to base patches off of.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r--Documentation/ABI/testing/pstore6
-rw-r--r--Documentation/ABI/testing/sysfs-platform-ideapad-laptop17
-rw-r--r--Documentation/CodingStyle23
-rw-r--r--Documentation/acpi/apei/einj.txt11
-rw-r--r--Documentation/device-mapper/dm-crypt.txt21
-rw-r--r--Documentation/device-mapper/dm-flakey.txt48
-rw-r--r--Documentation/device-mapper/dm-raid.txt138
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_keys.txt2
-rw-r--r--Documentation/devicetree/bindings/input/fsl-mma8450.txt11
-rw-r--r--Documentation/dmaengine.txt234
-rw-r--r--Documentation/fault-injection/fault-injection.txt3
-rw-r--r--Documentation/feature-removal-schedule.txt20
-rw-r--r--Documentation/frv/booting.txt13
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kernel-parameters.txt11
-rw-r--r--Documentation/m68k/kernel-options.txt14
-rw-r--r--Documentation/networking/bonding.txt2
-rw-r--r--Documentation/power/runtime_pm.txt10
-rw-r--r--MAINTAINERS18
-rw-r--r--Makefile4
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/arm/kernel/armksyms.c3
-rw-r--r--arch/arm/kernel/process.c4
-rw-r--r--arch/arm/lib/Makefile2
-rw-r--r--arch/arm/lib/sha1.S211
-rw-r--r--arch/avr32/Kconfig1
-rw-r--r--arch/cris/arch-v10/drivers/sync_serial.c6
-rw-r--r--arch/cris/arch-v10/kernel/irq.c3
-rw-r--r--arch/cris/include/asm/thread_info.h6
-rw-r--r--arch/frv/Kconfig1
-rw-r--r--arch/ia64/Kconfig5
-rw-r--r--arch/ia64/include/asm/gpio.h55
-rw-r--r--arch/ia64/kernel/efi.c2
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/include/asm/atomic.h4
-rw-r--r--arch/parisc/include/asm/futex.h66
-rw-r--r--arch/parisc/include/asm/unistd.h3
-rw-r--r--arch/parisc/kernel/syscall_table.S1
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/s390/Kconfig9
-rw-r--r--arch/s390/include/asm/ipl.h1
-rw-r--r--arch/s390/include/asm/lowcore.h11
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/system.h1
-rw-r--r--arch/s390/kernel/asm-offsets.c10
-rw-r--r--arch/s390/kernel/base.S36
-rw-r--r--arch/s390/kernel/compat_signal.c43
-rw-r--r--arch/s390/kernel/entry.S28
-rw-r--r--arch/s390/kernel/entry64.S20
-rw-r--r--arch/s390/kernel/ipl.c45
-rw-r--r--arch/s390/kernel/reipl64.S80
-rw-r--r--arch/s390/kernel/setup.c25
-rw-r--r--arch/s390/kernel/signal.c61
-rw-r--r--arch/s390/kernel/smp.c24
-rw-r--r--arch/s390/mm/maccess.c16
-rw-r--r--arch/s390/mm/pgtable.c1
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/kernel/idle.c6
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/include/asm/Kbuild5
-rw-r--r--arch/sparc/include/asm/bitops_64.h49
-rw-r--r--arch/sparc/include/asm/div64.h1
-rw-r--r--arch/sparc/include/asm/elf_64.h65
-rw-r--r--arch/sparc/include/asm/hypervisor.h13
-rw-r--r--arch/sparc/include/asm/irq_regs.h1
-rw-r--r--arch/sparc/include/asm/local.h6
-rw-r--r--arch/sparc/include/asm/local64.h1
-rw-r--r--arch/sparc/include/asm/tsb.h51
-rw-r--r--arch/sparc/kernel/cpu.c1
-rw-r--r--arch/sparc/kernel/ds.c30
-rw-r--r--arch/sparc/kernel/entry.h14
-rw-r--r--arch/sparc/kernel/head_64.S2
-rw-r--r--arch/sparc/kernel/hvapi.c6
-rw-r--r--arch/sparc/kernel/hvcalls.S7
-rw-r--r--arch/sparc/kernel/kernel.h15
-rw-r--r--arch/sparc/kernel/ktlb.S24
-rw-r--r--arch/sparc/kernel/mdesc.c30
-rw-r--r--arch/sparc/kernel/setup_64.c186
-rw-r--r--arch/sparc/kernel/sparc_ksyms_64.c11
-rw-r--r--arch/sparc/kernel/sstate.c9
-rw-r--r--arch/sparc/kernel/unaligned_64.c15
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S21
-rw-r--r--arch/sparc/lib/Makefile4
-rw-r--r--arch/sparc/lib/NG2page.S61
-rw-r--r--arch/sparc/lib/NGpage.S114
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/sparc/lib/ffs.S84
-rw-r--r--arch/sparc/lib/hweight.S51
-rw-r--r--arch/sparc/mm/init_64.c42
-rw-r--r--arch/tile/Kconfig1
-rw-r--r--arch/tile/include/asm/Kbuild38
-rw-r--r--arch/tile/include/asm/bug.h1
-rw-r--r--arch/tile/include/asm/bugs.h1
-rw-r--r--arch/tile/include/asm/cputime.h1
-rw-r--r--arch/tile/include/asm/device.h1
-rw-r--r--arch/tile/include/asm/div64.h1
-rw-r--r--arch/tile/include/asm/emergency-restart.h1
-rw-r--r--arch/tile/include/asm/errno.h1
-rw-r--r--arch/tile/include/asm/fb.h1
-rw-r--r--arch/tile/include/asm/fcntl.h1
-rw-r--r--arch/tile/include/asm/fixmap.h6
-rw-r--r--arch/tile/include/asm/ioctl.h1
-rw-r--r--arch/tile/include/asm/ioctls.h1
-rw-r--r--arch/tile/include/asm/ipc.h1
-rw-r--r--arch/tile/include/asm/ipcbuf.h1
-rw-r--r--arch/tile/include/asm/irq_regs.h1
-rw-r--r--arch/tile/include/asm/kdebug.h1
-rw-r--r--arch/tile/include/asm/local.h1
-rw-r--r--arch/tile/include/asm/module.h1
-rw-r--r--arch/tile/include/asm/msgbuf.h1
-rw-r--r--arch/tile/include/asm/mutex.h1
-rw-r--r--arch/tile/include/asm/param.h1
-rw-r--r--arch/tile/include/asm/parport.h1
-rw-r--r--arch/tile/include/asm/poll.h1
-rw-r--r--arch/tile/include/asm/posix_types.h1
-rw-r--r--arch/tile/include/asm/resource.h1
-rw-r--r--arch/tile/include/asm/scatterlist.h1
-rw-r--r--arch/tile/include/asm/sembuf.h1
-rw-r--r--arch/tile/include/asm/serial.h1
-rw-r--r--arch/tile/include/asm/shmbuf.h1
-rw-r--r--arch/tile/include/asm/shmparam.h1
-rw-r--r--arch/tile/include/asm/socket.h1
-rw-r--r--arch/tile/include/asm/sockios.h1
-rw-r--r--arch/tile/include/asm/statfs.h1
-rw-r--r--arch/tile/include/asm/termbits.h1
-rw-r--r--arch/tile/include/asm/termios.h1
-rw-r--r--arch/tile/include/asm/types.h1
-rw-r--r--arch/tile/include/asm/ucontext.h1
-rw-r--r--arch/tile/include/asm/xor.h1
-rw-r--r--arch/tile/include/hv/drv_srom_intf.h41
-rw-r--r--arch/tile/kernel/time.c5
-rw-r--r--arch/tile/mm/init.c3
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/io.h3
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/kernel/acpi/cstate.c23
-rw-r--r--arch/x86/kernel/process.c23
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/platform/mrst/Makefile1
-rw-r--r--arch/x86/platform/mrst/pmu.c817
-rw-r--r--arch/x86/platform/mrst/pmu.h234
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/setup.c13
-rw-r--r--arch/x86/xen/trace.c1
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-timeout.c5
-rw-r--r--crypto/md5.c92
-rw-r--r--drivers/acpi/acpica/acglobal.h6
-rw-r--r--drivers/acpi/acpica/aclocal.h1
-rw-r--r--drivers/acpi/acpica/acpredef.h1
-rw-r--r--drivers/acpi/acpica/nspredef.c19
-rw-r--r--drivers/acpi/acpica/nsrepair2.c15
-rw-r--r--drivers/acpi/acpica/tbinstal.c27
-rw-r--r--drivers/acpi/apei/Kconfig11
-rw-r--r--drivers/acpi/apei/apei-base.c35
-rw-r--r--drivers/acpi/apei/apei-internal.h15
-rw-r--r--drivers/acpi/apei/einj.c43
-rw-r--r--drivers/acpi/apei/erst-dbg.c6
-rw-r--r--drivers/acpi/apei/erst.c32
-rw-r--r--drivers/acpi/apei/ghes.c431
-rw-r--r--drivers/acpi/apei/hest.c17
-rw-r--r--drivers/acpi/battery.c86
-rw-r--r--drivers/acpi/bus.c14
-rw-r--r--drivers/acpi/dock.c4
-rw-r--r--drivers/acpi/ec_sys.c2
-rw-r--r--drivers/acpi/fan.c2
-rw-r--r--drivers/acpi/osl.c25
-rw-r--r--drivers/acpi/pci_irq.c58
-rw-r--r--drivers/acpi/pci_root.c3
-rw-r--r--drivers/acpi/processor_thermal.c2
-rw-r--r--drivers/acpi/sbs.c13
-rw-r--r--drivers/acpi/sleep.c16
-rw-r--r--drivers/acpi/sysfs.c4
-rw-r--r--drivers/acpi/thermal.c2
-rw-r--r--drivers/acpi/video.c2
-rw-r--r--drivers/ata/libata-acpi.c4
-rw-r--r--drivers/base/devtmpfs.c2
-rw-r--r--drivers/base/power/domain.c3
-rw-r--r--drivers/base/power/runtime.c10
-rw-r--r--drivers/char/Kconfig11
-rw-r--r--drivers/char/Makefile2
-rw-r--r--drivers/char/ramoops.c8
-rw-r--r--drivers/char/random.c349
-rw-r--r--drivers/char/tile-srom.c481
-rw-r--r--drivers/char/tpm/tpm_tis.c7
-rw-r--r--drivers/connector/cn_proc.c8
-rw-r--r--drivers/cpuidle/cpuidle.c50
-rw-r--r--drivers/cpuidle/cpuidle.h1
-rw-r--r--drivers/cpuidle/driver.c3
-rw-r--r--drivers/cpuidle/governor.c3
-rw-r--r--drivers/dma/TODO1
-rw-r--r--drivers/dma/amba-pl08x.c246
-rw-r--r--drivers/dma/at_hdmac.c4
-rw-r--r--drivers/dma/coh901318.c19
-rw-r--r--drivers/dma/dmaengine.c8
-rw-r--r--drivers/dma/ep93xx_dma.c2
-rw-r--r--drivers/dma/imx-sdma.c4
-rw-r--r--drivers/dma/intel_mid_dma.c2
-rw-r--r--drivers/dma/ioat/dma_v3.c8
-rw-r--r--drivers/dma/ioat/pci.c11
-rw-r--r--drivers/dma/ipu/ipu_idmac.c6
-rw-r--r--drivers/dma/mv_xor.c3
-rw-r--r--drivers/dma/mxs-dma.c13
-rw-r--r--drivers/dma/pch_dma.c127
-rw-r--r--drivers/dma/pl330.c64
-rw-r--r--drivers/dma/ste_dma40.c270
-rw-r--r--drivers/dma/ste_dma40_ll.h3
-rw-r--r--drivers/eisa/pci_eisa.c4
-rw-r--r--drivers/firmware/efivars.c243
-rw-r--r--drivers/gpu/drm/drm_debugfs.c4
-rw-r--r--drivers/gpu/drm/drm_edid.c33
-rw-r--r--drivers/gpu/drm/drm_irq.c26
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c189
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c6
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c3
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h36
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c2
-rw-r--r--drivers/gpu/drm/i915/intel_display.c138
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c111
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h23
-rw-r--r--drivers/gpu/drm/i915/intel_hdmi.c158
-rw-r--r--drivers/gpu/drm/i915/intel_lvds.c8
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c4
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c3
-rw-r--r--drivers/gpu/drm/radeon/Makefile1
-rw-r--r--drivers/gpu/drm/radeon/atom.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c2
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c21
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c54
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c32
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h6
-rw-r--r--drivers/ide/cy82c693.c2
-rw-r--r--drivers/ide/ide_platform.c6
-rw-r--r--drivers/input/keyboard/gpio_keys.c2
-rw-r--r--drivers/input/keyboard/lm8323.c9
-rw-r--r--drivers/input/keyboard/tegra-kbc.c5
-rw-r--r--drivers/input/misc/kxtj9.c1
-rw-r--r--drivers/input/misc/mma8450.c8
-rw-r--r--drivers/input/mouse/hgpk.c1
-rw-r--r--drivers/input/serio/xilinx_ps2.c2
-rw-r--r--drivers/input/touchscreen/ad7879.c4
-rw-r--r--drivers/md/Kconfig5
-rw-r--r--drivers/md/dm-crypt.c62
-rw-r--r--drivers/md/dm-flakey.c270
-rw-r--r--drivers/md/dm-io.c29
-rw-r--r--drivers/md/dm-ioctl.c89
-rw-r--r--drivers/md/dm-kcopyd.c42
-rw-r--r--drivers/md/dm-log-userspace-base.c3
-rw-r--r--drivers/md/dm-log.c32
-rw-r--r--drivers/md/dm-mpath.c147
-rw-r--r--drivers/md/dm-raid.c621
-rw-r--r--drivers/md/dm-snap-persistent.c80
-rw-r--r--drivers/md/dm-snap.c84
-rw-r--r--drivers/md/dm-table.c155
-rw-r--r--drivers/md/dm.c75
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.c10
-rw-r--r--drivers/net/bnx2x/bnx2x_hsi.h2
-rw-r--r--drivers/net/bnx2x/bnx2x_link.c218
-rw-r--r--drivers/net/bnx2x/bnx2x_link.h3
-rw-r--r--drivers/net/bnx2x/bnx2x_reg.h6
-rw-r--r--drivers/net/e1000/e1000_ethtool.c6
-rw-r--r--drivers/net/e1000/e1000_hw.c3
-rw-r--r--drivers/net/e1000e/es2lan.c2
-rw-r--r--drivers/net/e1000e/ethtool.c11
-rw-r--r--drivers/net/e1000e/ich8lan.c7
-rw-r--r--drivers/net/e1000e/lib.c1
-rw-r--r--drivers/net/e1000e/netdev.c2
-rw-r--r--drivers/net/e1000e/phy.c2
-rw-r--r--drivers/net/igb/e1000_nvm.c1
-rw-r--r--drivers/net/igb/igb_ethtool.c5
-rw-r--r--drivers/net/igb/igb_main.c4
-rw-r--r--drivers/net/igbvf/netdev.c2
-rw-r--r--drivers/net/irda/smsc-ircc2.c18
-rw-r--r--drivers/net/ixgb/ixgb_ee.c9
-rw-r--r--drivers/net/ixgb/ixgb_hw.c2
-rw-r--r--drivers/net/ixgbe/ixgbe_82599.c1
-rw-r--r--drivers/net/ixgbe/ixgbe_common.c1
-rw-r--r--drivers/net/ixgbe/ixgbe_ethtool.c5
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c3
-rw-r--r--drivers/net/ixgbe/ixgbe_phy.c3
-rw-r--r--drivers/net/ixgbe/ixgbe_x540.c1
-rw-r--r--drivers/net/macb.c3
-rw-r--r--drivers/net/mlx4/en_port.c2
-rw-r--r--drivers/net/mlx4/main.c2
-rw-r--r--drivers/net/mlx4/port.c9
-rw-r--r--drivers/net/niu.c4
-rw-r--r--drivers/net/r8169.c28
-rw-r--r--drivers/net/sis190.c12
-rw-r--r--drivers/net/usb/cdc_ncm.c156
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9002_hw.c6
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9003_hw.c6
-rw-r--r--drivers/net/wireless/ath/ath9k/hw.c11
-rw-r--r--drivers/net/wireless/ath/ath9k/hw.h3
-rw-r--r--drivers/net/wireless/ath/ath9k/init.c2
-rw-r--r--drivers/net/wireless/ath/ath9k/pci.c27
-rw-r--r--drivers/net/wireless/iwlegacy/iwl-3945.c6
-rw-r--r--drivers/net/wireless/iwlegacy/iwl-4965.c8
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-5000.c1
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-core.h2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-pci.c18
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-power.c3
-rw-r--r--drivers/net/wireless/rt2x00/rt2800lib.c3
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00lib.h3
-rw-r--r--drivers/net/wireless/rt2x00/rt2x00mac.c5
-rw-r--r--drivers/net/wireless/rtlwifi/pci.c20
-rw-r--r--drivers/of/address.c2
-rw-r--r--drivers/of/base.c7
-rw-r--r--drivers/pci/hotplug/acpiphp_glue.c2
-rw-r--r--drivers/platform/x86/Kconfig8
-rw-r--r--drivers/platform/x86/Makefile1
-rw-r--r--drivers/platform/x86/acer-wmi.c40
-rw-r--r--drivers/platform/x86/acerhdf.c13
-rw-r--r--drivers/platform/x86/asus-laptop.c9
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c27
-rw-r--r--drivers/platform/x86/asus-wmi.c239
-rw-r--r--drivers/platform/x86/asus-wmi.h7
-rw-r--r--drivers/platform/x86/dell-laptop.c1
-rw-r--r--drivers/platform/x86/dell-wmi.c10
-rw-r--r--drivers/platform/x86/eeepc-wmi.c27
-rw-r--r--drivers/platform/x86/ideapad-laptop.c195
-rw-r--r--drivers/platform/x86/intel_ips.c4
-rw-r--r--drivers/platform/x86/intel_menlow.c2
-rw-r--r--drivers/platform/x86/intel_mid_thermal.c26
-rw-r--r--drivers/platform/x86/intel_rar_register.c4
-rw-r--r--drivers/platform/x86/intel_scu_ipc.c2
-rw-r--r--drivers/platform/x86/msi-laptop.c10
-rw-r--r--drivers/platform/x86/msi-wmi.c1
-rw-r--r--drivers/platform/x86/samsung-laptop.c20
-rw-r--r--drivers/platform/x86/samsung-q10.c196
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c11
-rw-r--r--drivers/regulator/core.c190
-rw-r--r--drivers/regulator/dummy.c32
-rw-r--r--drivers/regulator/tps65910-regulator.c63
-rw-r--r--drivers/regulator/twl-regulator.c66
-rw-r--r--drivers/regulator/wm831x-dcdc.c126
-rw-r--r--drivers/regulator/wm831x-ldo.c25
-rw-r--r--drivers/regulator/wm8994-regulator.c4
-rw-r--r--drivers/rtc/rtc-omap.c2
-rw-r--r--drivers/s390/block/dasd.c9
-rw-r--r--drivers/s390/block/dasd_eckd.c9
-rw-r--r--drivers/s390/block/dasd_proc.c4
-rw-r--r--drivers/s390/char/sclp_async.c9
-rw-r--r--drivers/s390/cio/qdio.h2
-rw-r--r--drivers/s390/cio/qdio_debug.c12
-rw-r--r--drivers/s390/cio/qdio_main.c21
-rw-r--r--drivers/spi/spi-pl022.c11
-rw-r--r--drivers/target/iscsi/Kconfig1
-rw-r--r--drivers/target/iscsi/iscsi_target.c15
-rw-r--r--drivers/target/iscsi/iscsi_target_configfs.c2
-rw-r--r--drivers/target/iscsi/iscsi_target_nego.c2
-rw-r--r--drivers/target/target_core_transport.c33
-rw-r--r--drivers/target/tcm_fc/tcm_fc.h5
-rw-r--r--drivers/target/tcm_fc/tfc_cmd.c1
-rw-r--r--drivers/target/tcm_fc/tfc_io.c121
-rw-r--r--drivers/thermal/Kconfig8
-rw-r--r--drivers/thermal/thermal_sys.c142
-rw-r--r--drivers/tty/serial/imx.c13
-rw-r--r--drivers/tty/serial/sh-sci.c2
-rw-r--r--drivers/video/backlight/Kconfig2
-rw-r--r--drivers/video/backlight/aat2870_bl.c8
-rw-r--r--drivers/video/savage/savagefb.h2
-rw-r--r--drivers/watchdog/Kconfig3
-rw-r--r--drivers/watchdog/nv_tco.c8
-rw-r--r--drivers/watchdog/shwdt.c2
-rw-r--r--drivers/xen/Kconfig2
-rw-r--r--fs/9p/acl.c6
-rw-r--r--fs/9p/acl.h4
-rw-r--r--fs/9p/vfs_inode_dotl.c6
-rw-r--r--fs/Kconfig15
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c27
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.h30
-rw-r--r--fs/btrfs/dir-item.c30
-rw-r--r--fs/btrfs/extent-tree.c45
-rw-r--r--fs/btrfs/extent_io.c139
-rw-r--r--fs/btrfs/extent_io.h20
-rw-r--r--fs/btrfs/extent_map.c155
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/inode.c145
-rw-r--r--fs/btrfs/ioctl.c3
-rw-r--r--fs/btrfs/ref-cache.c68
-rw-r--r--fs/btrfs/ref-cache.h52
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/transaction.c65
-rw-r--r--fs/btrfs/tree-log.c12
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/cifs/cifs_dfs_ref.c5
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/dns_resolve.c4
-rw-r--r--fs/cifs/inode.c14
-rw-r--r--fs/cifs/sess.c3
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/dcache.c72
-rw-r--r--fs/exofs/Kbuild5
-rw-r--r--fs/exofs/Kconfig4
-rw-r--r--fs/exofs/exofs.h159
-rw-r--r--fs/exofs/inode.c152
-rw-r--r--fs/exofs/ore.c (renamed from fs/exofs/ios.c)370
-rw-r--r--fs/exofs/pnfs.h45
-rw-r--r--fs/exofs/super.c251
-rw-r--r--fs/ext2/acl.c8
-rw-r--r--fs/ext3/acl.c9
-rw-r--r--fs/ext4/Makefile2
-rw-r--r--fs/ext4/acl.c9
-rw-r--r--fs/ext4/balloc.c48
-rw-r--r--fs/ext4/block_validity.c21
-rw-r--r--fs/ext4/ext4.h55
-rw-r--r--fs/ext4/extents.c129
-rw-r--r--fs/ext4/fsync.c26
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/indirect.c1482
-rw-r--r--fs/ext4/inode.c1596
-rw-r--r--fs/ext4/ioctl.c12
-rw-r--r--fs/ext4/mballoc.c230
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/namei.c21
-rw-r--r--fs/ext4/page-io.c6
-rw-r--r--fs/ext4/resize.c199
-rw-r--r--fs/ext4/super.c88
-rw-r--r--fs/ext4/truncate.h43
-rw-r--r--fs/generic_acl.c13
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/hppfs/hppfs.c1
-rw-r--r--fs/inode.c14
-rw-r--r--fs/jbd2/checkpoint.c5
-rw-r--r--fs/jbd2/journal.c67
-rw-r--r--fs/jffs2/acl.c4
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/fs.c2
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/acl.c4
-rw-r--r--fs/jfs/xattr.c4
-rw-r--r--fs/namei.c117
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c6
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/posix_acl.c16
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/pstore/inode.c12
-rw-r--r--fs/pstore/internal.h2
-rw-r--r--fs/pstore/platform.c30
-rw-r--r--fs/reiserfs/xattr_acl.c10
-rw-r--r--fs/stack.c5
-rw-r--r--fs/stat.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c6
-rw-r--r--include/acpi/acpi_drivers.h2
-rw-r--r--include/acpi/acpixf.h3
-rw-r--r--include/acpi/apei.h5
-rw-r--r--include/acpi/processor.h2
-rw-r--r--include/drm/drm_crtc.h3
-rw-r--r--include/drm/i915_drm.h2
-rw-r--r--include/linux/acpi.h3
-rw-r--r--include/linux/amba/pl08x.h9
-rw-r--r--include/linux/bitmap.h1
-rw-r--r--include/linux/cpuidle.h4
-rw-r--r--include/linux/cred.h11
-rw-r--r--include/linux/cryptohash.h7
-rw-r--r--include/linux/dcache.h30
-rw-r--r--include/linux/device-mapper.h43
-rw-r--r--include/linux/dm-ioctl.h4
-rw-r--r--include/linux/dm-kcopyd.h15
-rw-r--r--include/linux/efi.h6
-rw-r--r--include/linux/fault-inject.h18
-rw-r--r--include/linux/fs.h68
-rw-r--r--include/linux/genalloc.h34
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/idr.h4
-rw-r--r--include/linux/input.h2
-rw-r--r--include/linux/jbd2.h6
-rw-r--r--include/linux/llist.h126
-rw-r--r--include/linux/memcontrol.h8
-rw-r--r--include/linux/mfd/aat2870.h2
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/nfs_fs.h4
-rw-r--r--include/linux/nfs_xdr.h10
-rw-r--r--include/linux/of.h15
-rw-r--r--include/linux/pci_ids.h10
-rw-r--r--include/linux/posix_acl.h82
-rw-r--r--include/linux/pstore.h9
-rw-r--r--include/linux/radix-tree.h37
-rw-r--r--include/linux/random.h12
-rw-r--r--include/linux/regulator/consumer.h3
-rw-r--r--include/linux/regulator/driver.h4
-rw-r--r--include/linux/shmem_fs.h17
-rw-r--r--include/linux/swapops.h23
-rw-r--r--include/linux/thermal.h22
-rw-r--r--include/net/cipso_ipv4.h2
-rw-r--r--include/net/dst.h17
-rw-r--r--include/net/netlabel.h2
-rw-r--r--include/net/secure_seq.h20
-rw-r--r--include/scsi/osd_ore.h125
-rw-r--r--include/trace/events/ext4.h87
-rw-r--r--include/trace/events/jbd2.h36
-rw-r--r--init/main.c7
-rw-r--r--ipc/shm.c16
-rw-r--r--kernel/debug/gdbstub.c22
-rw-r--r--kernel/debug/kdb/kdb_bt.c5
-rw-r--r--kernel/debug/kdb/kdb_cmds4
-rw-r--r--kernel/debug/kdb/kdb_debugger.c21
-rw-r--r--kernel/debug/kdb/kdb_io.c36
-rw-r--r--kernel/debug/kdb/kdb_main.c4
-rw-r--r--kernel/debug/kdb/kdb_private.h3
-rw-r--r--kernel/futex.c54
-rw-r--r--kernel/kmod.c2
-rw-r--r--kernel/lockdep.c37
-rw-r--r--kernel/taskstats.c18
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Makefile4
-rw-r--r--lib/bitmap.c2
-rw-r--r--lib/fault-inject.c20
-rw-r--r--lib/genalloc.c300
-rw-r--r--lib/idr.c67
-rw-r--r--lib/llist.c129
-rw-r--r--lib/md5.c95
-rw-r--r--lib/radix-tree.c121
-rw-r--r--lib/sha1.c212
-rw-r--r--mm/failslab.c14
-rw-r--r--mm/filemap.c106
-rw-r--r--mm/memcontrol.c66
-rw-r--r--mm/memory-failure.c92
-rw-r--r--mm/mincore.c11
-rw-r--r--mm/oom_kill.c4
-rw-r--r--mm/page_alloc.c13
-rw-r--r--mm/shmem.c1493
-rw-r--r--mm/slab.c92
-rw-r--r--mm/swapfile.c20
-rw-r--r--mm/truncate.c8
-rw-r--r--net/atm/br2684.c2
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/secure_seq.c184
-rw-r--r--net/core/skbuff.c17
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c9
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_hashtables.c1
-rw-r--r--net/ipv4/inetpeer.c1
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c1
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/datagram.c11
-rw-r--r--net/ipv6/inet6_hashtables.c1
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_output.c13
-rw-r--r--net/ipv6/route.c35
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c1
-rw-r--r--net/netlabel/Makefile2
-rw-r--r--net/netlabel/netlabel_addrlist.c2
-rw-r--r--net/netlabel/netlabel_addrlist.h2
-rw-r--r--net/netlabel/netlabel_cipso_v4.c2
-rw-r--r--net/netlabel/netlabel_cipso_v4.h2
-rw-r--r--net/netlabel/netlabel_domainhash.c2
-rw-r--r--net/netlabel/netlabel_domainhash.h2
-rw-r--r--net/netlabel/netlabel_kapi.c2
-rw-r--r--net/netlabel/netlabel_mgmt.c2
-rw-r--r--net/netlabel/netlabel_mgmt.h2
-rw-r--r--net/netlabel/netlabel_unlabeled.c2
-rw-r--r--net/netlabel/netlabel_unlabeled.h2
-rw-r--r--net/netlabel/netlabel_user.c2
-rw-r--r--net/netlabel/netlabel_user.h2
-rw-r--r--net/sched/sch_sfq.c7
-rw-r--r--net/socket.c73
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/xfrm/xfrm_algo.c4
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--security/selinux/include/netif.h2
-rw-r--r--security/selinux/include/netlabel.h2
-rw-r--r--security/selinux/include/netnode.h2
-rw-r--r--security/selinux/include/netport.h2
-rw-r--r--security/selinux/netif.c2
-rw-r--r--security/selinux/netlabel.c2
-rw-r--r--security/selinux/netnode.c2
-rw-r--r--security/selinux/netport.c2
-rw-r--r--security/selinux/selinuxfs.c2
-rw-r--r--security/selinux/ss/ebitmap.c2
-rw-r--r--security/selinux/ss/mls.c2
-rw-r--r--security/selinux/ss/mls.h2
-rw-r--r--security/selinux/ss/policydb.c2
-rw-r--r--security/selinux/ss/services.c2
-rw-r--r--security/smack/smack_lsm.c2
-rw-r--r--sound/core/pcm_compat.c2
-rw-r--r--sound/core/rtctimer.c2
-rw-r--r--sound/pci/asihpi/hpidspcd.c9
-rw-r--r--sound/pci/asihpi/hpioctl.c19
-rw-r--r--sound/pci/rme9652/hdspm.c109
-rw-r--r--sound/soc/txx9/txx9aclc.c1
-rw-r--r--tools/power/x86/turbostat/turbostat.c46
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c5
604 files changed, 14784 insertions, 7919 deletions
diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore
index ddf451ee2a0..ff1df4e3b05 100644
--- a/Documentation/ABI/testing/pstore
+++ b/Documentation/ABI/testing/pstore
@@ -39,3 +39,9 @@ Description: Generic interface to platform dependent persistent storage.
 		multiple) files based on the record size of the underlying
 		persistent storage until at least this amount is reached.
 		Default is 10 Kbytes.
+
+		Pstore only supports one backend at a time. If multiple
+		backends are available, the preferred backend may be
+		set by passing the pstore.backend= argument to the kernel at
+		boot time.
+
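For illustration (assuming the platform provides an ACPI ERST backend; the
backend name is platform-dependent), the preferred backend would be selected
by appending a boot parameter such as:

	pstore.backend=erst

to the kernel command line when more than one backend has registered.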
diff --git a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
index 807fca2ae2a..ff53183c384 100644
--- a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
+++ b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
@@ -4,3 +4,20 @@ KernelVersion: 2.6.37
 Contact:	"Ike Panhc <ike.pan@canonical.com>"
 Description:
 		Control the power of camera module. 1 means on, 0 means off.
+
+What:		/sys/devices/platform/ideapad/cfg
+Date:		Jun 2011
+KernelVersion:	3.1
+Contact:	"Ike Panhc <ike.pan@canonical.com>"
+Description:
+		Ideapad capability bits.
+		Bit 8-10: 1 - Intel graphic only
+			  2 - ATI graphic only
+			  3 - Nvidia graphic only
+			  4 - Intel and ATI graphic
+			  5 - Intel and Nvidia graphic
+		Bit 16: Bluetooth exist (1 for exist)
+		Bit 17: 3G exist (1 for exist)
+		Bit 18: Wifi exist (1 for exist)
+		Bit 19: Camera exist (1 for exist)
+
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index fa6e25b94a5..c940239d967 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -80,22 +80,13 @@ available tools.
 The limit on the length of lines is 80 columns and this is a strongly
 preferred limit.
 
-Statements longer than 80 columns will be broken into sensible chunks.
-Descendants are always substantially shorter than the parent and are placed
-substantially to the right. The same applies to function headers with a long
-argument list. Long strings are as well broken into shorter strings. The
-only exception to this is where exceeding 80 columns significantly increases
-readability and does not hide information.
+Statements longer than 80 columns will be broken into sensible chunks, unless
+exceeding 80 columns significantly increases readability and does not hide
+information. Descendants are always substantially shorter than the parent and
+are placed substantially to the right. The same applies to function headers
+with a long argument list. However, never break user-visible strings such as
+printk messages, because that breaks the ability to grep for them.
 
-void fun(int a, int b, int c)
-{
-	if (condition)
-		printk(KERN_WARNING "Warning this is a long printk with "
-						"3 parameters a: %u b: %u "
-						"c: %u \n", a, b, c);
-	else
-		next_statement;
-}
 
 		Chapter 3: Placing Braces and Spaces
 
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index dfab71848dc..5cc699ba545 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -48,12 +48,19 @@ directory apei/einj. The following files are provided.
 - param1
   This file is used to set the first error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address.
+  physical memory address.  Only available if the param_extension module
+  parameter is specified.
 
 - param2
   This file is used to set the second error parameter value. Effect of
   parameter depends on error_type specified. For memory error, this is
-  physical memory address mask.
+  physical memory address mask.  Only available if the param_extension
+  module parameter is specified.
+
+Injecting parameter support is a BIOS-version-specific extension, that
+is, it only works with some BIOS versions.  If you want to use it, please
+make sure your BIOS version has the proper support and specify
+"param_extension=y" as a module parameter.
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5.
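As a hedged sketch of the injection flow described above (the error_type,
address and mask values are illustrative, and debugfs is assumed to be
mounted at /sys/kernel/debug):

	modprobe einj param_extension=y
	cd /sys/kernel/debug/apei/einj
	echo 0x8 > error_type			# memory correctable error
	echo 0x12345000 > param1		# physical address (illustrative)
	echo 0xfffffffffffff000 > param2	# address mask (illustrative)
	echo 1 > error_inject			# trigger the injection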
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index 6b5c42dbbe8..2c656ae43ba 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -4,7 +4,8 @@ dm-crypt
 Device-Mapper's "crypt" target provides transparent encryption of block devices
 using the kernel crypto API.
 
-Parameters: <cipher> <key> <iv_offset> <device path> <offset>
+Parameters: <cipher> <key> <iv_offset> <device path> \
+	      <offset> [<#opt_params> <opt_params>]
 
 <cipher>
     Encryption cipher and an optional IV generation mode.
@@ -37,6 +38,24 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
 <offset>
     Starting sector within the device where the encrypted data begins.
 
+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional parameters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 allow_discards
+
+allow_discards
+    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+    The default is to ignore discard requests.
+
+    WARNING: Assess the specific security risks carefully before enabling this
+    option.  For example, allowing discards on encrypted devices may lead to
+    the leak of information about the ciphertext device (filesystem type,
+    used space etc.) if the discarded blocks can be located easily on the
+    device later.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
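To make the new syntax concrete, a hedged sketch of a full crypt table line
(the cipher, sector count, device and key are illustrative placeholders):

	0 2097152 crypt aes-cbc-essiv:sha256 <key> 0 /dev/sda1 0 1 allow_discards

The trailing "1 allow_discards" is the <#opt_params> <opt_params> pair;
omitting it keeps the default behaviour of ignoring discard requests.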
diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt
index c8efdfd19a6..6ff5c232722 100644
--- a/Documentation/device-mapper/dm-flakey.txt
+++ b/Documentation/device-mapper/dm-flakey.txt
@@ -1,17 +1,53 @@
 dm-flakey
 =========
 
-This target is the same as the linear target except that it returns I/O
-errors periodically.  It's been found useful in simulating failing
-devices for testing purposes.
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically.  It's been found useful in simulating
+failing devices for testing purposes.
 
 Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then returns errors for <down interval> seconds,
-and then this cycle repeats.
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
 
-Parameters: <dev path> <offset> <up interval> <down interval>
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+  <dev path> <offset> <up interval> <down interval> \
+    [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
     <dev path>: Full pathname to the underlying block-device, or a
                 "major:minor" device-number.
     <offset>: Starting sector within the device.
     <up interval>: Number of seconds device is available.
     <down interval>: Number of seconds device returns errors.
+
+Optional feature parameters:
+  If no feature parameters are present, during the periods of
+  unreliability, all I/O returns errors.
+
+  drop_writes:
+	All write I/O is silently ignored.
+	Read I/O is handled correctly.
+
+  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+	During <down interval>, replace <Nth_byte> of the data of
+	each matching bio with <value>.
+
+	<Nth_byte>: The offset of the byte to replace.
+		    Counting starts at 1, to replace the first byte.
+	<direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
+		     'w' is incompatible with drop_writes.
+	<value>: The value (from 0-255) to write.
+	<flags>: Perform the replacement only if bio->bi_rw has all the
+		 selected flags set.
+
+Examples:
+  corrupt_bio_byte 32 r 1 0
+	- replaces the 32nd byte of READ bios with the value 1
+
+  corrupt_bio_byte 224 w 0 32
+	- replaces the 224th byte of REQ_META (=32) bios with the value 0
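Putting the parameters together, a hedged sketch of a complete flakey table
line (the device and sector count are illustrative): available for 180
seconds, then corrupting the 32nd byte of reads for 30 seconds:

	0 409600 flakey 8:1 0 180 30 5 corrupt_bio_byte 32 r 1 0

Here "5" is <num_features>, counting the feature keyword and its four
arguments.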
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 33b6b7071ac..2a8c11331d2 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -1,70 +1,108 @@
-Device-mapper RAID (dm-raid) is a bridge from DM to MD.  It
-provides a way to use device-mapper interfaces to access the MD RAID
-drivers.
+dm-raid
+-------
 
-As with all device-mapper targets, the nominal public interfaces are the
-constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
-and STATUSTYPE_TABLE).  The CTR table looks like the following:
-
-1: <s> <l> raid \
-2:	<raid_type> <#raid_params> <raid_params> \
-3:	<#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
-
-Line 1 contains the standard first three arguments to any device-mapper
-target - the start, length, and target type fields.  The target type in
-this case is "raid".
-
-Line 2 contains the arguments that define the particular raid
-type/personality/level, the required arguments for that raid type, and
-any optional arguments.  Possible raid types include: raid4, raid5_la,
-raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc.  (raid1 is
-planned for the future.)  The list of required and optional parameters
-is the same for all the current raid types.  The required parameters are
-positional, while the optional parameters are given as key/value pairs.
-The possible parameters are as follows:
- <chunk_size>		Chunk size in sectors.
- [[no]sync]		Force/Prevent RAID initialization
- [rebuild <idx>]	Rebuild the drive indicated by the index
- [daemon_sleep <ms>]	Time between bitmap daemon work to clear bits
- [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
- [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
- [max_write_behind <sectors>]		See '-write-behind=' (man mdadm)
- [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
-
-Line 3 contains the list of devices that compose the array in
-metadata/data device pairs.  If the metadata is stored separately, a '-'
-is given for the metadata device position.  If a drive has failed or is
-missing at creation time, a '-' can be given for both the metadata and
-data drives for a given position.
-
-NB. Currently all metadata devices must be specified as '-'.
-
-Examples:
-# RAID4 - 4 data drives, 1 parity
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
+
+The target is named "raid" and it accepts the following parameters:
+
+  <raid_type> <#raid_params> <raid_params> \
+    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+  raid1		RAID1 mirroring
+  raid4		RAID4 dedicated parity disk
+  raid5_la	RAID5 left asymmetric
+		- rotating parity 0 with data continuation
+  raid5_ra	RAID5 right asymmetric
+		- rotating parity N with data continuation
+  raid5_ls	RAID5 left symmetric
+		- rotating parity 0 with data restart
+  raid5_rs	RAID5 right symmetric
+		- rotating parity N with data restart
+  raid6_zr	RAID6 zero restart
+		- rotating parity zero (left-to-right) with data restart
+  raid6_nr	RAID6 N restart
+		- rotating parity N (right-to-left) with data restart
+  raid6_nc	RAID6 N continue
+		- rotating parity N (right-to-left) with data continuation
+
+  Reference: Chapter 4 of
+  http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+    Mandatory parameters:
+	<chunk_size>:	Chunk size in sectors.  This parameter is often known as
+			"stripe size".  It is the only mandatory parameter and
+			is placed first.
+
+    followed by optional parameters (in any order):
+	[sync|nosync]	Force or prevent RAID initialization.
+
+	[rebuild <idx>]	Rebuild drive number idx (first drive is 0).
+
+	[daemon_sleep <ms>]
+		Interval between runs of the bitmap daemon that
+		clear bits.  A longer interval means less bitmap I/O but
+		resyncing after a failure is likely to take longer.
+
+	[min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
+	[max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
+	[write_mostly <idx>]			Drive index is write-mostly
+	[max_write_behind <sectors>]		See '-write-behind=' (man mdadm)
+	[stripe_cache <sectors>]		Stripe cache size (higher RAIDs only)
+	[region_size <sectors>]
+		The region_size multiplied by the number of regions is the
+		logical size of the array.  The bitmap records the device
+		synchronisation state for each region.
+
+<#raid_devs>: The number of devices composing the array.
+	Each device consists of two entries.  The first is the device
+	containing the metadata (if any); the second is the one containing the
+	data.
+
+	If a drive has failed or is missing at creation time, a '-' can be
+	given for both the metadata and data drives for a given position.
+
+
+Example tables
+--------------
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
 # No metadata devices specified to hold superblock/bitmap info
 # Chunk size of 1MiB
 # (Lines separated for easy reading)
+
 0 1960893648 raid \
 	raid4 1 2048 \
 	5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
+# RAID4 - 4 data drives, 1 parity (with metadata devices)
 # Chunk size of 1MiB, force RAID initialization,
 #	min recovery rate at 20 kiB/sec/disk
+
 0 1960893648 raid \
-	raid4 4 2048 min_recovery_rate 20 sync\
-	5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+	raid4 4 2048 sync min_recovery_rate 20 \
+	5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
 
-Performing a 'dmsetup table' should display the CTR table used to
-construct the mapping (with possible reordering of optional
-parameters).
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
 
-Performing a 'dmsetup status' will yield information on the state and
-health of the array.  The output is as follows:
+'dmsetup status' yields information on the state and health of the
+array.
+The output is as follows:
 1: <s> <l> raid \
 2:	<raid_type> <#devices> <1 health char for each dev> <resync_ratio>
 
-Line 1 is standard DM output.  Line 2 is best shown by example:
+Line 1 is the standard output produced by device-mapper.
+Line 2 is produced by the raid target, and best explained by example:
 	0 1960893648 raid raid4 5 AAAAA 2/490221568
 Here we can see the RAID type is raid4, there are 5 devices - all of
 which are 'A'live, and the array is 2/490221568 complete with recovery.
+Faulty or missing devices are marked 'D'.  Devices that are out-of-sync
+are marked 'a'.
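As a usage sketch, the first example table above could be loaded with
dmsetup (the mapping name "raid4_vol" is arbitrary):

	dmsetup create raid4_vol --table \
	  '0 1960893648 raid raid4 1 2048 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81'

after which 'dmsetup status raid4_vol' reports the status line described
above.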
diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
index 7190c99d761..5c2c02140a6 100644
--- a/Documentation/devicetree/bindings/gpio/gpio_keys.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt
@@ -10,7 +10,7 @@ Optional properties:
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
 
-	- gpios: OF devcie-tree gpio specificatin.
+	- gpios: OF device-tree gpio specification.
 	- label: Descriptive name of the key.
 	- linux,code: Keycode to emit.
 
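For reference, a hedged sketch of a gpio-keys node using these subnode
properties (the GPIO controller phandle, its cell values and the keycode
are illustrative):

	gpio-keys {
		compatible = "gpio-keys";

		power {
			label = "Power";
			gpios = <&gpio1 2 1>;
			linux,code = <116>;	/* KEY_POWER */
		};
	};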
diff --git a/Documentation/devicetree/bindings/input/fsl-mma8450.txt b/Documentation/devicetree/bindings/input/fsl-mma8450.txt
new file mode 100644
index 00000000000..a00c94ccbde
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/fsl-mma8450.txt
@@ -0,0 +1,11 @@
+* Freescale MMA8450 3-Axis Accelerometer
+
+Required properties:
+- compatible : "fsl,mma8450".
+
+Example:
+
+accelerometer: mma8450@1c {
+	compatible = "fsl,mma8450";
+	reg = <0x1c>;
+};
diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
index 5a0cb1ef616..94b7e0f96b3 100644
--- a/Documentation/dmaengine.txt
+++ b/Documentation/dmaengine.txt
@@ -10,87 +10,181 @@ NOTE: For DMA Engine usage in async_tx please see:
 Below is a guide to device driver writers on how to use the Slave-DMA API of the
 DMA Engine. This is applicable only for slave DMA usage only.
 
-The slave DMA usage consists of following steps
+The slave DMA usage consists of following steps:
 1. Allocate a DMA slave channel
 2. Set slave and controller specific parameters
 3. Get a descriptor for transaction
-4. Submit the transaction and wait for callback notification
+4. Submit the transaction
+5. Issue pending requests and wait for callback notification
 
 1. Allocate a DMA slave channel
-Channel allocation is slightly different in the slave DMA context, client
-drivers typically need a channel from a particular DMA controller only and even
-in some cases a specific channel is desired. To request a channel
-dma_request_channel() API is used.
-
-Interface:
-struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
-		dma_filter_fn filter_fn,
-		void *filter_param);
-where dma_filter_fn is defined as:
-typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
-
-When the optional 'filter_fn' parameter is set to NULL dma_request_channel
-simply returns the first channel that satisfies the capability mask. Otherwise,
-when the mask parameter is insufficient for specifying the necessary channel,
-the filter_fn routine can be used to disposition the available channels in the
-system. The filter_fn routine is called once for each free channel in the
-system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags
-that channel to be the return value from dma_request_channel. A channel
-allocated via this interface is exclusive to the caller, until
-dma_release_channel() is called.
+
+   Channel allocation is slightly different in the slave DMA context,
+   client drivers typically need a channel from a particular DMA
+   controller only and even in some cases a specific channel is desired.
+   To request a channel dma_request_channel() API is used.
+
+   Interface:
+	struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+			dma_filter_fn filter_fn,
+			void *filter_param);
+   where dma_filter_fn is defined as:
+	typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
+   The 'filter_fn' parameter is optional, but highly recommended for
+   slave and cyclic channels as they typically need to obtain a specific
+   DMA channel.
+
+   When the optional 'filter_fn' parameter is NULL, dma_request_channel()
+   simply returns the first channel that satisfies the capability mask.
+
+   Otherwise, the 'filter_fn' routine will be called once for each free
+   channel which has a capability in 'mask'.  'filter_fn' is expected to
+   return 'true' when the desired DMA channel is found.
+
+   A channel allocated via this interface is exclusive to the caller,
+   until dma_release_channel() is called.
 
 2. Set slave and controller specific parameters
-Next step is always to pass some specific information to the DMA driver. Most of
-the generic information which a slave DMA can use is in struct dma_slave_config.
-It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA
-burst lengths etc. If some DMA controllers have more parameters to be sent then
-they should try to embed struct dma_slave_config in their controller specific
-structure. That gives flexibility to client to pass more parameters, if
-required.
-
-Interface:
-int dmaengine_slave_config(struct dma_chan *chan,
-			  struct dma_slave_config *config)
+
+   Next step is always to pass some specific information to the DMA
+   driver.  Most of the generic information which a slave DMA can use
+   is in struct dma_slave_config.  This allows the clients to specify
+   DMA direction, DMA addresses, bus widths, DMA burst lengths etc
+   for the peripheral.
+
+   If some DMA controllers have more parameters to be sent then they
+   should try to embed struct dma_slave_config in their controller
+   specific structure. That gives flexibility to client to pass more
+   parameters, if required.
+
+   Interface:
+	int dmaengine_slave_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+
+   Please see the dma_slave_config structure definition in dmaengine.h
+   for a detailed explanation of the struct members.  Please note
+   that the 'direction' member will be going away as it duplicates the
+   direction given in the prepare call.
 
 3. Get a descriptor for transaction
-For slave usage the various modes of slave transfers supported by the
-DMA-engine are:
-slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
-dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
+
+   For slave usage the various modes of slave transfers supported by the
+   DMA-engine are:
+
+   slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
+   dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
 		operation is explicitly stopped.
-The non NULL return of this transfer API represents a "descriptor" for the given
-transaction.
-
-Interface:
-struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)(
-		struct dma_chan *chan,
-		struct scatterlist *dst_sg, unsigned int dst_nents,
-		struct scatterlist *src_sg, unsigned int src_nents,
-		unsigned long flags);
-struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
+
+   A non-NULL return of this transfer API represents a "descriptor" for
+   the given transaction.
+
+   Interface:
+	struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags);
+
+	struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_data_direction direction);
 
-4. Submit the transaction and wait for callback notification
-To schedule the transaction to be scheduled by dma device, the "descriptor"
-returned in above (3) needs to be submitted.
-To tell the dma driver that a transaction is ready to be serviced, the
-descriptor->submit() callback needs to be invoked. This chains the descriptor to
-the pending queue.
-The transactions in the pending queue can be activated by calling the
-issue_pending API. If channel is idle then the first transaction in queue is
-started and subsequent ones queued up.
-On completion of the DMA operation the next in queue is submitted and a tasklet
-triggered. The tasklet would then call the client driver completion callback
-routine for notification, if set.
-Interface:
-void dma_async_issue_pending(struct dma_chan *chan);
-
-==============================================================================
-
-Additional usage notes for dma driver writers
-1/ Although DMA engine specifies that completion callback routines cannot submit
-any new operations, but typically for slave DMA subsequent transaction may not
-be available for submit prior to callback routine being called. This requirement
-is not a requirement for DMA-slave devices. But they should take care to drop
-the spin-lock they might be holding before calling the callback routine
+   The peripheral driver is expected to have mapped the scatterlist for
+   the DMA operation prior to calling device_prep_slave_sg, and must
+   keep the scatterlist mapped until the DMA operation has completed.
+   The scatterlist must be mapped using the DMA struct device.  So,
+   normal setup should look like this:
+
+	nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len);
+	if (nr_sg == 0)
+		/* error */
+
+	desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg,
+			direction, flags);
+
+   Once a descriptor has been obtained, the callback information can be
+   added and the descriptor must then be submitted.  Some DMA engine
+   drivers may hold a spinlock between a successful preparation and
+   submission so it is important that these two operations are closely
+   paired.
+
+   Note:
+	Although the async_tx API specifies that completion callback
+	routines cannot submit any new operations, this is not the
+	case for slave/cyclic DMA.
+
+	For slave DMA, the subsequent transaction may not be available
+	for submission prior to callback function being invoked, so
+	slave DMA callbacks are permitted to prepare and submit a new
+	transaction.
+
+	For cyclic DMA, a callback function may wish to terminate the
+	DMA via dmaengine_terminate_all().
+
+	Therefore, it is important that DMA engine drivers drop any
+	locks before calling the callback function which may cause a
+	deadlock.
+
+	Note that callbacks will always be invoked from the DMA
+	engine's tasklet, never from interrupt context.
+
+4. Submit the transaction
+
+   Once the descriptor has been prepared and the callback information
+   added, it must be placed on the DMA engine drivers pending queue.
+
+   Interface:
+	dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
+
+   This returns a cookie that can be used to check the progress of DMA
+   engine activity via other DMA engine calls not covered in this document.
+
+   dmaengine_submit() will not start the DMA operation, it merely adds
+   it to the pending queue.  For this, see step 5, dma_async_issue_pending.
+
+5. Issue pending DMA requests and wait for callback notification
+
+   The transactions in the pending queue can be activated by calling the
+   issue_pending API.  If channel is idle then the first transaction in
+   queue is started and subsequent ones queued up.
+
+   On completion of each DMA operation, the next in queue is started and
+   a tasklet triggered.  The tasklet will then call the client driver
+   completion callback routine for notification, if set.
+
+   Interface:
+	void dma_async_issue_pending(struct dma_chan *chan);
+
+Further APIs:
+
+1. int dmaengine_terminate_all(struct dma_chan *chan)
+
+   This causes all activity for the DMA channel to be stopped, and may
+   discard data in the DMA FIFO which hasn't been fully transferred.
+   No callback functions will be called for any incomplete transfers.
+
+2. int dmaengine_pause(struct dma_chan *chan)
+
+   This pauses activity on the DMA channel without data loss.
+
+3. int dmaengine_resume(struct dma_chan *chan)
+
+   Resume a previously paused DMA channel.  It is invalid to resume a
+   channel which is not currently paused.
+
+4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
+        dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
+
+   This can be used to check the status of the channel.  Please see
+   the documentation in include/linux/dmaengine.h for a more complete
+   description of this API.
+
+   This can be used in conjunction with dma_async_is_complete() and
+   the cookie returned from 'descriptor->submit()' to check for
+   completion of a specific DMA transaction.
+
+   Note:
+	Not all DMA engine drivers can return reliable information for
+	a running DMA channel.  It is recommended that DMA engine users
+	pause or stop (via dmaengine_terminate_all) the channel before
+	using this API.
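Tying the five steps together, a minimal and hedged client sketch might look
like the following (error handling is abbreviated, the filter match on
chan->private is an assumption about how the platform hands out request
lines, and 'cfg' is a caller-provided struct dma_slave_config):

	#include <linux/dmaengine.h>
	#include <linux/dma-mapping.h>

	static bool my_filter(struct dma_chan *chan, void *param)
	{
		/* assumption: platform code stashes a match token in ->private */
		return chan->private == param;
	}

	static int my_start_rx(struct scatterlist *sgl, unsigned int sg_len,
			       void *match, struct dma_slave_config *cfg,
			       dma_async_tx_callback done, void *done_arg)
	{
		struct dma_chan *chan;
		struct dma_async_tx_descriptor *desc;
		dma_cap_mask_t mask;
		int nr_sg;

		dma_cap_zero(mask);
		dma_cap_set(DMA_SLAVE, mask);
		chan = dma_request_channel(mask, my_filter, match); /* step 1 */
		if (!chan)
			return -ENODEV;

		dmaengine_slave_config(chan, cfg);                  /* step 2 */

		nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len,
				   DMA_FROM_DEVICE);
		if (!nr_sg)
			goto err;

		desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg,
				DMA_FROM_DEVICE, DMA_PREP_INTERRUPT); /* step 3 */
		if (!desc)
			goto err;

		desc->callback = done;		/* completion notification */
		desc->callback_param = done_arg;
		dmaengine_submit(desc);                             /* step 4 */
		dma_async_issue_pending(chan);                      /* step 5 */
		return 0;

	err:
		dma_release_channel(chan);
		return -EIO;
	}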
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 7be15e44d48..82a5d250d75 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -143,8 +143,7 @@ o provide a way to configure fault attributes
 failslab, fail_page_alloc, and fail_make_request use this way.
 Helper functions:
 
-	init_fault_attr_dentries(entries, attr, name);
-	void cleanup_fault_attr_dentries(entries);
+	fault_create_debugfs_attr(name, parent, attr);
 
 - module parameters
 
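As a hedged sketch of the consolidated helper (the attribute name
"fail_example" is illustrative; the helper returns the created debugfs
dentry or an ERR_PTR on failure):

	#include <linux/fault-inject.h>
	#include <linux/err.h>

	static DECLARE_FAULT_ATTR(fail_example);

	static int __init fail_example_init(void)
	{
		struct dentry *dir;

		dir = fault_create_debugfs_attr("fail_example", NULL,
						&fail_example);
		if (IS_ERR(dir))
			return PTR_ERR(dir);
		return 0;
	}

	/* the hot path would then consult should_fail(&fail_example, size) */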
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ea0bace0124..c4a6e148732 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -296,15 +296,6 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org>
 
 ---------------------------
 
-What:	CONFIG_THERMAL_HWMON
-When:	January 2009
-Why:	This option was introduced just to allow older lm-sensors userspace
-	to keep working over the upgrade to 2.6.26. At the scheduled time of
-	removal fixed lm-sensors (2.x or 3.x) should be readily available.
-Who:	Rene Herman <rene.herman@gmail.com>
-
----------------------------
-
 What:	Code that is now under CONFIG_WIRELESS_EXT_SYSFS
 	(in net/core/net-sysfs.c)
 When:	After the only user (hal) has seen a release with the patches
@@ -590,3 +581,14 @@ Why: This driver has been superseded by g_mass_storage.
 Who:	Alan Stern <stern@rowland.harvard.edu>
 
 ----------------------------
+
+What:	threeg and interface sysfs files in /sys/devices/platform/acer-wmi
+When:	2012
+Why:	In 3.0 we can now autodetect the internal 3G device and already have
+	the threeg rfkill device, so we plan to remove the threeg sysfs
+	support as it is no longer necessary.
+
+	We also plan to remove the interface sysfs file that exposed which
+	ACPI-WMI interface was used by the acer-wmi driver.  It will be
+	replaced by an information log message when acer-wmi initializes.
+Who:	Lee, Chun-Yi <jlee@novell.com>
diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt
index ace200b7c21..37c4d84a0e5 100644
--- a/Documentation/frv/booting.txt
+++ b/Documentation/frv/booting.txt
@@ -106,13 +106,20 @@ separated by spaces:
106 To use the first on-chip serial port at baud rate 115200, no parity, 8 106 To use the first on-chip serial port at baud rate 115200, no parity, 8
107 bits, and no flow control. 107 bits, and no flow control.
108 108
109 (*) root=/dev/<xxxx> 109 (*) root=<xxxx>
110 110
111 This specifies the device upon which the root filesystem resides. For 111 This specifies the device upon which the root filesystem resides. It
112 example: 112 may be specified by major and minor number, device path, or even
113 partition uuid, if supported. For example:
113 114
114 /dev/nfs NFS root filesystem 115 /dev/nfs NFS root filesystem
115 /dev/mtdblock3 Fourth RedBoot partition on the System Flash 116 /dev/mtdblock3 Fourth RedBoot partition on the System Flash
117 PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1
118 first partition after the partition with the given UUID
119 253:0 Device with major 253 and minor 0
120
121 Authoritative information can be found in
122 "Documentation/kernel-parameters.txt".
116 123
117 (*) rw 124 (*) rw
118 125
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 72ba8d51dbc..845a191004b 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments
292 <mailto:buk@buks.ipn.de> 292 <mailto:buk@buks.ipn.de>
2930xA0 all linux/sdp/sdp.h Industrial Device Project 2930xA0 all linux/sdp/sdp.h Industrial Device Project
294 <mailto:kenji@bitgate.com> 294 <mailto:kenji@bitgate.com>
2950xA2 00-0F arch/tile/include/asm/hardwall.h
2950xA3 80-8F Port ACL in development: 2960xA3 80-8F Port ACL in development:
296 <mailto:tlewis@mindspring.com> 297 <mailto:tlewis@mindspring.com>
2970xA3 90-9F linux/dtlk.h 2980xA3 90-9F linux/dtlk.h
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4ca93898fbd..e279b724291 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
163 163
164 See also Documentation/power/pm.txt, pci=noacpi 164 See also Documentation/power/pm.txt, pci=noacpi
165 165
166 acpi_rsdp= [ACPI,EFI,KEXEC]
167 Pass the RSDP address to the kernel, mostly used
 168			on machines running EFI runtime services to boot
 169			the second kernel for kdump.
170
166 acpi_apic_instance= [ACPI, IOAPIC] 171 acpi_apic_instance= [ACPI, IOAPIC]
167 Format: <int> 172 Format: <int>
168 2: use 2nd APIC table, if available 173 2: use 2nd APIC table, if available
@@ -546,6 +551,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
546 /proc/<pid>/coredump_filter. 551 /proc/<pid>/coredump_filter.
547 See also Documentation/filesystems/proc.txt. 552 See also Documentation/filesystems/proc.txt.
548 553
554 cpuidle.off=1 [CPU_IDLE]
555 disable the cpuidle sub-system
556
549 cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver 557 cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
550 Format: 558 Format:
551 <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>] 559 <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
@@ -2153,6 +2161,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2153 [HW,MOUSE] Controls Logitech smartscroll autorepeat. 2161 [HW,MOUSE] Controls Logitech smartscroll autorepeat.
2154 0 = disabled, 1 = enabled (default). 2162 0 = disabled, 1 = enabled (default).
2155 2163
2164 pstore.backend= Specify the name of the pstore backend to use
2165
2156 pt. [PARIDE] 2166 pt. [PARIDE]
2157 See Documentation/blockdev/paride.txt. 2167 See Documentation/blockdev/paride.txt.
2158 2168
@@ -2238,6 +2248,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2238 ro [KNL] Mount root device read-only on boot 2248 ro [KNL] Mount root device read-only on boot
2239 2249
2240 root= [KNL] Root filesystem 2250 root= [KNL] Root filesystem
2251 See name_to_dev_t comment in init/do_mounts.c.
2241 2252
2242 rootdelay= [KNL] Delay (in seconds) to pause before attempting to 2253 rootdelay= [KNL] Delay (in seconds) to pause before attempting to
2243 mount the root filesystem 2254 mount the root filesystem
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt
index c93bed66e25..97d45f276fe 100644
--- a/Documentation/m68k/kernel-options.txt
+++ b/Documentation/m68k/kernel-options.txt
@@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
129the first of these. You can find out all valid major numbers by 129the first of these. You can find out all valid major numbers by
130looking into include/linux/major.h. 130looking into include/linux/major.h.
131 131
132In addition to major and minor numbers, if the device containing your
133root partition uses a partition table format with unique partition
134identifiers, then you may use them. For instance,
135"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also
136possible to reference another partition on the same device using a
137known partition UUID as the starting point. For example,
138if partition 5 of the device has the UUID of
13900112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
140follows:
141 PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
142
143Authoritative information can be found in
144"Documentation/kernel-parameters.txt".
145
132 146
1332.2) ro, rw 1472.2) ro, rw
134----------- 148-----------
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 675612ff41a..5dd960d7517 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -599,7 +599,7 @@ num_unsol_na
599 affect only the active-backup mode. These options were added for 599 affect only the active-backup mode. These options were added for
600 bonding versions 3.3.0 and 3.4.0 respectively. 600 bonding versions 3.3.0 and 3.4.0 respectively.
601 601
602 From Linux 2.6.40 and bonding version 3.7.1, these notifications 602 From Linux 3.0 and bonding version 3.7.1, these notifications
603 are generated by the ipv4 and ipv6 code and the numbers of 603 are generated by the ipv4 and ipv6 code and the numbers of
604 repetitions cannot be set independently. 604 repetitions cannot be set independently.
605 605
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 14dd3c6ad97..4ce5450ab6e 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -54,11 +54,10 @@ referred to as subsystem-level callbacks in what follows.
54By default, the callbacks are always invoked in process context with interrupts 54By default, the callbacks are always invoked in process context with interrupts
55enabled. However, subsystems can use the pm_runtime_irq_safe() helper function 55enabled. However, subsystems can use the pm_runtime_irq_safe() helper function
56to tell the PM core that a device's ->runtime_suspend() and ->runtime_resume() 56to tell the PM core that a device's ->runtime_suspend() and ->runtime_resume()
57callbacks should be invoked in atomic context with interrupts disabled 57callbacks should be invoked in atomic context with interrupts disabled.
58(->runtime_idle() is still invoked the default way). This implies that these 58This implies that these callback routines must not block or sleep, but it also
59callback routines must not block or sleep, but it also means that the 59means that the synchronous helper functions listed at the end of Section 4 can
60synchronous helper functions listed at the end of Section 4 can be used within 60be used within an interrupt handler or in an atomic context.
61an interrupt handler or in an atomic context.
62 61
63The subsystem-level suspend callback is _entirely_ _responsible_ for handling 62The subsystem-level suspend callback is _entirely_ _responsible_ for handling
64the suspend of the device as appropriate, which may, but need not include 63the suspend of the device as appropriate, which may, but need not include
@@ -483,6 +482,7 @@ pm_runtime_suspend()
483pm_runtime_autosuspend() 482pm_runtime_autosuspend()
484pm_runtime_resume() 483pm_runtime_resume()
485pm_runtime_get_sync() 484pm_runtime_get_sync()
485pm_runtime_put_sync()
486pm_runtime_put_sync_suspend() 486pm_runtime_put_sync_suspend()
487 487
4885. Runtime PM Initialization, Device Probing and Removal 4885. Runtime PM Initialization, Device Probing and Removal
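A sketch of the pattern this enables, assuming a driver whose
callbacks are short enough to run with interrupts disabled (all
"foo_" names are hypothetical):

    #include <linux/pm_runtime.h>
    #include <linux/interrupt.h>

    static int foo_probe(struct device *dev)
    {
            /* ->runtime_suspend()/->runtime_resume() will now be
             * called in atomic context with interrupts disabled. */
            pm_runtime_irq_safe(dev);
            pm_runtime_enable(dev);
            return 0;
    }

    static irqreturn_t foo_irq(int irq, void *data)
    {
            struct device *dev = data;

            pm_runtime_get_sync(dev);       /* legal here due to irq_safe */
            /* ... service the device ... */
            pm_runtime_put_sync(dev);
            return IRQ_HANDLED;
    }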
diff --git a/MAINTAINERS b/MAINTAINERS
index c9c6324a7a9..51d42fbc8dc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2643,9 +2643,8 @@ S: Maintained
2643F: arch/x86/math-emu/ 2643F: arch/x86/math-emu/
2644 2644
2645FRAME RELAY DLCI/FRAD (Sangoma drivers too) 2645FRAME RELAY DLCI/FRAD (Sangoma drivers too)
2646M: Mike McLagan <mike.mclagan@linux.org>
2647L: netdev@vger.kernel.org 2646L: netdev@vger.kernel.org
2648S: Maintained 2647S: Orphan
2649F: drivers/net/wan/dlci.c 2648F: drivers/net/wan/dlci.c
2650F: drivers/net/wan/sdla.c 2649F: drivers/net/wan/sdla.c
2651 2650
@@ -3367,6 +3366,12 @@ F: drivers/net/ixgb/
3367F: drivers/net/ixgbe/ 3366F: drivers/net/ixgbe/
3368F: drivers/net/ixgbevf/ 3367F: drivers/net/ixgbevf/
3369 3368
3369INTEL MRST PMU DRIVER
3370M: Len Brown <len.brown@intel.com>
3371L: linux-pm@lists.linux-foundation.org
3372S: Supported
3373F: arch/x86/platform/mrst/pmu.*
3374
3370INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT 3375INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT
3371L: linux-wireless@vger.kernel.org 3376L: linux-wireless@vger.kernel.org
3372S: Orphan 3377S: Orphan
@@ -4409,10 +4414,10 @@ F: net/*/netfilter/
4409F: net/netfilter/ 4414F: net/netfilter/
4410 4415
4411NETLABEL 4416NETLABEL
4412M: Paul Moore <paul.moore@hp.com> 4417M: Paul Moore <paul@paul-moore.com>
4413W: http://netlabel.sf.net 4418W: http://netlabel.sf.net
4414L: netdev@vger.kernel.org 4419L: netdev@vger.kernel.org
4415S: Supported 4420S: Maintained
4416F: Documentation/netlabel/ 4421F: Documentation/netlabel/
4417F: include/net/netlabel.h 4422F: include/net/netlabel.h
4418F: net/netlabel/ 4423F: net/netlabel/
@@ -4457,7 +4462,6 @@ F: include/linux/netdevice.h
4457NETWORKING [IPv4/IPv6] 4462NETWORKING [IPv4/IPv6]
4458M: "David S. Miller" <davem@davemloft.net> 4463M: "David S. Miller" <davem@davemloft.net>
4459M: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 4464M: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
4460M: "Pekka Savola (ipv6)" <pekkas@netcore.fi>
4461M: James Morris <jmorris@namei.org> 4465M: James Morris <jmorris@namei.org>
4462M: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org> 4466M: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
4463M: Patrick McHardy <kaber@trash.net> 4467M: Patrick McHardy <kaber@trash.net>
@@ -4470,7 +4474,7 @@ F: include/net/ip*
4470F: arch/x86/net/* 4474F: arch/x86/net/*
4471 4475
4472NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK) 4476NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK)
4473M: Paul Moore <paul.moore@hp.com> 4477M: Paul Moore <paul@paul-moore.com>
4474L: netdev@vger.kernel.org 4478L: netdev@vger.kernel.org
4475S: Maintained 4479S: Maintained
4476 4480
@@ -4722,6 +4726,7 @@ S: Maintained
4722F: drivers/of 4726F: drivers/of
4723F: include/linux/of*.h 4727F: include/linux/of*.h
4724K: of_get_property 4728K: of_get_property
4729K: of_match_table
4725 4730
4726OPENRISC ARCHITECTURE 4731OPENRISC ARCHITECTURE
4727M: Jonas Bonn <jonas@southpole.se> 4732M: Jonas Bonn <jonas@southpole.se>
@@ -6318,6 +6323,7 @@ F: include/linux/sysv_fs.h
6318TARGET SUBSYSTEM 6323TARGET SUBSYSTEM
6319M: Nicholas A. Bellinger <nab@linux-iscsi.org> 6324M: Nicholas A. Bellinger <nab@linux-iscsi.org>
6320L: linux-scsi@vger.kernel.org 6325L: linux-scsi@vger.kernel.org
6326L: target-devel@vger.kernel.org
6321L: http://groups.google.com/group/linux-iscsi-target-dev 6327L: http://groups.google.com/group/linux-iscsi-target-dev
6322W: http://www.linux-iscsi.org 6328W: http://www.linux-iscsi.org
6323T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master 6329T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master
diff --git a/Makefile b/Makefile
index f676d15cd34..b4ca4e111c9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 3 1VERSION = 3
2PATCHLEVEL = 0 2PATCHLEVEL = 1
3SUBLEVEL = 0 3SUBLEVEL = 0
4EXTRAVERSION = 4EXTRAVERSION = -rc1
5NAME = Sneaky Weasel 5NAME = Sneaky Weasel
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b0e2397a5..4b0669cbb3b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX
178config HAVE_RCU_TABLE_FREE 178config HAVE_RCU_TABLE_FREE
179 bool 179 bool
180 180
181config ARCH_HAVE_NMI_SAFE_CMPXCHG
182 bool
183
181source "kernel/gcov/Kconfig" 184source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index ca2da8da6e9..60cde53d266 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -14,6 +14,7 @@ config ALPHA
14 select AUTO_IRQ_AFFINITY if SMP 14 select AUTO_IRQ_AFFINITY if SMP
15 select GENERIC_IRQ_SHOW 15 select GENERIC_IRQ_SHOW
16 select ARCH_WANT_OPTIONAL_GPIOLIB 16 select ARCH_WANT_OPTIONAL_GPIOLIB
17 select ARCH_HAVE_NMI_SAFE_CMPXCHG
17 help 18 help
18 The Alpha is a 64-bit general-purpose processor designed and 19 The Alpha is a 64-bit general-purpose processor designed and
19 marketed by the Digital Equipment Corporation of blessed memory, 20 marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index acca35aebe2..aeef960ff79 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -112,9 +112,6 @@ EXPORT_SYMBOL(__put_user_4);
112EXPORT_SYMBOL(__put_user_8); 112EXPORT_SYMBOL(__put_user_8);
113#endif 113#endif
114 114
115 /* crypto hash */
116EXPORT_SYMBOL(sha_transform);
117
118 /* gcc lib functions */ 115 /* gcc lib functions */
119EXPORT_SYMBOL(__ashldi3); 116EXPORT_SYMBOL(__ashldi3);
120EXPORT_SYMBOL(__ashrdi3); 117EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 5e1e5419722..1a347f481e5 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -30,6 +30,7 @@
30#include <linux/uaccess.h> 30#include <linux/uaccess.h>
31#include <linux/random.h> 31#include <linux/random.h>
32#include <linux/hw_breakpoint.h> 32#include <linux/hw_breakpoint.h>
33#include <linux/cpuidle.h>
33 34
34#include <asm/cacheflush.h> 35#include <asm/cacheflush.h>
35#include <asm/leds.h> 36#include <asm/leds.h>
@@ -196,7 +197,8 @@ void cpu_idle(void)
196 cpu_relax(); 197 cpu_relax();
197 } else { 198 } else {
198 stop_critical_timings(); 199 stop_critical_timings();
199 pm_idle(); 200 if (cpuidle_idle_call())
201 pm_idle();
200 start_critical_timings(); 202 start_critical_timings();
201 /* 203 /*
202 * This will eventually be removed - pm_idle 204 * This will eventually be removed - pm_idle
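The hunk above boils down to the following fallback pattern: prefer
the cpuidle framework and use the legacy pm_idle hook only when
cpuidle_idle_call() fails, i.e. when no cpuidle driver is registered
(a sketch of that step alone, not the full cpu_idle() loop):

    #include <linux/cpuidle.h>

    extern void (*pm_idle)(void);   /* existing per-arch idle hook */

    static void arm_idle_once(void)
    {
            /* cpuidle_idle_call() returns non-zero when cpuidle is
             * unavailable; fall back to the per-arch pm_idle hook. */
            if (cpuidle_idle_call())
                    pm_idle();
    }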
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 59ff42ddf0a..cf73a7f742d 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
12 strchr.o strrchr.o \ 12 strchr.o strrchr.o \
13 testchangebit.o testclearbit.o testsetbit.o \ 13 testchangebit.o testclearbit.o testsetbit.o \
14 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ 14 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
15 ucmpdi2.o lib1funcs.o div64.o sha1.o \ 15 ucmpdi2.o lib1funcs.o div64.o \
16 io-readsb.o io-writesb.o io-readsl.o io-writesl.o 16 io-readsb.o io-writesb.o io-readsl.o io-writesl.o
17 17
18mmu-y := clear_user.o copy_page.o getuser.o putuser.o 18mmu-y := clear_user.o copy_page.o getuser.o putuser.o
diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
deleted file mode 100644
index eb0edb80d7b..00000000000
--- a/arch/arm/lib/sha1.S
+++ /dev/null
@@ -1,211 +0,0 @@
1/*
2 * linux/arch/arm/lib/sha1.S
3 *
4 * SHA transform optimized for ARM
5 *
6 * Copyright: (C) 2005 by Nicolas Pitre <nico@fluxnic.net>
7 * Created: September 17, 2005
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * The reference implementation for this code is linux/lib/sha1.c
14 */
15
16#include <linux/linkage.h>
17
18 .text
19
20
21/*
22 * void sha_transform(__u32 *digest, const char *in, __u32 *W)
23 *
24 * Note: the "in" ptr may be unaligned.
25 */
26
27ENTRY(sha_transform)
28
29 stmfd sp!, {r4 - r8, lr}
30
31 @ for (i = 0; i < 16; i++)
32 @ W[i] = be32_to_cpu(in[i]);
33
34#ifdef __ARMEB__
35 mov r4, r0
36 mov r0, r2
37 mov r2, #64
38 bl memcpy
39 mov r2, r0
40 mov r0, r4
41#else
42 mov r3, r2
43 mov lr, #16
441: ldrb r4, [r1], #1
45 ldrb r5, [r1], #1
46 ldrb r6, [r1], #1
47 ldrb r7, [r1], #1
48 subs lr, lr, #1
49 orr r5, r5, r4, lsl #8
50 orr r6, r6, r5, lsl #8
51 orr r7, r7, r6, lsl #8
52 str r7, [r3], #4
53 bne 1b
54#endif
55
56 @ for (i = 0; i < 64; i++)
57 @ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
58
59 sub r3, r2, #4
60 mov lr, #64
612: ldr r4, [r3, #4]!
62 subs lr, lr, #1
63 ldr r5, [r3, #8]
64 ldr r6, [r3, #32]
65 ldr r7, [r3, #52]
66 eor r4, r4, r5
67 eor r4, r4, r6
68 eor r4, r4, r7
69 mov r4, r4, ror #31
70 str r4, [r3, #64]
71 bne 2b
72
73 /*
74 * The SHA functions are:
75 *
76 * f1(B,C,D) = (D ^ (B & (C ^ D)))
77 * f2(B,C,D) = (B ^ C ^ D)
78 * f3(B,C,D) = ((B & C) | (D & (B | C)))
79 *
80 * Then the sub-blocks are processed as follows:
81 *
82 * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
83 * B' = A
84 * C' = ror(B, 2)
85 * D' = C
86 * E' = D
87 *
88 * We therefore unroll each loop 5 times to avoid register shuffling.
 89 * Also the ror for C (and also D and E, which are successively derived
 90 * from it) is applied in place to save an additional mov insn for
91 * each round.
92 */
93
94 .macro sha_f1, A, B, C, D, E
95 ldr r3, [r2], #4
96 eor ip, \C, \D
97 add \E, r1, \E, ror #2
98 and ip, \B, ip, ror #2
99 add \E, \E, \A, ror #27
100 eor ip, ip, \D, ror #2
101 add \E, \E, r3
102 add \E, \E, ip
103 .endm
104
105 .macro sha_f2, A, B, C, D, E
106 ldr r3, [r2], #4
107 add \E, r1, \E, ror #2
108 eor ip, \B, \C, ror #2
109 add \E, \E, \A, ror #27
110 eor ip, ip, \D, ror #2
111 add \E, \E, r3
112 add \E, \E, ip
113 .endm
114
115 .macro sha_f3, A, B, C, D, E
116 ldr r3, [r2], #4
117 add \E, r1, \E, ror #2
118 orr ip, \B, \C, ror #2
119 add \E, \E, \A, ror #27
120 and ip, ip, \D, ror #2
121 add \E, \E, r3
122 and r3, \B, \C, ror #2
123 orr ip, ip, r3
124 add \E, \E, ip
125 .endm
126
127 ldmia r0, {r4 - r8}
128
129 mov lr, #4
130 ldr r1, .L_sha_K + 0
131
132 /* adjust initial values */
133 mov r6, r6, ror #30
134 mov r7, r7, ror #30
135 mov r8, r8, ror #30
136
1373: subs lr, lr, #1
138 sha_f1 r4, r5, r6, r7, r8
139 sha_f1 r8, r4, r5, r6, r7
140 sha_f1 r7, r8, r4, r5, r6
141 sha_f1 r6, r7, r8, r4, r5
142 sha_f1 r5, r6, r7, r8, r4
143 bne 3b
144
145 ldr r1, .L_sha_K + 4
146 mov lr, #4
147
1484: subs lr, lr, #1
149 sha_f2 r4, r5, r6, r7, r8
150 sha_f2 r8, r4, r5, r6, r7
151 sha_f2 r7, r8, r4, r5, r6
152 sha_f2 r6, r7, r8, r4, r5
153 sha_f2 r5, r6, r7, r8, r4
154 bne 4b
155
156 ldr r1, .L_sha_K + 8
157 mov lr, #4
158
1595: subs lr, lr, #1
160 sha_f3 r4, r5, r6, r7, r8
161 sha_f3 r8, r4, r5, r6, r7
162 sha_f3 r7, r8, r4, r5, r6
163 sha_f3 r6, r7, r8, r4, r5
164 sha_f3 r5, r6, r7, r8, r4
165 bne 5b
166
167 ldr r1, .L_sha_K + 12
168 mov lr, #4
169
1706: subs lr, lr, #1
171 sha_f2 r4, r5, r6, r7, r8
172 sha_f2 r8, r4, r5, r6, r7
173 sha_f2 r7, r8, r4, r5, r6
174 sha_f2 r6, r7, r8, r4, r5
175 sha_f2 r5, r6, r7, r8, r4
176 bne 6b
177
178 ldmia r0, {r1, r2, r3, ip, lr}
179 add r4, r1, r4
180 add r5, r2, r5
181 add r6, r3, r6, ror #2
182 add r7, ip, r7, ror #2
183 add r8, lr, r8, ror #2
184 stmia r0, {r4 - r8}
185
186 ldmfd sp!, {r4 - r8, pc}
187
188ENDPROC(sha_transform)
189
190 .align 2
191.L_sha_K:
192 .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
193
194
195/*
196 * void sha_init(__u32 *buf)
197 */
198
199 .align 2
200.L_sha_initial_digest:
201 .word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
202
203ENTRY(sha_init)
204
205 str lr, [sp, #-4]!
206 adr r1, .L_sha_initial_digest
207 ldmia r1, {r1, r2, r3, ip, lr}
208 stmia r0, {r1, r2, r3, ip, lr}
209 ldr pc, [sp], #4
210
211ENDPROC(sha_init)
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e9d689b7c83..197e96f7040 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -10,6 +10,7 @@ config AVR32
10 select GENERIC_IRQ_PROBE 10 select GENERIC_IRQ_PROBE
11 select HARDIRQS_SW_RESEND 11 select HARDIRQS_SW_RESEND
12 select GENERIC_IRQ_SHOW 12 select GENERIC_IRQ_SHOW
13 select ARCH_HAVE_NMI_SAFE_CMPXCHG
13 help 14 help
14 AVR32 is a high-performance 32-bit RISC microprocessor core, 15 AVR32 is a high-performance 32-bit RISC microprocessor core,
15 designed for cost-sensitive embedded applications, with particular 16 designed for cost-sensitive embedded applications, with particular
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index 85026537361..466af40c582 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file);
158static int sync_serial_release(struct inode *inode, struct file *file); 158static int sync_serial_release(struct inode *inode, struct file *file);
159static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); 159static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
160 160
161static int sync_serial_ioctl(struct file *file, 161static long sync_serial_ioctl(struct file *file,
162 unsigned int cmd, unsigned long arg); 162 unsigned int cmd, unsigned long arg);
163static ssize_t sync_serial_write(struct file *file, const char *buf, 163static ssize_t sync_serial_write(struct file *file, const char *buf,
164 size_t count, loff_t *ppos); 164 size_t count, loff_t *ppos);
@@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file)
625 *R_IRQ_MASK1_SET = 1 << port->data_avail_bit; 625 *R_IRQ_MASK1_SET = 1 << port->data_avail_bit;
626 DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev)); 626 DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev));
627 } 627 }
628 ret = 0; 628 err = 0;
629 629
630out: 630out:
631 mutex_unlock(&sync_serial_mutex); 631 mutex_unlock(&sync_serial_mutex);
632 return ret; 632 return err;
633} 633}
634 634
635static int sync_serial_release(struct inode *inode, struct file *file) 635static int sync_serial_release(struct inode *inode, struct file *file)
diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c
index 907cfb5a873..ba0e5965d6e 100644
--- a/arch/cris/arch-v10/kernel/irq.c
+++ b/arch/cris/arch-v10/kernel/irq.c
@@ -20,6 +20,9 @@
20#define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr)); 20#define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr));
21#define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr)); 21#define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr));
22 22
23extern void kgdb_init(void);
24extern void breakpoint(void);
25
23/* don't use set_int_vector, it bypasses the linux interrupt handlers. it is 26/* don't use set_int_vector, it bypasses the linux interrupt handlers. it is
24 * global just so that the kernel gdb can use it. 27 * global just so that the kernel gdb can use it.
25 */ 28 */
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 29b74a10583..332f19c5455 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -11,8 +11,6 @@
11 11
12#ifdef __KERNEL__ 12#ifdef __KERNEL__
13 13
14#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
15
16#ifndef __ASSEMBLY__ 14#ifndef __ASSEMBLY__
17#include <asm/types.h> 15#include <asm/types.h>
18#include <asm/processor.h> 16#include <asm/processor.h>
@@ -67,8 +65,10 @@ struct thread_info {
67 65
68#define init_thread_info (init_thread_union.thread_info) 66#define init_thread_info (init_thread_union.thread_info)
69 67
68#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
70/* thread information allocation */ 69/* thread information allocation */
71#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) 70#define alloc_thread_info_node(tsk, node) \
71 ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1))
72#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) 72#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
73 73
74#endif /* !__ASSEMBLY__ */ 74#endif /* !__ASSEMBLY__ */
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index cb884e48942..bad27a6ff40 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
7 select HAVE_PERF_EVENTS 7 select HAVE_PERF_EVENTS
8 select HAVE_GENERIC_HARDIRQS 8 select HAVE_GENERIC_HARDIRQS
9 select GENERIC_IRQ_SHOW 9 select GENERIC_IRQ_SHOW
10 select ARCH_HAVE_NMI_SAFE_CMPXCHG
10 11
11config ZONE_DMA 12config ZONE_DMA
12 bool 13 bool
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 137b277f7e5..12485471495 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -27,6 +27,8 @@ config IA64
27 select GENERIC_PENDING_IRQ if SMP 27 select GENERIC_PENDING_IRQ if SMP
28 select IRQ_PER_CPU 28 select IRQ_PER_CPU
29 select GENERIC_IRQ_SHOW 29 select GENERIC_IRQ_SHOW
30 select ARCH_WANT_OPTIONAL_GPIOLIB
31 select ARCH_HAVE_NMI_SAFE_CMPXCHG
30 default y 32 default y
31 help 33 help
32 The Itanium Processor Family is Intel's 64-bit successor to 34 The Itanium Processor Family is Intel's 64-bit successor to
@@ -89,6 +91,9 @@ config GENERIC_TIME_VSYSCALL
89config HAVE_SETUP_PER_CPU_AREA 91config HAVE_SETUP_PER_CPU_AREA
90 def_bool y 92 def_bool y
91 93
94config GENERIC_GPIO
95 def_bool y
96
92config DMI 97config DMI
93 bool 98 bool
94 default y 99 default y
diff --git a/arch/ia64/include/asm/gpio.h b/arch/ia64/include/asm/gpio.h
new file mode 100644
index 00000000000..590a20debc4
--- /dev/null
+++ b/arch/ia64/include/asm/gpio.h
@@ -0,0 +1,55 @@
1/*
2 * Generic GPIO API implementation for IA-64.
3 *
4 * A stright copy of that for PowerPC which was:
5 *
6 * Copyright (c) 2007-2008 MontaVista Software, Inc.
7 *
8 * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 */
15
16#ifndef _ASM_IA64_GPIO_H
17#define _ASM_IA64_GPIO_H
18
19#include <linux/errno.h>
20#include <asm-generic/gpio.h>
21
22#ifdef CONFIG_GPIOLIB
23
24/*
25 * We don't (yet) implement inlined/rapid versions for on-chip gpios.
26 * Just call gpiolib.
27 */
28static inline int gpio_get_value(unsigned int gpio)
29{
30 return __gpio_get_value(gpio);
31}
32
33static inline void gpio_set_value(unsigned int gpio, int value)
34{
35 __gpio_set_value(gpio, value);
36}
37
38static inline int gpio_cansleep(unsigned int gpio)
39{
40 return __gpio_cansleep(gpio);
41}
42
43static inline int gpio_to_irq(unsigned int gpio)
44{
45 return __gpio_to_irq(gpio);
46}
47
48static inline int irq_to_gpio(unsigned int irq)
49{
50 return -EINVAL;
51}
52
53#endif /* CONFIG_GPIOLIB */
54
55#endif /* _ASM_IA64_GPIO_H */
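A hypothetical consumer of the wrappers above, assuming a gpiolib
driver on the platform has registered GPIO 42 (the number is made
up; error handling is abbreviated):

    #include <linux/gpio.h>

    static int foo_read_pin(void)
    {
            int val;

            if (gpio_request(42, "foo"))
                    return -EBUSY;
            gpio_direction_input(42);
            val = gpio_get_value(42);       /* inline wrapper above */
            gpio_free(42);
            return val;
    }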
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 6fc03aff046..c38d22e5e90 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -156,7 +156,7 @@ prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \
156#define STUB_SET_VARIABLE(prefix, adjust_arg) \ 156#define STUB_SET_VARIABLE(prefix, adjust_arg) \
157static efi_status_t \ 157static efi_status_t \
158prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \ 158prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \
159 unsigned long attr, unsigned long data_size, \ 159 u32 attr, unsigned long data_size, \
160 void *data) \ 160 void *data) \
161{ \ 161{ \
162 struct ia64_fpreg fr[6]; \ 162 struct ia64_fpreg fr[6]; \
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 284cd3771ea..9e8ee9d2b8c 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,6 +6,7 @@ config M68K
6 select GENERIC_ATOMIC64 if MMU 6 select GENERIC_ATOMIC64 if MMU
7 select HAVE_GENERIC_HARDIRQS if !MMU 7 select HAVE_GENERIC_HARDIRQS if !MMU
8 select GENERIC_IRQ_SHOW if !MMU 8 select GENERIC_IRQ_SHOW if !MMU
9 select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
9 10
10config RWSEM_GENERIC_SPINLOCK 11config RWSEM_GENERIC_SPINLOCK
11 bool 12 bool
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 65adc86a230..e077b0bf56c 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -15,6 +15,7 @@ config PARISC
15 select HAVE_GENERIC_HARDIRQS 15 select HAVE_GENERIC_HARDIRQS
16 select GENERIC_IRQ_PROBE 16 select GENERIC_IRQ_PROBE
17 select IRQ_PER_CPU 17 select IRQ_PER_CPU
18 select ARCH_HAVE_NMI_SAFE_CMPXCHG
18 19
19 help 20 help
20 The PA-RISC microprocessor is designed by Hewlett-Packard and used 21 The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index b1dc71f5534..4054b31e0fa 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -258,10 +258,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
258 258
259#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) 259#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
260 260
261static __inline__ int 261static __inline__ s64
262__atomic64_add_return(s64 i, atomic64_t *v) 262__atomic64_add_return(s64 i, atomic64_t *v)
263{ 263{
264 int ret; 264 s64 ret;
265 unsigned long flags; 265 unsigned long flags;
266 _atomic_spin_lock_irqsave(v, flags); 266 _atomic_spin_lock_irqsave(v, flags);
267 267
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 67a33cc27ef..2388bdb3283 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -5,11 +5,14 @@
5 5
6#include <linux/futex.h> 6#include <linux/futex.h>
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <asm/atomic.h>
8#include <asm/errno.h> 9#include <asm/errno.h>
9 10
10static inline int 11static inline int
11futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) 12futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
12{ 13{
14 unsigned long int flags;
15 u32 val;
13 int op = (encoded_op >> 28) & 7; 16 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15; 17 int cmp = (encoded_op >> 24) & 15;
15 int oparg = (encoded_op << 8) >> 20; 18 int oparg = (encoded_op << 8) >> 20;
@@ -18,21 +21,58 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 22 oparg = 1 << oparg;
20 23
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) 24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)))
22 return -EFAULT; 25 return -EFAULT;
23 26
24 pagefault_disable(); 27 pagefault_disable();
25 28
29 _atomic_spin_lock_irqsave(uaddr, flags);
30
26 switch (op) { 31 switch (op) {
27 case FUTEX_OP_SET: 32 case FUTEX_OP_SET:
33 /* *(int *)UADDR2 = OPARG; */
34 ret = get_user(oldval, uaddr);
35 if (!ret)
36 ret = put_user(oparg, uaddr);
37 break;
28 case FUTEX_OP_ADD: 38 case FUTEX_OP_ADD:
39 /* *(int *)UADDR2 += OPARG; */
40 ret = get_user(oldval, uaddr);
41 if (!ret) {
42 val = oldval + oparg;
43 ret = put_user(val, uaddr);
44 }
45 break;
29 case FUTEX_OP_OR: 46 case FUTEX_OP_OR:
47 /* *(int *)UADDR2 |= OPARG; */
48 ret = get_user(oldval, uaddr);
49 if (!ret) {
50 val = oldval | oparg;
51 ret = put_user(val, uaddr);
52 }
53 break;
30 case FUTEX_OP_ANDN: 54 case FUTEX_OP_ANDN:
55 /* *(int *)UADDR2 &= ~OPARG; */
56 ret = get_user(oldval, uaddr);
57 if (!ret) {
58 val = oldval & ~oparg;
59 ret = put_user(val, uaddr);
60 }
61 break;
31 case FUTEX_OP_XOR: 62 case FUTEX_OP_XOR:
63 /* *(int *)UADDR2 ^= OPARG; */
64 ret = get_user(oldval, uaddr);
65 if (!ret) {
66 val = oldval ^ oparg;
67 ret = put_user(val, uaddr);
68 }
69 break;
32 default: 70 default:
33 ret = -ENOSYS; 71 ret = -ENOSYS;
34 } 72 }
35 73
74 _atomic_spin_unlock_irqrestore(uaddr, flags);
75
36 pagefault_enable(); 76 pagefault_enable();
37 77
38 if (!ret) { 78 if (!ret) {
@@ -54,7 +94,9 @@ static inline int
54futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, 94futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
55 u32 oldval, u32 newval) 95 u32 oldval, u32 newval)
56{ 96{
97 int ret;
57 u32 val; 98 u32 val;
99 unsigned long flags;
58 100
59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is 101 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
60 * our gateway page, and causes no end of trouble... 102 * our gateway page, and causes no end of trouble...
@@ -65,12 +107,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 107 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
66 return -EFAULT; 108 return -EFAULT;
67 109
68 if (get_user(val, uaddr)) 110 /* HPPA has no cmpxchg in hardware and therefore the
69 return -EFAULT; 111 * best we can do here is use an array of locks. The
70 if (val == oldval && put_user(newval, uaddr)) 112 * lock selected is based on a hash of the userspace
71 return -EFAULT; 113 * address. This should scale to a couple of CPUs.
114 */
115
116 _atomic_spin_lock_irqsave(uaddr, flags);
117
118 ret = get_user(val, uaddr);
119
120 if (!ret && val == oldval)
121 ret = put_user(newval, uaddr);
122
72 *uval = val; 123 *uval = val;
73 return 0; 124
125 _atomic_spin_unlock_irqrestore(uaddr, flags);
126
127 return ret;
74} 128}
75 129
76#endif /*__KERNEL__*/ 130#endif /*__KERNEL__*/
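For reference, the hashed-lock scheme that comment describes looks
roughly like this (an illustrative sketch, not the exact parisc
macros):

    #include <linux/spinlock.h>

    /* A small array of spinlocks; one is chosen by hashing the
     * address being updated, so all futex and atomic operations on
     * the same word serialize on the same lock. */
    #define ATOMIC_HASH_SIZE        4
    static arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
            [0 ... ATOMIC_HASH_SIZE - 1] = __ARCH_SPIN_LOCK_UNLOCKED
    };

    #define ATOMIC_HASH(a) \
            (&__atomic_hash[((unsigned long)(a) >> 4) % ATOMIC_HASH_SIZE])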
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3392de3e7be..d61de64f990 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -821,8 +821,9 @@
821#define __NR_open_by_handle_at (__NR_Linux + 326) 821#define __NR_open_by_handle_at (__NR_Linux + 326)
822#define __NR_syncfs (__NR_Linux + 327) 822#define __NR_syncfs (__NR_Linux + 327)
823#define __NR_setns (__NR_Linux + 328) 823#define __NR_setns (__NR_Linux + 328)
824#define __NR_sendmmsg (__NR_Linux + 329)
824 825
825#define __NR_Linux_syscalls (__NR_setns + 1) 826#define __NR_Linux_syscalls (__NR_sendmmsg + 1)
826 827
827 828
828#define __IGNORE_select /* newselect */ 829#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 34a4f5a2fff..e66366fd2ab 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -427,6 +427,7 @@
427 ENTRY_COMP(open_by_handle_at) 427 ENTRY_COMP(open_by_handle_at)
428 ENTRY_SAME(syncfs) 428 ENTRY_SAME(syncfs)
429 ENTRY_SAME(setns) 429 ENTRY_SAME(setns)
430 ENTRY_COMP(sendmmsg)
430 431
431 /* Nothing yet */ 432 /* Nothing yet */
432 433
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 374c475e56a..6926b61acfe 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -136,6 +136,7 @@ config PPC
136 select HAVE_SYSCALL_TRACEPOINTS 136 select HAVE_SYSCALL_TRACEPOINTS
137 select HAVE_BPF_JIT if (PPC64 && NET) 137 select HAVE_BPF_JIT if (PPC64 && NET)
138 select HAVE_ARCH_JUMP_LABEL 138 select HAVE_ARCH_JUMP_LABEL
139 select ARCH_HAVE_NMI_SAFE_CMPXCHG
139 140
140config EARLY_PRINTK 141config EARLY_PRINTK
141 bool 142 bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c03fef7a9c2..ed5cb5af528 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -81,6 +81,7 @@ config S390
81 select INIT_ALL_POSSIBLE 81 select INIT_ALL_POSSIBLE
82 select HAVE_IRQ_WORK 82 select HAVE_IRQ_WORK
83 select HAVE_PERF_EVENTS 83 select HAVE_PERF_EVENTS
84 select ARCH_HAVE_NMI_SAFE_CMPXCHG
84 select HAVE_KERNEL_GZIP 85 select HAVE_KERNEL_GZIP
85 select HAVE_KERNEL_BZIP2 86 select HAVE_KERNEL_BZIP2
86 select HAVE_KERNEL_LZMA 87 select HAVE_KERNEL_LZMA
@@ -273,11 +274,11 @@ config MARCH_Z10
273 on older machines. 274 on older machines.
274 275
275config MARCH_Z196 276config MARCH_Z196
276 bool "IBM zEnterprise 196" 277 bool "IBM zEnterprise 114 and 196"
277 help 278 help
278 Select this to enable optimizations for IBM zEnterprise 196 279 Select this to enable optimizations for IBM zEnterprise 114 and 196
279 (2817 series). The kernel will be slightly faster but will not work 280 (2818 and 2817 series). The kernel will be slightly faster but will
280 on older machines. 281 not work on older machines.
281 282
282endchoice 283endchoice
283 284
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 5e95d95450b..97cc4403fab 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -167,5 +167,6 @@ enum diag308_rc {
167}; 167};
168 168
169extern int diag308(unsigned long subcode, void *addr); 169extern int diag308(unsigned long subcode, void *addr);
170extern void diag308_reset(void);
170 171
171#endif /* _ASM_S390_IPL_H */ 172#endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index f26280d9e88..e85c911aabf 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -18,6 +18,7 @@ void system_call(void);
18void pgm_check_handler(void); 18void pgm_check_handler(void);
19void mcck_int_handler(void); 19void mcck_int_handler(void);
20void io_int_handler(void); 20void io_int_handler(void);
21void psw_restart_int_handler(void);
21 22
22#ifdef CONFIG_32BIT 23#ifdef CONFIG_32BIT
23 24
@@ -150,7 +151,10 @@ struct _lowcore {
150 */ 151 */
151 __u32 ipib; /* 0x0e00 */ 152 __u32 ipib; /* 0x0e00 */
152 __u32 ipib_checksum; /* 0x0e04 */ 153 __u32 ipib_checksum; /* 0x0e04 */
153 __u8 pad_0x0e08[0x0f00-0x0e08]; /* 0x0e08 */ 154
155 /* 64 bit save area */
156 __u64 save_area_64; /* 0x0e08 */
157 __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */
154 158
155 /* Extended facility list */ 159 /* Extended facility list */
156 __u64 stfle_fac_list[32]; /* 0x0f00 */ 160 __u64 stfle_fac_list[32]; /* 0x0f00 */
@@ -286,7 +290,10 @@ struct _lowcore {
286 */ 290 */
287 __u64 ipib; /* 0x0e00 */ 291 __u64 ipib; /* 0x0e00 */
288 __u32 ipib_checksum; /* 0x0e08 */ 292 __u32 ipib_checksum; /* 0x0e08 */
289 __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ 293
294 /* 64 bit save area */
295 __u64 save_area_64; /* 0x0e0c */
296 __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */
290 297
291 /* Extended facility list */ 298 /* Extended facility list */
292 __u64 stfle_fac_list[32]; /* 0x0f00 */ 299 __u64 stfle_fac_list[32]; /* 0x0f00 */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 55dfcc8bdc0..a4b6229e5d4 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -119,14 +119,12 @@ struct stack_frame {
119 * Do necessary setup to start up a new thread. 119 * Do necessary setup to start up a new thread.
120 */ 120 */
121#define start_thread(regs, new_psw, new_stackp) do { \ 121#define start_thread(regs, new_psw, new_stackp) do { \
122 set_fs(USER_DS); \
123 regs->psw.mask = psw_user_bits; \ 122 regs->psw.mask = psw_user_bits; \
124 regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ 123 regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
125 regs->gprs[15] = new_stackp; \ 124 regs->gprs[15] = new_stackp; \
126} while (0) 125} while (0)
127 126
128#define start_thread31(regs, new_psw, new_stackp) do { \ 127#define start_thread31(regs, new_psw, new_stackp) do { \
129 set_fs(USER_DS); \
130 regs->psw.mask = psw_user32_bits; \ 128 regs->psw.mask = psw_user32_bits; \
131 regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ 129 regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
132 regs->gprs[15] = new_stackp; \ 130 regs->gprs[15] = new_stackp; \
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index d382629a017..6582f69f238 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -113,6 +113,7 @@ extern void pfault_fini(void);
113 113
114extern void cmma_init(void); 114extern void cmma_init(void);
115extern int memcpy_real(void *, void *, size_t); 115extern int memcpy_real(void *, void *, size_t);
116extern void copy_to_absolute_zero(void *dest, void *src, size_t count);
116 117
117#define finish_arch_switch(prev) do { \ 118#define finish_arch_switch(prev) do { \
118 set_fs(current->thread.mm_segment); \ 119 set_fs(current->thread.mm_segment); \
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 05d8f38734e..532fd432215 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -27,12 +27,9 @@ int main(void)
27 BLANK(); 27 BLANK();
28 DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); 28 DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
29 BLANK(); 29 BLANK();
30 DEFINE(__THREAD_per_cause, 30 DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
31 offsetof(struct task_struct, thread.per_event.cause)); 31 DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
32 DEFINE(__THREAD_per_address, 32 DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
33 offsetof(struct task_struct, thread.per_event.address));
34 DEFINE(__THREAD_per_paid,
35 offsetof(struct task_struct, thread.per_event.paid));
36 BLANK(); 33 BLANK();
37 DEFINE(__TI_task, offsetof(struct thread_info, task)); 34 DEFINE(__TI_task, offsetof(struct thread_info, task));
38 DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); 35 DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
@@ -142,6 +139,7 @@ int main(void)
142 DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); 139 DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
143 DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); 140 DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
144 DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); 141 DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
142 DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64));
145#ifdef CONFIG_32BIT 143#ifdef CONFIG_32BIT
146 DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); 144 DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
147#else /* CONFIG_32BIT */ 145#else /* CONFIG_32BIT */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index 209938c1dfc..255435663bf 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -76,6 +76,42 @@ s390_base_pgm_handler_fn:
76 .quad 0 76 .quad 0
77 .previous 77 .previous
78 78
79#
80# Calls diag 308 subcode 1 and continues execution
81#
82# The following conditions must be ensured before calling this function:
83# * Prefix register = 0
84# * Lowcore protection is disabled
85#
86ENTRY(diag308_reset)
87 larl %r4,.Lctlregs # Save control registers
88 stctg %c0,%c15,0(%r4)
89 larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
90 lghi %r3,0
91 lg %r4,0(%r4) # Save PSW
92 sturg %r4,%r3 # Use sturg, because of large pages
93 lghi %r1,1
94 diag %r1,%r1,0x308
95.Lrestart_part2:
96 lhi %r0,0 # Load r0 with zero
97 lhi %r1,2 # Use mode 2 = ESAME (dump)
98 sigp %r1,%r0,0x12 # Switch to ESAME mode
99 sam64 # Switch to 64 bit addressing mode
100 larl %r4,.Lctlregs # Restore control registers
101 lctlg %c0,%c15,0(%r4)
102 br %r14
103.align 16
104.Lrestart_psw:
105 .long 0x00080000,0x80000000 + .Lrestart_part2
106
107 .section .bss
108.align 8
109.Lctlregs:
110 .rept 16
111 .quad 0
112 .endr
113 .previous
114
79#else /* CONFIG_64BIT */ 115#else /* CONFIG_64BIT */
80 116
81ENTRY(s390_base_mcck_handler) 117ENTRY(s390_base_mcck_handler)
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index eee999853a7..a9a285b8c4a 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -380,20 +380,13 @@ asmlinkage long sys32_sigreturn(void)
380 goto badframe; 380 goto badframe;
381 if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) 381 if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
382 goto badframe; 382 goto badframe;
383
384 sigdelsetmask(&set, ~_BLOCKABLE); 383 sigdelsetmask(&set, ~_BLOCKABLE);
385 spin_lock_irq(&current->sighand->siglock); 384 set_current_blocked(&set);
386 current->blocked = set;
387 recalc_sigpending();
388 spin_unlock_irq(&current->sighand->siglock);
389
390 if (restore_sigregs32(regs, &frame->sregs)) 385 if (restore_sigregs32(regs, &frame->sregs))
391 goto badframe; 386 goto badframe;
392 if (restore_sigregs_gprs_high(regs, frame->gprs_high)) 387 if (restore_sigregs_gprs_high(regs, frame->gprs_high))
393 goto badframe; 388 goto badframe;
394
395 return regs->gprs[2]; 389 return regs->gprs[2];
396
397badframe: 390badframe:
398 force_sig(SIGSEGV, current); 391 force_sig(SIGSEGV, current);
399 return 0; 392 return 0;
@@ -413,31 +406,22 @@ asmlinkage long sys32_rt_sigreturn(void)
413 goto badframe; 406 goto badframe;
414 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) 407 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
415 goto badframe; 408 goto badframe;
416
417 sigdelsetmask(&set, ~_BLOCKABLE); 409 sigdelsetmask(&set, ~_BLOCKABLE);
418 spin_lock_irq(&current->sighand->siglock); 410 set_current_blocked(&set);
419 current->blocked = set;
420 recalc_sigpending();
421 spin_unlock_irq(&current->sighand->siglock);
422
423 if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) 411 if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
424 goto badframe; 412 goto badframe;
425 if (restore_sigregs_gprs_high(regs, frame->gprs_high)) 413 if (restore_sigregs_gprs_high(regs, frame->gprs_high))
426 goto badframe; 414 goto badframe;
427
428 err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); 415 err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp);
429 st.ss_sp = compat_ptr(ss_sp); 416 st.ss_sp = compat_ptr(ss_sp);
430 err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); 417 err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size);
431 err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); 418 err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags);
432 if (err) 419 if (err)
433 goto badframe; 420 goto badframe;
434
435 set_fs (KERNEL_DS); 421 set_fs (KERNEL_DS);
436 do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]); 422 do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]);
437 set_fs (old_fs); 423 set_fs (old_fs);
438
439 return regs->gprs[2]; 424 return regs->gprs[2];
440
441badframe: 425badframe:
442 force_sig(SIGSEGV, current); 426 force_sig(SIGSEGV, current);
443 return 0; 427 return 0;
@@ -605,10 +589,10 @@ give_sigsegv:
605 * OK, we're invoking a handler 589 * OK, we're invoking a handler
606 */ 590 */
607 591
608int 592int handle_signal32(unsigned long sig, struct k_sigaction *ka,
609handle_signal32(unsigned long sig, struct k_sigaction *ka, 593 siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
610 siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
611{ 594{
595 sigset_t blocked;
612 int ret; 596 int ret;
613 597
614 /* Set up the stack frame */ 598 /* Set up the stack frame */
@@ -616,15 +600,12 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka,
616 ret = setup_rt_frame32(sig, ka, info, oldset, regs); 600 ret = setup_rt_frame32(sig, ka, info, oldset, regs);
617 else 601 else
618 ret = setup_frame32(sig, ka, oldset, regs); 602 ret = setup_frame32(sig, ka, oldset, regs);
619 603 if (ret)
620 if (ret == 0) { 604 return ret;
621 spin_lock_irq(&current->sighand->siglock); 605 sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
622 sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); 606 if (!(ka->sa.sa_flags & SA_NODEFER))
623 if (!(ka->sa.sa_flags & SA_NODEFER)) 607 sigaddset(&blocked, sig);
624 sigaddset(&current->blocked,sig); 608 set_current_blocked(&blocked);
625 recalc_sigpending(); 609 return 0;
626 spin_unlock_irq(&current->sighand->siglock);
627 }
628 return ret;
629} 610}
630 611
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3eab7cfab07..02ec8fe7d03 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -849,6 +849,34 @@ restart_crash:
849restart_go: 849restart_go:
850#endif 850#endif
851 851
852#
853# PSW restart interrupt handler
854#
855ENTRY(psw_restart_int_handler)
856 st %r15,__LC_SAVE_AREA_64(%r0) # save r15
857 basr %r15,0
8580: l %r15,.Lrestart_stack-0b(%r15) # load restart stack
859 l %r15,0(%r15)
860 ahi %r15,-SP_SIZE # make room for pt_regs
861 stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack
862 mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack
863 mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw
864 xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0
865 basr %r14,0
8661: l %r14,.Ldo_restart-1b(%r14)
867 basr %r14,%r14
868
869 basr %r14,0 # load disabled wait PSW if
8702: lpsw restart_psw_crash-2b(%r14) # do_restart returns
871 .align 4
872.Ldo_restart:
873 .long do_restart
874.Lrestart_stack:
875 .long restart_stack
876 .align 8
877restart_psw_crash:
878 .long 0x000a0000,0x00000000 + restart_psw_crash
879
852 .section .kprobes.text, "ax" 880 .section .kprobes.text, "ax"
853 881
854#ifdef CONFIG_CHECK_STACK 882#ifdef CONFIG_CHECK_STACK
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 7a0fd426ca9..5f729d627ce 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -865,6 +865,26 @@ restart_crash:
865restart_go: 865restart_go:
866#endif 866#endif
867 867
868#
869# PSW restart interrupt handler
870#
871ENTRY(psw_restart_int_handler)
872 stg %r15,__LC_SAVE_AREA_64(%r0) # save r15
873 larl %r15,restart_stack # load restart stack
874 lg %r15,0(%r15)
875 aghi %r15,-SP_SIZE # make room for pt_regs
876 stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack
877 mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack
878 mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw
879 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0
880 brasl %r14,do_restart
881
882 larl %r14,restart_psw_crash # load disabled wait PSW if
883 lpswe 0(%r14) # do_restart returns
884 .align 8
885restart_psw_crash:
886 .quad 0x0002000080000000,0x0000000000000000 + restart_psw_crash
887
868 .section .kprobes.text, "ax" 888 .section .kprobes.text, "ax"
869 889
870#ifdef CONFIG_CHECK_STACK 890#ifdef CONFIG_CHECK_STACK
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index a689070be28..04361d5a427 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -45,11 +45,13 @@
45 * - halt 45 * - halt
46 * - power off 46 * - power off
47 * - reipl 47 * - reipl
48 * - restart
48 */ 49 */
49#define ON_PANIC_STR "on_panic" 50#define ON_PANIC_STR "on_panic"
50#define ON_HALT_STR "on_halt" 51#define ON_HALT_STR "on_halt"
51#define ON_POFF_STR "on_poff" 52#define ON_POFF_STR "on_poff"
52#define ON_REIPL_STR "on_reboot" 53#define ON_REIPL_STR "on_reboot"
54#define ON_RESTART_STR "on_restart"
53 55
54struct shutdown_action; 56struct shutdown_action;
55struct shutdown_trigger { 57struct shutdown_trigger {
@@ -1544,17 +1546,20 @@ static char vmcmd_on_reboot[128];
1544static char vmcmd_on_panic[128]; 1546static char vmcmd_on_panic[128];
1545static char vmcmd_on_halt[128]; 1547static char vmcmd_on_halt[128];
1546static char vmcmd_on_poff[128]; 1548static char vmcmd_on_poff[128];
1549static char vmcmd_on_restart[128];
1547 1550
1548DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); 1551DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
1549DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); 1552DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
1550DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); 1553DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
1551DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); 1554DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
1555DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
1552 1556
1553static struct attribute *vmcmd_attrs[] = { 1557static struct attribute *vmcmd_attrs[] = {
1554 &sys_vmcmd_on_reboot_attr.attr, 1558 &sys_vmcmd_on_reboot_attr.attr,
1555 &sys_vmcmd_on_panic_attr.attr, 1559 &sys_vmcmd_on_panic_attr.attr,
1556 &sys_vmcmd_on_halt_attr.attr, 1560 &sys_vmcmd_on_halt_attr.attr,
1557 &sys_vmcmd_on_poff_attr.attr, 1561 &sys_vmcmd_on_poff_attr.attr,
1562 &sys_vmcmd_on_restart_attr.attr,
1558 NULL, 1563 NULL,
1559}; 1564};
1560 1565
@@ -1576,6 +1581,8 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
1576 cmd = vmcmd_on_halt; 1581 cmd = vmcmd_on_halt;
1577 else if (strcmp(trigger->name, ON_POFF_STR) == 0) 1582 else if (strcmp(trigger->name, ON_POFF_STR) == 0)
1578 cmd = vmcmd_on_poff; 1583 cmd = vmcmd_on_poff;
1584 else if (strcmp(trigger->name, ON_RESTART_STR) == 0)
1585 cmd = vmcmd_on_restart;
1579 else 1586 else
1580 return; 1587 return;
1581 1588
@@ -1707,6 +1714,34 @@ static void do_panic(void)
1707 stop_run(&on_panic_trigger); 1714 stop_run(&on_panic_trigger);
1708} 1715}
1709 1716
1717/* on restart */
1718
1719static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
1720 &reipl_action};
1721
1722static ssize_t on_restart_show(struct kobject *kobj,
1723 struct kobj_attribute *attr, char *page)
1724{
1725 return sprintf(page, "%s\n", on_restart_trigger.action->name);
1726}
1727
1728static ssize_t on_restart_store(struct kobject *kobj,
1729 struct kobj_attribute *attr,
1730 const char *buf, size_t len)
1731{
1732 return set_trigger(buf, &on_restart_trigger, len);
1733}
1734
1735static struct kobj_attribute on_restart_attr =
1736 __ATTR(on_restart, 0644, on_restart_show, on_restart_store);
1737
1738void do_restart(void)
1739{
1740 smp_send_stop();
1741 on_restart_trigger.action->fn(&on_restart_trigger);
1742 stop_run(&on_restart_trigger);
1743}
1744
1710/* on halt */ 1745/* on halt */
1711 1746
1712static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; 1747static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
@@ -1783,7 +1818,9 @@ static void __init shutdown_triggers_init(void)
1783 if (sysfs_create_file(&shutdown_actions_kset->kobj, 1818 if (sysfs_create_file(&shutdown_actions_kset->kobj,
1784 &on_poff_attr.attr)) 1819 &on_poff_attr.attr))
1785 goto fail; 1820 goto fail;
1786 1821 if (sysfs_create_file(&shutdown_actions_kset->kobj,
1822 &on_restart_attr.attr))
1823 goto fail;
1787 return; 1824 return;
1788fail: 1825fail:
1789 panic("shutdown_triggers_init failed\n"); 1826 panic("shutdown_triggers_init failed\n");
@@ -1959,6 +1996,12 @@ static void do_reset_calls(void)
1959{ 1996{
1960 struct reset_call *reset; 1997 struct reset_call *reset;
1961 1998
1999#ifdef CONFIG_64BIT
2000 if (diag308_set_works) {
2001 diag308_reset();
2002 return;
2003 }
2004#endif
1962 list_for_each_entry(reset, &rcall, list) 2005 list_for_each_entry(reset, &rcall, list)
1963 reset->fn(); 2006 reset->fn();
1964} 2007}
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
index 78eb7cfbd3d..e690975403f 100644
--- a/arch/s390/kernel/reipl64.S
+++ b/arch/s390/kernel/reipl64.S
@@ -1,5 +1,5 @@
 /*
- * Copyright IBM Corp 2000,2009
+ * Copyright IBM Corp 2000,2011
  * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
  *	      Denis Joseph Barrow,
  */
@@ -8,6 +8,64 @@
 #include <asm/asm-offsets.h>
 
 #
+# store_status
+#
+# Prerequisites to run this function:
+# - Prefix register is set to zero
+# - Original prefix register is stored in "dump_prefix_page"
+# - Lowcore protection is off
+#
+ENTRY(store_status)
+	/* Save register one and load save area base */
+	stg	%r1,__LC_SAVE_AREA_64(%r0)
+	lghi	%r1,SAVE_AREA_BASE
+	/* General purpose registers */
+	stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	lg	%r2,__LC_SAVE_AREA_64(%r0)
+	stg	%r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
+	/* Control registers */
+	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Access registers */
+	stam	%a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Floating point registers */
+	std	%f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Floating point control register */
+	stfpc	__LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* CPU timer */
+	stpt	__LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Saved prefix register */
+	larl	%r2,dump_prefix_page
+	mvc	__LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
+	/* Clock comparator - seven bytes */
+	larl	%r2,.Lclkcmp
+	stckc	0(%r2)
+	mvc	__LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
+	/* Program status word */
+	epsw	%r2,%r3
+	st	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
+	st	%r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
+	larl	%r2,store_status
+	stg	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
+	br	%r14
+.align	8
+.Lclkcmp:	.quad	0x0000000000000000
+
+#
 # do_reipl_asm
 # Parameter: r2 = schid of reipl device
 #
@@ -15,22 +73,7 @@
 ENTRY(do_reipl_asm)
 	basr	%r13,0
 .Lpg0:	lpswe	.Lnewpsw-.Lpg0(%r13)
-.Lpg1:	# do store status of all registers
-
-	stg	%r1,.Lregsave-.Lpg0(%r13)
-	lghi	%r1,0x1000
-	stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1)
-	lg	%r0,.Lregsave-.Lpg0(%r13)
-	stg	%r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1)
-	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1)
-	stam	%a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1)
-	lg	%r10,.Ldump_pfx-.Lpg0(%r13)
-	mvc	__LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10)
-	stfpc	__LC_FP_CREG_SAVE_AREA-0x1000(%r1)
-	stckc	.Lclkcmp-.Lpg0(%r13)
-	mvc	__LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13)
-	stpt	__LC_CPU_TIMER_SAVE_AREA-0x1000(%r1)
-	stg	%r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1)
+.Lpg1:	brasl	%r14,store_status
 
 	lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
 	lgr	%r1,%r2
@@ -67,10 +110,7 @@ ENTRY(do_reipl_asm)
 	st	%r14,.Ldispsw+12-.Lpg0(%r13)
 	lpswe	.Ldispsw-.Lpg0(%r13)
 	.align	8
-.Lclkcmp:	.quad	0x0000000000000000
 .Lall:		.quad	0x00000000ff000000
-.Ldump_pfx:	.quad	dump_prefix_page
-.Lregsave:	.quad	0x0000000000000000
 	.align	16
 /*
  * These addresses have to be 31 bit otherwise
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 0c35dee10b0..7b371c37061 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -346,7 +346,7 @@ setup_lowcore(void)
 	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
 	lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
 	lc->restart_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
 	if (user_mode != HOME_SPACE_MODE)
 		lc->restart_psw.mask |= PSW_ASC_HOME;
 	lc->external_new_psw.mask = psw_kernel_bits;
@@ -529,6 +529,27 @@ static void __init setup_memory_end(void)
 	memory_end = memory_size;
 }
 
+void *restart_stack __attribute__((__section__(".data")));
+
+/*
+ * Setup new PSW and allocate stack for PSW restart interrupt
+ */
+static void __init setup_restart_psw(void)
+{
+	psw_t psw;
+
+	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
+	restart_stack += ASYNC_SIZE;
+
+	/*
+	 * Setup restart PSW for absolute zero lowcore. This is necessary
+	 * if PSW restart is done on an offline CPU that has lowcore zero
+	 */
+	psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
+	copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw));
+}
+
 static void __init
 setup_memory(void)
 {
@@ -731,6 +752,7 @@ static void __init setup_hwcaps(void)
 		strcpy(elf_platform, "z10");
 		break;
 	case 0x2817:
+	case 0x2818:
 		strcpy(elf_platform, "z196");
 		break;
 	}
@@ -792,6 +814,7 @@ setup_arch(char **cmdline_p)
 	setup_addressing_mode();
 	setup_memory();
 	setup_resources();
+	setup_restart_psw();
 	setup_lowcore();
 
 	cpu_init();
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index abbb3c3c7aa..9a40e1cc5ec 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -57,17 +57,15 @@ typedef struct
  */
 SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask)
 {
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	sigset_t blocked;
 
+	current->saved_sigmask = current->blocked;
+	mask &= _BLOCKABLE;
+	siginitset(&blocked, mask);
+	set_current_blocked(&blocked);
 	set_current_state(TASK_INTERRUPTIBLE);
 	schedule();
-	set_thread_flag(TIF_RESTORE_SIGMASK);
-
+	set_restore_sigmask();
 	return -ERESTARTNOHAND;
 }
 
@@ -172,18 +170,11 @@ SYSCALL_DEFINE0(sigreturn)
 		goto badframe;
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
-
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
+	set_current_blocked(&set);
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
-
 	return regs->gprs[2];
-
 badframe:
 	force_sig(SIGSEGV, current);
 	return 0;
@@ -199,21 +190,14 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		goto badframe;
 	if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
-
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
+	set_current_blocked(&set);
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
-
 	if (do_sigaltstack(&frame->uc.uc_stack, NULL,
 			   regs->gprs[15]) == -EFAULT)
 		goto badframe;
 	return regs->gprs[2];
-
 badframe:
 	force_sig(SIGSEGV, current);
 	return 0;
@@ -385,14 +369,11 @@ give_sigsegv:
 	return -EFAULT;
 }
 
-/*
- * OK, we're invoking a handler
- */
-
-static int
-handle_signal(unsigned long sig, struct k_sigaction *ka,
-	      siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
+static int handle_signal(unsigned long sig, struct k_sigaction *ka,
+			 siginfo_t *info, sigset_t *oldset,
+			 struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int ret;
 
 	/* Set up the stack frame */
@@ -400,17 +381,13 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 		ret = setup_rt_frame(sig, ka, info, oldset, regs);
 	else
 		ret = setup_frame(sig, ka, oldset, regs);
-
-	if (ret == 0) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-		if (!(ka->sa.sa_flags & SA_NODEFER))
-			sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
-
-	return ret;
+	if (ret)
+		return ret;
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&blocked, sig);
+	set_current_blocked(&blocked);
+	return 0;
 }
 
 /*
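
All three conversions above replace the open-coded siglock/recalc_sigpending() sequence with set_current_blocked(), which handles the locking and re-evaluation itself. A hedged sketch of the resulting idiom, as other arch signal code would write it (the helper name is illustrative):

/* Sketch: block ka->sa.sa_mask (plus sig unless SA_NODEFER) without
 * touching siglock directly; set_current_blocked() does the locking
 * and recalc_sigpending() internally.
 */
static void block_sigmask_sketch(struct k_sigaction *ka, int sig)
{
	sigset_t blocked;

	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
	if (!(ka->sa.sa_flags & SA_NODEFER))
		sigaddset(&blocked, sig);
	set_current_blocked(&blocked);
}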
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a6d85c0a7f2..6ab16ac64d2 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -452,23 +452,27 @@ out:
  */
 int __cpuinit start_secondary(void *cpuvoid)
 {
-	/* Setup the cpu */
 	cpu_init();
 	preempt_disable();
-	/* Enable TOD clock interrupts on the secondary cpu. */
 	init_cpu_timer();
-	/* Enable cpu timer interrupts on the secondary cpu. */
 	init_cpu_vtimer();
-	/* Enable pfault pseudo page faults on this cpu. */
 	pfault_init();
 
-	/* call cpu notifiers */
 	notify_cpu_starting(smp_processor_id());
-	/* Mark this cpu as online */
 	ipi_call_lock();
 	set_cpu_online(smp_processor_id(), true);
 	ipi_call_unlock();
-	/* Switch on interrupts */
+	__ctl_clear_bit(0, 28); /* Disable lowcore protection */
+	S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	S390_lowcore.restart_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
+	__ctl_set_bit(0, 28); /* Enable lowcore protection */
+	/*
+	 * Wait until the cpu which brought this one up marked it
+	 * active before enabling interrupts.
+	 */
+	while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
+		cpu_relax();
 	local_irq_enable();
 	/* cpu_idle will call schedule for us */
 	cpu_idle();
@@ -507,7 +511,11 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
 	memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512);
 	lowcore->async_stack = async_stack + ASYNC_SIZE;
 	lowcore->panic_stack = panic_stack + PAGE_SIZE;
-
+	lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	lowcore->restart_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+	if (user_mode != HOME_SPACE_MODE)
+		lowcore->restart_psw.mask |= PSW_ASC_HOME;
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE) {
 		unsigned long save_area;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 51e5cd9b906..5dbbaa6e594 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -85,3 +85,19 @@ int memcpy_real(void *dest, void *src, size_t count)
 	arch_local_irq_restore(flags);
 	return rc;
 }
+
+/*
+ * Copy memory to absolute zero
+ */
+void copy_to_absolute_zero(void *dest, void *src, size_t count)
+{
+	unsigned long cr0;
+
+	BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore));
+	preempt_disable();
+	__ctl_store(cr0, 0, 0);
+	__ctl_clear_bit(0, 28); /* disable lowcore protection */
+	memcpy_real(dest + store_prefix(), src, count);
+	__ctl_load(cr0, 0, 0);
+	preempt_enable();
+}
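
copy_to_absolute_zero() is what the setup.c hunk above uses to plant the restart PSW in the lowcore at real address zero, independent of the caller's prefix. A hedged sketch of a caller, mirroring setup_restart_psw():

/* Sketch of a caller (mirrors setup_restart_psw() above); assumes the
 * usual s390 lowcore and PSW definitions from the same patch set.
 */
static void __init plant_restart_psw(void)
{
	psw_t psw;

	psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
	psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
	copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw));
}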
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2adb23938a7..4d1f2bce87b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -528,6 +528,7 @@ static inline void page_table_free_pgste(unsigned long *table)
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 						    unsigned long vmaddr)
 {
+	return NULL;
 }
 
 static inline void page_table_free_pgste(unsigned long *table)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 748ff192006..ff9177c8f64 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,6 +11,7 @@ config SUPERH
 	select HAVE_DMA_ATTRS
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 84db0d6ccd0..32114e0941a 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -16,12 +16,13 @@
 #include <linux/thread_info.h>
 #include <linux/irqflags.h>
 #include <linux/smp.h>
+#include <linux/cpuidle.h>
 #include <asm/pgalloc.h>
 #include <asm/system.h>
 #include <linux/atomic.h>
 #include <asm/smp.h>
 
-void (*pm_idle)(void) = NULL;
+static void (*pm_idle)(void);
 
 static int hlt_counter;
 
@@ -100,7 +101,8 @@ void cpu_idle(void)
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			if (cpuidle_idle_call())
+				pm_idle();
 			/*
 			 * Sanity check to ensure that pm_idle() returns
 			 * with IRQs enabled
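
cpuidle_idle_call() returns non-zero when no cpuidle driver or governor is available, so pm_idle() now only runs as the fallback. A hedged sketch of the resulting idle-loop shape (not the literal SH code):

/* Sketch of the loop shape after the change: try cpuidle first, fall
 * back to the legacy pm_idle() path when cpuidle is unavailable.
 */
static void idle_loop_sketch(void)
{
	while (!need_resched()) {
		local_irq_disable();
		stop_critical_timings();
		if (cpuidle_idle_call())	/* non-zero: no cpuidle driver */
			pm_idle();		/* must return with IRQs on */
		start_critical_timings();
	}
}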
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 1074dddcb10..42c67beadca 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -54,6 +54,7 @@ config SPARC64
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select IRQ_PREFLOW_FASTEOI
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config ARCH_DEFCONFIG
 	string
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 3c93f08ce18..2c2e38821f6 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,3 +16,8 @@ header-y += traps.h
 header-y += uctx.h
 header-y += utrap.h
 header-y += watchdog.h
+
+generic-y += div64.h
+generic-y += local64.h
+generic-y += irq_regs.h
+generic-y += local.h
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index 325e295d60d..29011cc0e4b 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -26,61 +26,28 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
 
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
 
+extern int ffs(int x);
+extern unsigned long __ffs(unsigned long);
+
+#include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
 
 /*
  * hweightN: returns the hamming weight (i.e. the number
  * of bits set) of a N-bit word
  */
 
-#ifdef ULTRA_HAS_POPULATION_COUNT
-
-static inline unsigned int __arch_hweight64(unsigned long w)
-{
-	unsigned int res;
-
-	__asm__ ("popc %1,%0" : "=r" (res) : "r" (w));
-	return res;
-}
-
-static inline unsigned int __arch_hweight32(unsigned int w)
-{
-	unsigned int res;
-
-	__asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffffffff));
-	return res;
-}
+extern unsigned long __arch_hweight64(__u64 w);
+extern unsigned int __arch_hweight32(unsigned int w);
+extern unsigned int __arch_hweight16(unsigned int w);
+extern unsigned int __arch_hweight8(unsigned int w);
 
-static inline unsigned int __arch_hweight16(unsigned int w)
-{
-	unsigned int res;
-
-	__asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xffff));
-	return res;
-}
-
-static inline unsigned int __arch_hweight8(unsigned int w)
-{
-	unsigned int res;
-
-	__asm__ ("popc %1,%0" : "=r" (res) : "r" (w & 0xff));
-	return res;
-}
-
-#else
-
-#include <asm-generic/bitops/arch_hweight.h>
-
-#endif
 #include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 #endif /* __KERNEL__ */
diff --git a/arch/sparc/include/asm/div64.h b/arch/sparc/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb2..00000000000
--- a/arch/sparc/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 64f7a00b374..7df8b7f544d 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -59,15 +59,33 @@
 #define R_SPARC_6		45
 
 /* Bits present in AT_HWCAP, primarily for Sparc32.  */
-
-#define HWCAP_SPARC_FLUSH	1    /* CPU supports flush instruction. */
-#define HWCAP_SPARC_STBAR	2
-#define HWCAP_SPARC_SWAP	4
-#define HWCAP_SPARC_MULDIV	8
-#define HWCAP_SPARC_V9		16
-#define HWCAP_SPARC_ULTRA3	32
-#define HWCAP_SPARC_BLKINIT	64
-#define HWCAP_SPARC_N2		128
+#define HWCAP_SPARC_FLUSH	0x00000001
+#define HWCAP_SPARC_STBAR	0x00000002
+#define HWCAP_SPARC_SWAP	0x00000004
+#define HWCAP_SPARC_MULDIV	0x00000008
+#define HWCAP_SPARC_V9		0x00000010
+#define HWCAP_SPARC_ULTRA3	0x00000020
+#define HWCAP_SPARC_BLKINIT	0x00000040
+#define HWCAP_SPARC_N2		0x00000080
+
+/* Solaris compatible AT_HWCAP bits. */
+#define AV_SPARC_MUL32		0x00000100 /* 32x32 multiply is efficient */
+#define AV_SPARC_DIV32		0x00000200 /* 32x32 divide is efficient */
+#define AV_SPARC_FSMULD		0x00000400 /* 'fsmuld' is efficient */
+#define AV_SPARC_V8PLUS		0x00000800 /* v9 insn available to 32bit */
+#define AV_SPARC_POPC		0x00001000 /* 'popc' is efficient */
+#define AV_SPARC_VIS		0x00002000 /* VIS insns available */
+#define AV_SPARC_VIS2		0x00004000 /* VIS2 insns available */
+#define AV_SPARC_ASI_BLK_INIT	0x00008000 /* block init ASIs available */
+#define AV_SPARC_FMAF		0x00010000 /* fused multiply-add */
+#define AV_SPARC_VIS3		0x00020000 /* VIS3 insns available */
+#define AV_SPARC_HPC		0x00040000 /* HPC insns available */
+#define AV_SPARC_RANDOM		0x00080000 /* 'random' insn available */
+#define AV_SPARC_TRANS		0x00100000 /* transaction insns available */
+#define AV_SPARC_FJFMAU		0x00200000 /* unfused multiply-add */
+#define AV_SPARC_IMA		0x00400000 /* integer multiply-add */
+#define AV_SPARC_ASI_CACHE_SPARING \
+				0x00800000 /* cache sparing ASIs available */
 
 #define CORE_DUMP_USE_REGSET
 
@@ -162,33 +180,8 @@ typedef struct {
 #define ELF_ET_DYN_BASE		0x0000010000000000UL
 #define COMPAT_ELF_ET_DYN_BASE	0x0000000070000000UL
 
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this cpu supports.  */
-
-/* On Ultra, we support all of the v8 capabilities. */
-static inline unsigned int sparc64_elf_hwcap(void)
-{
-	unsigned int cap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
-			    HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
-			    HWCAP_SPARC_V9);
-
-	if (tlb_type == cheetah || tlb_type == cheetah_plus)
-		cap |= HWCAP_SPARC_ULTRA3;
-	else if (tlb_type == hypervisor) {
-		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
-		    sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
-		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
-			cap |= HWCAP_SPARC_BLKINIT;
-		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
-		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
-			cap |= HWCAP_SPARC_N2;
-	}
-
-	return cap;
-}
-
-#define ELF_HWCAP	sparc64_elf_hwcap()
+extern unsigned long sparc64_elf_hwcap;
+#define ELF_HWCAP	sparc64_elf_hwcap
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
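
These HWCAP_/AV_ bits reach userspace through the AT_HWCAP auxiliary-vector entry (via ELF_HWCAP). A hedged userspace sketch of testing one of them; getauxval() is glibc 2.16+, which postdates this 2011 merge, so older code would walk the auxv manually:

/* Userspace sketch: check AT_HWCAP for the popc bit defined above.
 * AV_SPARC_POPC is copied from the header; getauxval() needs
 * glibc >= 2.16.
 */
#include <stdio.h>
#include <sys/auxv.h>

#define AV_SPARC_POPC	0x00001000

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("popc %s\n", (hwcap & AV_SPARC_POPC) ? "available" : "absent");
	return 0;
}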
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 7a5f80df15d..015a761eaa3 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2927,6 +2927,13 @@ extern unsigned long sun4v_ncs_request(unsigned long request,
 #define HV_FAST_FIRE_GET_PERFREG	0x120
 #define HV_FAST_FIRE_SET_PERFREG	0x121
 
+#define HV_FAST_REBOOT_DATA_SET		0x172
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_reboot_data_set(unsigned long ra,
+					   unsigned long len);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER			0x00
 #define HV_CORE_PUTCHAR			0x01
@@ -2940,11 +2947,17 @@ extern unsigned long sun4v_ncs_request(unsigned long request,
 #define HV_GRP_CORE			0x0001
 #define HV_GRP_INTR			0x0002
 #define HV_GRP_SOFT_STATE		0x0003
+#define HV_GRP_TM			0x0080
 #define HV_GRP_PCI			0x0100
 #define HV_GRP_LDOM			0x0101
 #define HV_GRP_SVC_CHAN			0x0102
 #define HV_GRP_NCS			0x0103
 #define HV_GRP_RNG			0x0104
+#define HV_GRP_PBOOT			0x0105
+#define HV_GRP_TPM			0x0107
+#define HV_GRP_SDIO			0x0108
+#define HV_GRP_SDIO_ERR			0x0109
+#define HV_GRP_REBOOT_DATA		0x0110
 #define HV_GRP_NIAG_PERF		0x0200
 #define HV_GRP_FIRE_PERF		0x0201
 #define HV_GRP_N2_CPU			0x0202
diff --git a/arch/sparc/include/asm/irq_regs.h b/arch/sparc/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b7027..00000000000
--- a/arch/sparc/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/sparc/include/asm/local.h b/arch/sparc/include/asm/local.h
deleted file mode 100644
index bc80815a435..00000000000
--- a/arch/sparc/include/asm/local.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _SPARC_LOCAL_H
-#define _SPARC_LOCAL_H
-
-#include <asm-generic/local.h>
-
-#endif
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc23..00000000000
--- a/arch/sparc/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 83c571d8c8a..1a8afd1ad04 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -133,29 +133,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	sub	TSB, 0x8, TSB; \
 	TSB_STORE(TSB, TAG);
 
-#define KTSB_LOAD_QUAD(TSB, REG) \
-	ldda		[TSB] ASI_NUCLEUS_QUAD_LDD, REG;
-
-#define KTSB_STORE(ADDR, VAL) \
-	stxa		VAL, [ADDR] ASI_N;
-
-#define KTSB_LOCK_TAG(TSB, REG1, REG2)	\
-99:	lduwa	[TSB] ASI_N, REG1;	\
-	sethi	%hi(TSB_TAG_LOCK_HIGH), REG2;\
-	andcc	REG1, REG2, %g0;	\
-	bne,pn	%icc, 99b;		\
-	 nop;				\
-	casa	[TSB] ASI_N, REG1, REG2;\
-	cmp	REG1, REG2;		\
-	bne,pn	%icc, 99b;		\
-	 nop;				\
-
-#define KTSB_WRITE(TSB, TTE, TAG) \
-	add	TSB, 0x8, TSB; \
-	stxa	TTE, [TSB] ASI_N; \
-	sub	TSB, 0x8, TSB; \
-	stxa	TAG, [TSB] ASI_N;
-
 	/* Do a kernel page table walk.  Leaves physical PTE pointer in
 	 * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
 	 * VADDR will not be clobbered, but REG2 will.
@@ -239,6 +216,8 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	  (KERNEL_TSB_SIZE_BYTES / 16)
 #define KERNEL_TSB4M_NENTRIES	4096
 
+#define KTSB_PHYS_SHIFT		15
+
 	/* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
 	 * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
 	 * and the found TTE will be left in REG1.  REG3 and REG4 must
@@ -247,13 +226,22 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 * VADDR and TAG will be preserved and not clobbered by this macro.
 	 */
 #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-	sethi		%hi(swapper_tsb), REG1; \
+661:	sethi		%hi(swapper_tsb), REG1; \
 	or		REG1, %lo(swapper_tsb), REG1; \
+	.section	.swapper_tsb_phys_patch, "ax"; \
+	.word		661b; \
+	.previous; \
+661:	nop; \
+	.section	.tsb_ldquad_phys_patch, "ax"; \
+	.word		661b; \
+	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
+	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
+	.previous; \
 	srlx		VADDR, PAGE_SHIFT, REG2; \
 	and		REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
 	sllx		REG2, 4, REG2; \
 	add		REG1, REG2, REG2; \
-	KTSB_LOAD_QUAD(REG2, REG3); \
+	TSB_LOAD_QUAD(REG2, REG3); \
 	cmp		REG3, TAG; \
 	be,a,pt		%xcc, OK_LABEL; \
 	 mov		REG4, REG1;
@@ -263,12 +251,21 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 * we can make use of that for the index computation.
 	 */
 #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-	sethi		%hi(swapper_4m_tsb), REG1; \
+661:	sethi		%hi(swapper_4m_tsb), REG1; \
 	or		REG1, %lo(swapper_4m_tsb), REG1; \
+	.section	.swapper_4m_tsb_phys_patch, "ax"; \
+	.word		661b; \
+	.previous; \
+661:	nop; \
+	.section	.tsb_ldquad_phys_patch, "ax"; \
+	.word		661b; \
+	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
+	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
+	.previous; \
 	and		TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
 	sllx		REG2, 4, REG2; \
 	add		REG1, REG2, REG2; \
-	KTSB_LOAD_QUAD(REG2, REG3); \
+	TSB_LOAD_QUAD(REG2, REG3); \
 	cmp		REG3, TAG; \
 	be,a,pt		%xcc, OK_LABEL; \
 	 mov		REG4, REG1;
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 17cf290dc2b..9810fd88105 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -396,6 +396,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
 		   , cpu_data(0).clock_tick
 #endif
 		);
+	cpucap_info(m);
 #ifdef CONFIG_SMP
 	smp_bogo(m);
 #endif
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index dd1342c0a3b..490e5418740 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -15,12 +15,15 @@
 #include <linux/reboot.h>
 #include <linux/cpu.h>
 
+#include <asm/hypervisor.h>
 #include <asm/ldc.h>
 #include <asm/vio.h>
 #include <asm/mdesc.h>
 #include <asm/head.h>
 #include <asm/irq.h>
 
+#include "kernel.h"
+
 #define DRV_MODULE_NAME		"ds"
 #define PFX				DRV_MODULE_NAME ": "
 #define DRV_MODULE_VERSION	"1.0"
@@ -828,18 +831,32 @@ void ldom_set_var(const char *var, const char *value)
 	}
 }
 
+static char full_boot_str[256] __attribute__((aligned(32)));
+static int reboot_data_supported;
+
 void ldom_reboot(const char *boot_command)
 {
 	/* Don't bother with any of this if the boot_command
 	 * is empty.
 	 */
 	if (boot_command && strlen(boot_command)) {
-		char full_boot_str[256];
+		unsigned long len;
 
 		strcpy(full_boot_str, "boot ");
 		strcpy(full_boot_str + strlen("boot "), boot_command);
+		len = strlen(full_boot_str);
 
-		ldom_set_var("reboot-command", full_boot_str);
+		if (reboot_data_supported) {
+			unsigned long ra = kimage_addr_to_ra(full_boot_str);
+			unsigned long hv_ret;
+
+			hv_ret = sun4v_reboot_data_set(ra, len);
+			if (hv_ret != HV_EOK)
+				pr_err("SUN4V: Unable to set reboot data "
+				       "hv_ret=%lu\n", hv_ret);
+		} else {
+			ldom_set_var("reboot-command", full_boot_str);
+		}
 	}
 	sun4v_mach_sir();
 }
@@ -1237,6 +1254,15 @@ static struct vio_driver ds_driver = {
 
 static int __init ds_init(void)
 {
+	unsigned long hv_ret, major, minor;
+
+	hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor);
+	if (hv_ret == HV_EOK) {
+		pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n",
+			major, minor);
+		reboot_data_supported = 1;
+	}
+
 	kthread_run(ds_thread, NULL, "kldomd");
 
 	return vio_register_driver(&ds_driver);
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index d1f1361c416..e27f8ea8656 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -42,6 +42,20 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 extern void fpload(unsigned long *fpregs, unsigned long *fsr);
 
 #else /* CONFIG_SPARC32 */
+struct popc_3insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[3];
+};
+extern struct popc_3insn_patch_entry __popc_3insn_patch,
+	__popc_3insn_patch_end;
+
+struct popc_6insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[6];
+};
+extern struct popc_6insn_patch_entry __popc_6insn_patch,
+	__popc_6insn_patch_end;
+
 extern void __init per_cpu_patch(void);
 extern void __init sun4v_patch(void);
 extern void __init boot_cpu_id_too_large(int cpu);
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index c752603a7c0..0eac1b2fc53 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -559,7 +559,7 @@ niagara2_patch:
 	 nop
 	call	niagara_patch_bzero
 	 nop
-	call	niagara2_patch_pageops
+	call	niagara_patch_pageops
 	 nop
 
 	ba,a,pt	%xcc, 80f
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index d306e648c33..c2d055d8ba9 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -28,11 +28,17 @@ static struct api_info api_table[] = {
 	{ .group = HV_GRP_CORE,		.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_INTR,					},
 	{ .group = HV_GRP_SOFT_STATE,				},
+	{ .group = HV_GRP_TM,					},
 	{ .group = HV_GRP_PCI,		.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_LDOM,					},
 	{ .group = HV_GRP_SVC_CHAN,	.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_NCS,		.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_RNG,					},
+	{ .group = HV_GRP_PBOOT,				},
+	{ .group = HV_GRP_TPM,					},
+	{ .group = HV_GRP_SDIO,					},
+	{ .group = HV_GRP_SDIO_ERR,				},
+	{ .group = HV_GRP_REBOOT_DATA,				},
 	{ .group = HV_GRP_NIAG_PERF,	.flags = FLAG_PRE_API	},
 	{ .group = HV_GRP_FIRE_PERF,				},
 	{ .group = HV_GRP_N2_CPU,				},
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index 8a5f35ffb15..58d60de4d65 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -798,3 +798,10 @@ ENTRY(sun4v_niagara2_setperf)
 	retl
 	 nop
 ENDPROC(sun4v_niagara2_setperf)
+
+ENTRY(sun4v_reboot_data_set)
+	mov	HV_FAST_REBOOT_DATA_SET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_reboot_data_set)
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 6f6544cfa0e..fd6c36b1df7 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -4,12 +4,27 @@
 #include <linux/interrupt.h>
 
 #include <asm/traps.h>
+#include <asm/head.h>
+#include <asm/io.h>
 
 /* cpu.c */
 extern const char *sparc_pmu_type;
 extern unsigned int fsr_storage;
 extern int ncpus_probed;
 
+#ifdef CONFIG_SPARC64
+/* setup_64.c */
+struct seq_file;
+extern void cpucap_info(struct seq_file *);
+
+static inline unsigned long kimage_addr_to_ra(const char *p)
+{
+	unsigned long val = (unsigned long) p;
+
+	return kern_base + (val - KERNBASE);
+}
+#endif
+
 #ifdef CONFIG_SPARC32
 /* cpu.c */
 extern void cpu_probe(void);
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 1d361477d7d..79f31036484 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -47,16 +47,16 @@ kvmap_itlb_tsb_miss:
 kvmap_itlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_LOCK_TAG(%g1, %g2, %g7)
 
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
 	mov		1, %g7
 	sllx		%g7, TSB_TAG_INVALID_BIT, %g7
 	brgez,a,pn	%g5, kvmap_itlb_longpath
-	 KTSB_STORE(%g1, %g7)
+	 TSB_STORE(%g1, %g7)
 
-	KTSB_WRITE(%g1, %g5, %g6)
+	TSB_WRITE(%g1, %g5, %g6)
 
 	/* fallthrough to TLB load */
 
@@ -102,9 +102,9 @@ kvmap_itlb_longpath:
 kvmap_itlb_obp:
 	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_LOCK_TAG(%g1, %g2, %g7)
 
-	KTSB_WRITE(%g1, %g5, %g6)
+	TSB_WRITE(%g1, %g5, %g6)
 
 	ba,pt		%xcc, kvmap_itlb_load
 	 nop
@@ -112,17 +112,17 @@ kvmap_itlb_obp:
 kvmap_dtlb_obp:
 	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_LOCK_TAG(%g1, %g2, %g7)
 
-	KTSB_WRITE(%g1, %g5, %g6)
+	TSB_WRITE(%g1, %g5, %g6)
 
 	ba,pt		%xcc, kvmap_dtlb_load
 	 nop
 
 	.align		32
 kvmap_dtlb_tsb4m_load:
-	KTSB_LOCK_TAG(%g1, %g2, %g7)
-	KTSB_WRITE(%g1, %g5, %g6)
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_WRITE(%g1, %g5, %g6)
 	ba,pt		%xcc, kvmap_dtlb_load
 	 nop
 
@@ -222,16 +222,16 @@ kvmap_linear_patch:
 kvmap_dtlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
 
-	KTSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_LOCK_TAG(%g1, %g2, %g7)
 
 	/* Load and check PTE.  */
 	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
 	mov		1, %g7
 	sllx		%g7, TSB_TAG_INVALID_BIT, %g7
 	brgez,a,pn	%g5, kvmap_dtlb_longpath
-	 KTSB_STORE(%g1, %g7)
+	 TSB_STORE(%g1, %g7)
 
-	KTSB_WRITE(%g1, %g5, %g6)
+	TSB_WRITE(%g1, %g5, %g6)
 
 	/* fallthrough to TLB load */
 
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 42f28c7420e..acaebb63c4f 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -508,6 +508,8 @@ const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
 }
 EXPORT_SYMBOL(mdesc_node_name);
 
+static u64 max_cpus = 64;
+
 static void __init report_platform_properties(void)
 {
 	struct mdesc_handle *hp = mdesc_grab();
@@ -543,8 +545,10 @@ static void __init report_platform_properties(void)
 	if (v)
 		printk("PLATFORM: watchdog-max-timeout [%llu ms]\n", *v);
 	v = mdesc_get_property(hp, pn, "max-cpus", NULL);
-	if (v)
-		printk("PLATFORM: max-cpus [%llu]\n", *v);
+	if (v) {
+		max_cpus = *v;
+		printk("PLATFORM: max-cpus [%llu]\n", max_cpus);
+	}
 
 #ifdef CONFIG_SMP
 	{
@@ -715,7 +719,7 @@ static void __cpuinit set_proc_ids(struct mdesc_handle *hp)
 }
 
 static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
-					 unsigned char def)
+					 unsigned long def, unsigned long max)
 {
 	u64 val;
 
@@ -726,6 +730,9 @@ static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
 	if (!val || val >= 64)
 		goto use_default;
 
+	if (val > max)
+		val = max;
+
 	*mask = ((1U << val) * 64U) - 1U;
 	return;
 
@@ -736,19 +743,28 @@ use_default:
 static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
 				     struct trap_per_cpu *tb)
 {
+	static int printed;
 	const u64 *val;
 
 	val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
-	get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
+	get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7, ilog2(max_cpus * 2));
 
 	val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
-	get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
+	get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7, 8);
 
 	val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
-	get_one_mondo_bits(val, &tb->resum_qmask, 6);
+	get_one_mondo_bits(val, &tb->resum_qmask, 6, 7);
 
 	val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
-	get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
+	get_one_mondo_bits(val, &tb->nonresum_qmask, 2, 2);
+	if (!printed++) {
+		pr_info("SUN4V: Mondo queue sizes "
+			"[cpu(%u) dev(%u) r(%u) nr(%u)]\n",
+			tb->cpu_mondo_qmask + 1,
+			tb->dev_mondo_qmask + 1,
+			tb->resum_qmask + 1,
+			tb->nonresum_qmask + 1);
+	}
 }
 
 static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask)
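
get_one_mondo_bits() turns a "#bits" machine-description property into a byte mask over a queue of 64-byte entries: mask = (2^val * 64) - 1, now clamped to the new max argument. For example, val = 7 yields a 128-entry, 8192-byte queue (mask 0x1fff). A hedged standalone check of that arithmetic:

/* Standalone sketch of the mondo-queue mask arithmetic used above. */
#include <stdio.h>

int main(void)
{
	unsigned long val = 7;	/* q-*-mondo-#bits value from the MD */
	unsigned int mask = ((1U << val) * 64U) - 1U;

	/* prints: entries=128 bytes=8192 mask=0x1fff */
	printf("entries=%lu bytes=%u mask=%#x\n",
	       1UL << val, mask + 1, mask);
	return 0;
}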
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index c4dd0999da8..3e9daea1653 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -29,6 +29,7 @@
 #include <linux/interrupt.h>
 #include <linux/cpu.h>
 #include <linux/initrd.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -46,6 +47,8 @@
 #include <asm/mmu.h>
 #include <asm/ns87303.h>
 #include <asm/btext.h>
+#include <asm/elf.h>
+#include <asm/mdesc.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -269,6 +272,40 @@ void __init sun4v_patch(void)
 	sun4v_hvapi_init();
 }
 
+static void __init popc_patch(void)
+{
+	struct popc_3insn_patch_entry *p3;
+	struct popc_6insn_patch_entry *p6;
+
+	p3 = &__popc_3insn_patch;
+	while (p3 < &__popc_3insn_patch_end) {
+		unsigned long i, addr = p3->addr;
+
+		for (i = 0; i < 3; i++) {
+			*(unsigned int *) (addr + (i * 4)) = p3->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr + (i * 4)));
+		}
+
+		p3++;
+	}
+
+	p6 = &__popc_6insn_patch;
+	while (p6 < &__popc_6insn_patch_end) {
+		unsigned long i, addr = p6->addr;
+
+		for (i = 0; i < 6; i++) {
+			*(unsigned int *) (addr + (i * 4)) = p6->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr + (i * 4)));
+		}
+
+		p6++;
+	}
+}
+
 #ifdef CONFIG_SMP
 void __init boot_cpu_id_too_large(int cpu)
 {
@@ -278,6 +315,154 @@ void __init boot_cpu_id_too_large(int cpu)
 }
 #endif
 
+/* On Ultra, we support all of the v8 capabilities. */
+unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
+				   HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
+				   HWCAP_SPARC_V9);
+EXPORT_SYMBOL(sparc64_elf_hwcap);
+
+static const char *hwcaps[] = {
+	"flush", "stbar", "swap", "muldiv", "v9",
+	"ultra3", "blkinit", "n2",
+
+	/* These strings are as they appear in the machine description
+	 * 'hwcap-list' property for cpu nodes.
+	 */
+	"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
+	"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
+	"ima", "cspare",
+};
+
+void cpucap_info(struct seq_file *m)
+{
+	unsigned long caps = sparc64_elf_hwcap;
+	int i, printed = 0;
+
+	seq_puts(m, "cpucaps\t\t: ");
+	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+		unsigned long bit = 1UL << i;
+		if (caps & bit) {
+			seq_printf(m, "%s%s",
+				   printed ? "," : "", hwcaps[i]);
+			printed++;
+		}
+	}
+	seq_putc(m, '\n');
+}
+
+static void __init report_hwcaps(unsigned long caps)
+{
+	int i, printed = 0;
+
+	printk(KERN_INFO "CPU CAPS: [");
+	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+		unsigned long bit = 1UL << i;
+		if (caps & bit) {
+			printk(KERN_CONT "%s%s",
+			       printed ? "," : "", hwcaps[i]);
+			if (++printed == 8) {
+				printk(KERN_CONT "]\n");
+				printk(KERN_INFO "CPU CAPS: [");
+				printed = 0;
+			}
+		}
+	}
+	printk(KERN_CONT "]\n");
+}
+
+static unsigned long __init mdesc_cpu_hwcap_list(void)
+{
+	struct mdesc_handle *hp;
+	unsigned long caps = 0;
+	const char *prop;
+	int len;
+	u64 pn;
+
+	hp = mdesc_grab();
+	if (!hp)
+		return 0;
+
+	pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu");
+	if (pn == MDESC_NODE_NULL)
+		goto out;
+
+	prop = mdesc_get_property(hp, pn, "hwcap-list", &len);
+	if (!prop)
+		goto out;
+
+	while (len) {
+		int i, plen;
+
+		for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+			unsigned long bit = 1UL << i;
+
+			if (!strcmp(prop, hwcaps[i])) {
+				caps |= bit;
+				break;
+			}
+		}
+
+		plen = strlen(prop) + 1;
+		prop += plen;
+		len -= plen;
+	}
+
+out:
+	mdesc_release(hp);
+	return caps;
+}
+
+/* This yields a mask that user programs can use to figure out what
+ * instruction set this cpu supports.
+ */
+static void __init init_sparc64_elf_hwcap(void)
+{
+	unsigned long cap = sparc64_elf_hwcap;
+	unsigned long mdesc_caps;
+
+	if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		cap |= HWCAP_SPARC_ULTRA3;
+	else if (tlb_type == hypervisor) {
+		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+			cap |= HWCAP_SPARC_BLKINIT;
+		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+			cap |= HWCAP_SPARC_N2;
+	}
+
+	cap |= (AV_SPARC_MUL32 | AV_SPARC_DIV32 | AV_SPARC_V8PLUS);
+
+	mdesc_caps = mdesc_cpu_hwcap_list();
+	if (!mdesc_caps) {
+		if (tlb_type == spitfire)
+			cap |= AV_SPARC_VIS;
+		if (tlb_type == cheetah || tlb_type == cheetah_plus)
+			cap |= AV_SPARC_VIS | AV_SPARC_VIS2;
+		if (tlb_type == cheetah_plus)
+			cap |= AV_SPARC_POPC;
+		if (tlb_type == hypervisor) {
+			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1)
+				cap |= AV_SPARC_ASI_BLK_INIT;
+			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+			    sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+				cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
+					AV_SPARC_ASI_BLK_INIT |
+					AV_SPARC_POPC);
+			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3)
+				cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
+					AV_SPARC_FMAF);
+		}
+	}
+	sparc64_elf_hwcap = cap | mdesc_caps;
+
+	report_hwcaps(sparc64_elf_hwcap);
+
+	if (sparc64_elf_hwcap & AV_SPARC_POPC)
+		popc_patch();
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	/* Initialize PROM console and command line. */
@@ -337,6 +522,7 @@ void __init setup_arch(char **cmdline_p)
 	init_cur_cpu_trap(current_thread_info());
 
 	paging_init();
+	init_sparc64_elf_hwcap();
 }
 
 extern int stop_a_enabled;
diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
index 372ad59c4cb..83b47ab02d9 100644
--- a/arch/sparc/kernel/sparc_ksyms_64.c
+++ b/arch/sparc/kernel/sparc_ksyms_64.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/cpudata.h>
@@ -38,5 +39,15 @@ EXPORT_SYMBOL(sun4v_niagara_setperf);
 EXPORT_SYMBOL(sun4v_niagara2_getperf);
 EXPORT_SYMBOL(sun4v_niagara2_setperf);
 
+/* from hweight.S */
+EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__arch_hweight64);
+
+/* from ffs_ffz.S */
+EXPORT_SYMBOL(ffs);
+EXPORT_SYMBOL(__ffs);
+
 /* Exporting a symbol from /init/main.c */
 EXPORT_SYMBOL(saved_command_line);
diff --git a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c
index 8cdbe5946b4..c59af546f52 100644
--- a/arch/sparc/kernel/sstate.c
+++ b/arch/sparc/kernel/sstate.c
@@ -14,14 +14,9 @@
 #include <asm/head.h>
 #include <asm/io.h>
 
-static int hv_supports_soft_state;
-
-static unsigned long kimage_addr_to_ra(const char *p)
-{
-	unsigned long val = (unsigned long) p;
+#include "kernel.h"
 
-	return kern_base + (val - KERNBASE);
-}
+static int hv_supports_soft_state;
 
 static void do_set_sstate(unsigned long state, const char *msg)
 {
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index 35cff1673aa..76e4ac1a13e 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -22,6 +22,7 @@
 #include <linux/bitops.h>
 #include <linux/perf_event.h>
 #include <linux/ratelimit.h>
+#include <linux/bitops.h>
 #include <asm/fpumacro.h>
 
 enum direction {
@@ -373,16 +374,11 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 	}
 }
 
-static char popc_helper[] = {
-0, 1, 1, 2, 1, 2, 2, 3,
-1, 2, 2, 3, 2, 3, 3, 4,
-};
-
 int handle_popc(u32 insn, struct pt_regs *regs)
 {
-	u64 value;
-	int ret, i, rd = ((insn >> 25) & 0x1f);
 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
+	int ret, rd = ((insn >> 25) & 0x1f);
+	u64 value;
 
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 	if (insn & 0x2000) {
@@ -392,10 +388,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
 		maybe_flush_windows(0, insn & 0x1f, rd, from_kernel);
 		value = fetch_reg(insn & 0x1f, regs);
 	}
-	for (ret = 0, i = 0; i < 16; i++) {
-		ret += popc_helper[value & 0xf];
-		value >>= 4;
-	}
+	ret = hweight64(value);
 	if (rd < 16) {
 		if (rd)
 			regs->u_regs[rd] = ret;
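
The emulation now relies on hweight64() matching the retired nibble-table loop. A hedged standalone check of that equivalence, with the compiler builtin standing in for the kernel helper:

/* Sketch: the old 4-bit-table popcount and a 64-bit population count
 * agree; __builtin_popcountll stands in for the kernel's hweight64().
 */
#include <assert.h>
#include <stdint.h>

static const char popc_helper[16] = {
	0, 1, 1, 2, 1, 2, 2, 3,
	1, 2, 2, 3, 2, 3, 3, 4,
};

static int table_popc(uint64_t value)
{
	int i, ret = 0;

	for (i = 0; i < 16; i++) {
		ret += popc_helper[value & 0xf];
		value >>= 4;
	}
	return ret;
}

int main(void)
{
	uint64_t v = 0xdeadbeefcafef00dULL;

	assert(table_popc(v) == __builtin_popcountll(v));
	return 0;
}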
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index c0220759003..0e1605697b4 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -107,7 +107,26 @@ SECTIONS
 		*(.sun4v_2insn_patch)
 		__sun4v_2insn_patch_end = .;
 	}
-
+	.swapper_tsb_phys_patch : {
+		__swapper_tsb_phys_patch = .;
+		*(.swapper_tsb_phys_patch)
+		__swapper_tsb_phys_patch_end = .;
+	}
+	.swapper_4m_tsb_phys_patch : {
+		__swapper_4m_tsb_phys_patch = .;
+		*(.swapper_4m_tsb_phys_patch)
+		__swapper_4m_tsb_phys_patch_end = .;
+	}
+	.popc_3insn_patch : {
+		__popc_3insn_patch = .;
+		*(.popc_3insn_patch)
+		__popc_3insn_patch_end = .;
+	}
+	.popc_6insn_patch : {
+		__popc_6insn_patch = .;
+		*(.popc_6insn_patch)
+		__popc_6insn_patch_end = .;
+	}
 	PERCPU_SECTION(SMP_CACHE_BYTES)
 
 	. = ALIGN(PAGE_SIZE);
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 7f01b8fce8b..a3fc4375a15 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -31,13 +31,13 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o
 lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
 
 lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
-lib-$(CONFIG_SPARC64) +=  NG2patch.o NG2page.o
+lib-$(CONFIG_SPARC64) +=  NG2patch.o
 
 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
 
 lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
-lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o
+lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-y                 += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o
diff --git a/arch/sparc/lib/NG2page.S b/arch/sparc/lib/NG2page.S
deleted file mode 100644
index 73b6b7c72cb..00000000000
--- a/arch/sparc/lib/NG2page.S
+++ /dev/null
@@ -1,61 +0,0 @@
1/* NG2page.S: Niagara-2 optimized clear and copy page.
2 *
3 * Copyright (C) 2007 (davem@davemloft.net)
4 */
5
6#include <asm/asi.h>
7#include <asm/page.h>
8#include <asm/visasm.h>
9
10 .text
11 .align 32
12
13 /* This is heavily simplified from the sun4u variants
14 * because Niagara-2 does not have any D-cache aliasing issues.
15 */
16NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */
17 prefetch [%o1 + 0x00], #one_read
18 prefetch [%o1 + 0x40], #one_read
19 VISEntryHalf
20 set PAGE_SIZE, %g7
21 sub %o0, %o1, %g3
221: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P
23 subcc %g7, 64, %g7
24 ldda [%o1] ASI_BLK_P, %f0
25 stda %f0, [%o1 + %g3] ASI_BLK_P
26 add %o1, 64, %o1
27 bne,pt %xcc, 1b
28 prefetch [%o1 + 0x40], #one_read
29 membar #Sync
30 VISExitHalf
31 retl
32 nop
33
34#define BRANCH_ALWAYS 0x10680000
35#define NOP 0x01000000
36#define NG_DO_PATCH(OLD, NEW) \
37 sethi %hi(NEW), %g1; \
38 or %g1, %lo(NEW), %g1; \
39 sethi %hi(OLD), %g2; \
40 or %g2, %lo(OLD), %g2; \
41 sub %g1, %g2, %g1; \
42 sethi %hi(BRANCH_ALWAYS), %g3; \
43 sll %g1, 11, %g1; \
44 srl %g1, 11 + 2, %g1; \
45 or %g3, %lo(BRANCH_ALWAYS), %g3; \
46 or %g3, %g1, %g3; \
47 stw %g3, [%g2]; \
48 sethi %hi(NOP), %g3; \
49 or %g3, %lo(NOP), %g3; \
50 stw %g3, [%g2 + 0x4]; \
51 flush %g2;
52
53 .globl niagara2_patch_pageops
54 .type niagara2_patch_pageops,#function
55niagara2_patch_pageops:
56 NG_DO_PATCH(copy_user_page, NG2copy_user_page)
57 NG_DO_PATCH(_clear_page, NGclear_page)
58 NG_DO_PATCH(clear_user_page, NGclear_user_page)
59 retl
60 nop
61 .size niagara2_patch_pageops,.-niagara2_patch_pageops
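For reference, the deleted patcher redirected callers by overwriting the first two instructions of the generic routine with a branch-always to the Niagara-2 body plus a nop. A C model of the encoding NG_DO_PATCH computed (constants copied from the file above; the 32-bit truncation mirrors the sll/srl pair):

	#include <stdint.h>

	#define BRANCH_ALWAYS	0x10680000
	#define NOP		0x01000000

	/* Overwrite old[0..1] with "ba <new>; nop", as NG_DO_PATCH did:
	 * keep the low 21 bits of the byte displacement, convert it to a
	 * word displacement, and merge it into the branch opcode. */
	static void ng_do_patch_model(uint32_t *old, uint32_t *new_code)
	{
		long disp = (char *)new_code - (char *)old;

		old[0] = BRANCH_ALWAYS | ((uint32_t)(disp << 11) >> 13);
		old[1] = NOP;
		/* the assembly then executes "flush" on both patched words */
	}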
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S
index 428920de05b..b9e790b9c6b 100644
--- a/arch/sparc/lib/NGpage.S
+++ b/arch/sparc/lib/NGpage.S
@@ -16,55 +16,91 @@
  */
 
 NGcopy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */
-	prefetch	[%o1 + 0x00], #one_read
-	mov		8, %g1
-	mov		16, %g2
-	mov		24, %g3
+	save		%sp, -192, %sp
+	rd		%asi, %g3
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
 	set		PAGE_SIZE, %g7
+	prefetch	[%i1 + 0x00], #one_read
+	prefetch	[%i1 + 0x40], #one_read
 
-1:	ldda		[%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
-	ldda		[%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
-	prefetch	[%o1 + 0x40], #one_read
-	add		%o1, 32, %o1
-	stxa		%o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-	ldda		[%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
-	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-	ldda		[%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
-	add		%o1, 32, %o1
-	add		%o0, 32, %o0
-	stxa		%o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-	subcc		%g7, 64, %g7
+1:	prefetch	[%i1 + 0x80], #one_read
+	prefetch	[%i1 + 0xc0], #one_read
+	ldda		[%i1 + 0x00] %asi, %o2
+	ldda		[%i1 + 0x10] %asi, %o4
+	ldda		[%i1 + 0x20] %asi, %l2
+	ldda		[%i1 + 0x30] %asi, %l4
+	stxa		%o2, [%i0 + 0x00] %asi
+	stxa		%o3, [%i0 + 0x08] %asi
+	stxa		%o4, [%i0 + 0x10] %asi
+	stxa		%o5, [%i0 + 0x18] %asi
+	stxa		%l2, [%i0 + 0x20] %asi
+	stxa		%l3, [%i0 + 0x28] %asi
+	stxa		%l4, [%i0 + 0x30] %asi
+	stxa		%l5, [%i0 + 0x38] %asi
+	ldda		[%i1 + 0x40] %asi, %o2
+	ldda		[%i1 + 0x50] %asi, %o4
+	ldda		[%i1 + 0x60] %asi, %l2
+	ldda		[%i1 + 0x70] %asi, %l4
+	stxa		%o2, [%i0 + 0x40] %asi
+	stxa		%o3, [%i0 + 0x48] %asi
+	stxa		%o4, [%i0 + 0x50] %asi
+	stxa		%o5, [%i0 + 0x58] %asi
+	stxa		%l2, [%i0 + 0x60] %asi
+	stxa		%l3, [%i0 + 0x68] %asi
+	stxa		%l4, [%i0 + 0x70] %asi
+	stxa		%l5, [%i0 + 0x78] %asi
+	add		%i1, 128, %i1
+	subcc		%g7, 128, %g7
 	bne,pt		%xcc, 1b
-	 add		%o0, 32, %o0
+	 add		%i0, 128, %i0
+	wr		%g3, 0x0, %asi
 	membar		#Sync
-	retl
-	 nop
+	ret
+	 restore
 
-	.globl		NGclear_page, NGclear_user_page
+	.align		32
 NGclear_page:		/* %o0=dest */
 NGclear_user_page:	/* %o0=dest, %o1=vaddr */
-	mov		8, %g1
-	mov		16, %g2
-	mov		24, %g3
+	rd		%asi, %g3
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
 	set		PAGE_SIZE, %g7
 
-1:	stxa		%g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-	add		%o0, 32, %o0
-	stxa		%g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-	stxa		%g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
-	subcc		%g7, 64, %g7
+1:	stxa		%g0, [%o0 + 0x00] %asi
+	stxa		%g0, [%o0 + 0x08] %asi
+	stxa		%g0, [%o0 + 0x10] %asi
+	stxa		%g0, [%o0 + 0x18] %asi
+	stxa		%g0, [%o0 + 0x20] %asi
+	stxa		%g0, [%o0 + 0x28] %asi
+	stxa		%g0, [%o0 + 0x30] %asi
+	stxa		%g0, [%o0 + 0x38] %asi
+	stxa		%g0, [%o0 + 0x40] %asi
+	stxa		%g0, [%o0 + 0x48] %asi
+	stxa		%g0, [%o0 + 0x50] %asi
+	stxa		%g0, [%o0 + 0x58] %asi
+	stxa		%g0, [%o0 + 0x60] %asi
+	stxa		%g0, [%o0 + 0x68] %asi
+	stxa		%g0, [%o0 + 0x70] %asi
+	stxa		%g0, [%o0 + 0x78] %asi
+	stxa		%g0, [%o0 + 0x80] %asi
+	stxa		%g0, [%o0 + 0x88] %asi
+	stxa		%g0, [%o0 + 0x90] %asi
+	stxa		%g0, [%o0 + 0x98] %asi
+	stxa		%g0, [%o0 + 0xa0] %asi
+	stxa		%g0, [%o0 + 0xa8] %asi
+	stxa		%g0, [%o0 + 0xb0] %asi
+	stxa		%g0, [%o0 + 0xb8] %asi
+	stxa		%g0, [%o0 + 0xc0] %asi
+	stxa		%g0, [%o0 + 0xc8] %asi
+	stxa		%g0, [%o0 + 0xd0] %asi
+	stxa		%g0, [%o0 + 0xd8] %asi
+	stxa		%g0, [%o0 + 0xe0] %asi
+	stxa		%g0, [%o0 + 0xe8] %asi
+	stxa		%g0, [%o0 + 0xf0] %asi
+	stxa		%g0, [%o0 + 0xf8] %asi
+	subcc		%g7, 256, %g7
 	bne,pt		%xcc, 1b
-	 add		%o0, 32, %o0
+	 add		%o0, 256, %o0
+	wr		%g3, 0x0, %asi
 	membar		#Sync
 	retl
 	 nop
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 8600eb2461b..1d32b54089a 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -65,7 +65,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u)
 	if (ret != u)
 		v->counter += a;
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
-	return ret != u;
+	return ret;
 }
 EXPORT_SYMBOL(__atomic_add_unless);
 
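Note the semantic change: __atomic_add_unless() now returns the value it observed rather than a boolean, matching the cross-arch convention where the boolean helper is layered on top. Roughly, assuming the 3.1-era generic atomic.h wrapper:

	/* Caller-side contract: add a to v unless it already holds u;
	 * the boolean form compares the observed value against u. */
	static inline int atomic_add_unless(atomic_t *v, int a, int u)
	{
		return __atomic_add_unless(v, a, u) != u;
	}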
diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S
new file mode 100644
index 00000000000..b39389f6989
--- /dev/null
+++ b/arch/sparc/lib/ffs.S
@@ -0,0 +1,84 @@
1#include <linux/linkage.h>
2
3 .register %g2,#scratch
4
5 .text
6 .align 32
7
8ENTRY(ffs)
9 brnz,pt %o0, 1f
10 mov 1, %o1
11 retl
12 clr %o0
13 nop
14 nop
15ENTRY(__ffs)
16 sllx %o0, 32, %g1 /* 1 */
17 srlx %o0, 32, %g2
18
19 clr %o1 /* 2 */
20 movrz %g1, %g2, %o0
21
22 movrz %g1, 32, %o1 /* 3 */
231: clr %o2
24
25 sllx %o0, (64 - 16), %g1 /* 4 */
26 srlx %o0, 16, %g2
27
28 movrz %g1, %g2, %o0 /* 5 */
29 clr %o3
30
31 movrz %g1, 16, %o2 /* 6 */
32 clr %o4
33
34 and %o0, 0xff, %g1 /* 7 */
35 srlx %o0, 8, %g2
36
37 movrz %g1, %g2, %o0 /* 8 */
38 clr %o5
39
40 movrz %g1, 8, %o3 /* 9 */
41 add %o2, %o1, %o2
42
43 and %o0, 0xf, %g1 /* 10 */
44 srlx %o0, 4, %g2
45
46 movrz %g1, %g2, %o0 /* 11 */
47 add %o2, %o3, %o2
48
49 movrz %g1, 4, %o4 /* 12 */
50
51 and %o0, 0x3, %g1 /* 13 */
52 srlx %o0, 2, %g2
53
54 movrz %g1, %g2, %o0 /* 14 */
55 add %o2, %o4, %o2
56
57 movrz %g1, 2, %o5 /* 15 */
58
59 and %o0, 0x1, %g1 /* 16 */
60
61 add %o2, %o5, %o2 /* 17 */
62 xor %g1, 0x1, %g1
63
64 retl /* 18 */
65 add %o2, %g1, %o0
66ENDPROC(ffs)
67ENDPROC(__ffs)
68
69 .section .popc_6insn_patch, "ax"
70 .word ffs
71 brz,pn %o0, 98f
72 neg %o0, %g1
73 xnor %o0, %g1, %o1
74 popc %o1, %o0
7598: retl
76 nop
77 .word __ffs
78 neg %o0, %g1
79 xnor %o0, %g1, %o1
80 popc %o1, %o0
81 retl
82 sub %o0, 1, %o0
83 nop
84 .previous
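The patched fast path relies on the identity that xnor(x, -x) = ~(x ^ -x) sets exactly the lowest set bit of x and every bit below it, so its population count is the 1-based index of that bit. A user-space C check of the identity (the builtin stands in for the popc instruction):

	#include <assert.h>
	#include <stdint.h>

	/* ffs(x): 1-based index of the lowest set bit, 0 if x == 0. */
	static int ffs_popc(uint64_t x)
	{
		if (x == 0)
			return 0;
		return __builtin_popcountll(~(x ^ -x));
	}

	/* __ffs(x): 0-based index, defined only for x != 0. */
	static int __ffs_popc(uint64_t x)
	{
		return __builtin_popcountll(~(x ^ -x)) - 1;
	}

	int main(void)
	{
		assert(ffs_popc(0) == 0);
		assert(ffs_popc(1) == 1);
		assert(ffs_popc(0x40) == 7);
		assert(__ffs_popc(0x40) == 6);
		return 0;
	}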
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
new file mode 100644
index 00000000000..95414e0a680
--- /dev/null
+++ b/arch/sparc/lib/hweight.S
@@ -0,0 +1,51 @@
1#include <linux/linkage.h>
2
3 .text
4 .align 32
5ENTRY(__arch_hweight8)
6 ba,pt %xcc, __sw_hweight8
7 nop
8 nop
9ENDPROC(__arch_hweight8)
10 .section .popc_3insn_patch, "ax"
11 .word __arch_hweight8
12 sllx %o0, 64-8, %g1
13 retl
14 popc %g1, %o0
15 .previous
16
17ENTRY(__arch_hweight16)
18 ba,pt %xcc, __sw_hweight16
19 nop
20 nop
21ENDPROC(__arch_hweight16)
22 .section .popc_3insn_patch, "ax"
23 .word __arch_hweight16
24 sllx %o0, 64-16, %g1
25 retl
26 popc %g1, %o0
27 .previous
28
29ENTRY(__arch_hweight32)
30 ba,pt %xcc, __sw_hweight32
31 nop
32 nop
33ENDPROC(__arch_hweight32)
34 .section .popc_3insn_patch, "ax"
35 .word __arch_hweight32
36 sllx %o0, 64-32, %g1
37 retl
38 popc %g1, %o0
39 .previous
40
41ENTRY(__arch_hweight64)
42 ba,pt %xcc, __sw_hweight64
43 nop
44 nop
45ENDPROC(__arch_hweight64)
46 .section .popc_3insn_patch, "ax"
47 .word __arch_hweight64
48 retl
49 popc %o0, %o0
50 nop
51 .previous
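Each stub branches to the software fallback until the boot-time patcher installs the popc form recorded in .popc_3insn_patch; shifting the argument's low N bits to the top of the register keeps stray high bits from contributing. A C model with the builtin in place of popc:

	#include <stdint.h>

	/* Patched __arch_hweight8 computes popc(x << 56): only the low
	 * byte survives the shift, so garbage in the upper bits is
	 * discarded before counting. */
	static unsigned int hweight8_model(uint64_t x)
	{
		return __builtin_popcountll(x << 56);
	}

	/* Same idea for 16 and 32 bits; the 64-bit case needs no shift. */
	static unsigned int hweight32_model(uint64_t x)
	{
		return __builtin_popcountll(x << 32);
	}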
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 3fd8e18bed8..581531dbc8b 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1597,6 +1597,44 @@ static void __init tsb_phys_patch(void)
 static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
 extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
 
+static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
+{
+	pa >>= KTSB_PHYS_SHIFT;
+
+	while (start < end) {
+		unsigned int *ia = (unsigned int *)(unsigned long)*start;
+
+		ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
+		__asm__ __volatile__("flush	%0" : : "r" (ia));
+
+		ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
+		__asm__ __volatile__("flush	%0" : : "r" (ia + 1));
+
+		start++;
+	}
+}
+
+static void ktsb_phys_patch(void)
+{
+	extern unsigned int __swapper_tsb_phys_patch;
+	extern unsigned int __swapper_tsb_phys_patch_end;
+	unsigned long ktsb_pa;
+
+	ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
+	patch_one_ktsb_phys(&__swapper_tsb_phys_patch,
+			    &__swapper_tsb_phys_patch_end, ktsb_pa);
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	{
+	extern unsigned int __swapper_4m_tsb_phys_patch;
+	extern unsigned int __swapper_4m_tsb_phys_patch_end;
+	ktsb_pa = (kern_base +
+		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
+	patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch,
+			    &__swapper_4m_tsb_phys_patch_end, ktsb_pa);
+	}
+#endif
+}
+
 static void __init sun4v_ktsb_init(void)
 {
 	unsigned long ktsb_pa;
@@ -1716,8 +1754,10 @@ void __init paging_init(void)
 	sun4u_pgprot_init();
 
 	if (tlb_type == cheetah_plus ||
-	    tlb_type == hypervisor)
+	    tlb_type == hypervisor) {
 		tsb_phys_patch();
+		ktsb_phys_patch();
+	}
 
 	if (tlb_type == hypervisor) {
 		sun4v_patch_tlb_handlers();
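patch_one_ktsb_phys() rewrites a two-instruction immediate load in place: the high bits of the shifted physical address go into the first instruction's 22-bit field and the low 10 bits into the second's. A quick C check that the two fields reassemble the value (masks as in the code above; the value must fit in 32 bits):

	#include <assert.h>

	int main(void)
	{
		unsigned long pa = 0x12345678UL;	/* already >> KTSB_PHYS_SHIFT */
		unsigned int ia0 = 0, ia1 = 0;

		ia0 = (ia0 & ~0x3fffff) | (pa >> 10);	/* 22-bit high field */
		ia1 = (ia1 & ~0x3ff) | (pa & 0x3ff);	/* 10-bit low field */

		assert(((((unsigned long)ia0 & 0x3fffff) << 10) |
			(ia1 & 0x3ff)) == pa);
		return 0;
	}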
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 0249b8b4db5..b30f71ac0d0 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -12,6 +12,7 @@ config TILE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select SYS_HYPERVISOR
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 849ab2fa1f5..aec60dc0600 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ucontext.h
 header-y += hardwall.h
+
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipc.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += module.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += parport.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += scatterlist.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += xor.h
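Each generic-y entry makes Kbuild synthesize the corresponding wrapper header at build time, which is what allows the hand-written one-line headers deleted below to go away. The generated file is equivalent to this sketch (generated path approximate):

	/* arch/tile/include/generated/asm/bug.h, produced by Kbuild */
	#include <asm-generic/bug.h>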
diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h
deleted file mode 100644
index b12fd89e42e..00000000000
--- a/arch/tile/include/asm/bug.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/bug.h>
diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h
deleted file mode 100644
index 61791e1ad9f..00000000000
--- a/arch/tile/include/asm/bugs.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/bugs.h>
diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h
deleted file mode 100644
index 6d68ad7e0ea..00000000000
--- a/arch/tile/include/asm/cputime.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/cputime.h>
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
deleted file mode 100644
index f0a4c256403..00000000000
--- a/arch/tile/include/asm/device.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/device.h>
diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb2..00000000000
--- a/arch/tile/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/div64.h>
diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h
deleted file mode 100644
index 3711bd9d50b..00000000000
--- a/arch/tile/include/asm/emergency-restart.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/emergency-restart.h>
diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h
deleted file mode 100644
index 4c82b503d92..00000000000
--- a/arch/tile/include/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/errno.h>
diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h
deleted file mode 100644
index 3a4988e8df4..00000000000
--- a/arch/tile/include/asm/fb.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/fb.h>
diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h
deleted file mode 100644
index 46ab12db573..00000000000
--- a/arch/tile/include/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/fcntl.h>
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index 51537ff9265..c66f7933bea 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx,
 
 #define set_fixmap(idx, phys) \
 		__set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
-		__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
-
 #define clear_fixmap(idx) \
 		__set_fixmap(idx, 0, __pgprot(0))
 
diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe..00000000000
--- a/arch/tile/include/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ioctl.h>
diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h
deleted file mode 100644
index ec34c760665..00000000000
--- a/arch/tile/include/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ioctls.h>
diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h
deleted file mode 100644
index a46e3d9c2a3..00000000000
--- a/arch/tile/include/asm/ipc.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ipc.h>
diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d..00000000000
--- a/arch/tile/include/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ipcbuf.h>
diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b7027..00000000000
--- a/arch/tile/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/irq_regs.h>
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b03766..00000000000
--- a/arch/tile/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/kdebug.h>
diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h
deleted file mode 100644
index c11c530f74d..00000000000
--- a/arch/tile/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/local.h>
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
deleted file mode 100644
index 1e4b79fe858..00000000000
--- a/arch/tile/include/asm/module.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/module.h>
diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h
deleted file mode 100644
index 809134c644a..00000000000
--- a/arch/tile/include/asm/msgbuf.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/msgbuf.h>
diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h
deleted file mode 100644
index ff6101aa2c7..00000000000
--- a/arch/tile/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/mutex-dec.h>
diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h
deleted file mode 100644
index 965d4542797..00000000000
--- a/arch/tile/include/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/param.h>
diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h
deleted file mode 100644
index cf252af6459..00000000000
--- a/arch/tile/include/asm/parport.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/parport.h>
diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h
deleted file mode 100644
index c98509d3149..00000000000
--- a/arch/tile/include/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/poll.h>
diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h
deleted file mode 100644
index 22cae6230ce..00000000000
--- a/arch/tile/include/asm/posix_types.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/posix_types.h>
diff --git a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h
deleted file mode 100644
index 04bc4db8921..00000000000
--- a/arch/tile/include/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/resource.h>
diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h
deleted file mode 100644
index 35d786fe93a..00000000000
--- a/arch/tile/include/asm/scatterlist.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/scatterlist.h>
diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h
deleted file mode 100644
index 7673b83cfef..00000000000
--- a/arch/tile/include/asm/sembuf.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/sembuf.h>
diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h
deleted file mode 100644
index a0cb0caff15..00000000000
--- a/arch/tile/include/asm/serial.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/serial.h>
diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h
deleted file mode 100644
index 83c05fc2de3..00000000000
--- a/arch/tile/include/asm/shmbuf.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/shmbuf.h>
diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h
deleted file mode 100644
index 93f30deb95d..00000000000
--- a/arch/tile/include/asm/shmparam.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/shmparam.h>
diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h
deleted file mode 100644
index 6b71384b9d8..00000000000
--- a/arch/tile/include/asm/socket.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/socket.h>
diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h
deleted file mode 100644
index def6d4746ee..00000000000
--- a/arch/tile/include/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/sockios.h>
diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h
deleted file mode 100644
index 0b91fe198c2..00000000000
--- a/arch/tile/include/asm/statfs.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/statfs.h>
diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h
deleted file mode 100644
index 3935b106de7..00000000000
--- a/arch/tile/include/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/termbits.h>
diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h
deleted file mode 100644
index 280d78a9d96..00000000000
--- a/arch/tile/include/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/termios.h>
diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h
deleted file mode 100644
index b9e79bc580d..00000000000
--- a/arch/tile/include/asm/types.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/types.h>
diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h
deleted file mode 100644
index 9bc07b9f30f..00000000000
--- a/arch/tile/include/asm/ucontext.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/ucontext.h>
diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h
deleted file mode 100644
index c82eb12a5b1..00000000000
--- a/arch/tile/include/asm/xor.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/xor.h>
diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h
new file mode 100644
index 00000000000..6395faa6d9e
--- /dev/null
+++ b/arch/tile/include/hv/drv_srom_intf.h
@@ -0,0 +1,41 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/**
16 * @file drv_srom_intf.h
17 * Interface definitions for the SPI Flash ROM driver.
18 */
19
20#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H
21#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H
22
23/** Read this offset to get the total device size. */
24#define SROM_TOTAL_SIZE_OFF 0xF0000000
25
26/** Read this offset to get the device sector size. */
27#define SROM_SECTOR_SIZE_OFF 0xF0000004
28
29/** Read this offset to get the device page size. */
30#define SROM_PAGE_SIZE_OFF 0xF0000008
31
32/** Write this offset to flush any pending writes. */
33#define SROM_FLUSH_OFF 0xF1000000
34
35/** Write this offset, plus the byte offset of the start of a sector, to
36 * erase a sector. Any write data is ignored, but there must be at least
37 * one byte of write data. Only applies when the driver is in MTD mode.
38 */
39#define SROM_ERASE_OFF 0xF2000000
40
41#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */
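These pseudo-register offsets are addressed through the tile hypervisor device read/write interface. A hypothetical sketch of querying the geometry (hv_dev_pread() and its signature are assumptions based on the tile hypervisor API, not verified against this tree):

	/* Sketch: read one of the SROM geometry pseudo-registers. */
	static int srom_read_u32(int devhdl, unsigned int off, unsigned int *val)
	{
		return hv_dev_pread(devhdl, 0, (HV_VirtAddr)val,
				    sizeof(*val), off);
	}

	/* usage: srom_read_u32(devhdl, SROM_TOTAL_SIZE_OFF, &total_size); */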
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index c4be58cc5d5..f6f50f2a5e3 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = {
 	.rating = 300,
 	.read = clocksource_get_cycles,
 	.mask = CLOCKSOURCE_MASK(64),
-	.shift = 22,   /* typical value, e.g. x86 tsc uses this */
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -91,8 +90,6 @@ void __init setup_clock(void)
 	cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
 	sched_clock_mult =
 		clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT);
-	cycle_counter_cs.mult =
-		clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift);
 }
 
 void __init calibrate_delay(void)
@@ -107,7 +104,7 @@ void __init time_init(void)
 void __init time_init(void)
 {
 	/* Initialize and register the clock source. */
-	clocksource_register(&cycle_counter_cs);
+	clocksource_register_hz(&cycle_counter_cs, cycles_per_sec);
 
 	/* Start up the tile-timer interrupt source on the boot cpu. */
 	setup_tile_timer();
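clocksource_register_hz() lets the core pick mult/shift for the clock's rate instead of the driver hard-coding shift = 22. The invariant it maintains is ns = (cycles * mult) >> shift, with mult = (10^9 << shift) / hz. A quick C check (the helper is a local stand-in for clocksource_hz2mult()):

	#include <stdint.h>
	#include <stdio.h>

	/* Local stand-in for clocksource_hz2mult(): pick mult so that
	 * (cycles * mult) >> shift converts cycles at `hz` into ns. */
	static uint32_t hz2mult(uint32_t hz, uint32_t shift)
	{
		uint64_t tmp = ((uint64_t)1000000000 << shift) + hz / 2;
		return (uint32_t)(tmp / hz);
	}

	int main(void)
	{
		uint32_t mult = hz2mult(1000000000, 22);	/* 1 GHz, shift 22 */

		/* at 1 GHz one cycle is one ns, so mult must be 1 << 22 */
		printf("mult = %u (expect %u)\n", (unsigned)mult, 1u << 22);
		return 0;
	}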
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 4e10c402302..7309988c979 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -836,8 +836,7 @@ void __init mem_init(void)
 #endif
 
 #ifdef CONFIG_FLATMEM
-	if (!mem_map)
-		BUG();
+	BUG_ON(!mem_map);
 #endif
 
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7cf916fc1ce..6a47bb22657 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if (X86_64 && NET)
 	select CLKEVT_I8253
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d02804d650c..d8e8eefbe24 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -40,8 +40,6 @@
 #include <linux/compiler.h>
 #include <asm/page.h>
 
-#include <xen/xen.h>
-
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
 { type ret; asm volatile("mov" size " %1,%0":reg (ret) \
@@ -334,6 +332,7 @@ extern void fixup_early_ioremap(void);
 extern bool is_early_ioremap_ptep(pte_t *ptep);
 
 #ifdef CONFIG_XEN
+#include <xen/xen.h>
 struct bio_vec;
 
 extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 219371546af..0d1171c9772 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -751,8 +751,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
751 :: "a" (eax), "c" (ecx)); 751 :: "a" (eax), "c" (ecx));
752} 752}
753 753
754extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
755
756extern void select_idle_routine(const struct cpuinfo_x86 *c); 754extern void select_idle_routine(const struct cpuinfo_x86 *c);
757extern void init_amd_e400_c1e_mask(void); 755extern void init_amd_e400_c1e_mask(void);
758 756
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5812404a0d4..f50e7fb2a20 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+	if (!need_resched()) {
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__mwait(ax, cx);
+	}
+}
+
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e1ba8cb24e4..e7e3b019c43 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -438,29 +438,6 @@ void cpu_idle_wait(void)
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(ax, cx);
-	}
-}
-
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc59067..7a3b65107a2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -109,7 +110,8 @@ void cpu_idle(void)
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			if (cpuidle_idle_call())
+				pm_idle();
 			start_critical_timings();
 		}
 		tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca6f7ab8df3..f693e44e1bf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38#include <linux/io.h> 38#include <linux/io.h>
39#include <linux/ftrace.h> 39#include <linux/ftrace.h>
40#include <linux/cpuidle.h>
40 41
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
42#include <asm/system.h> 43#include <asm/system.h>
@@ -136,7 +137,8 @@ void cpu_idle(void)
136 enter_idle(); 137 enter_idle();
137 /* Don't trace irqs off for idle */ 138 /* Don't trace irqs off for idle */
138 stop_critical_timings(); 139 stop_critical_timings();
139 pm_idle(); 140 if (cpuidle_idle_call())
141 pm_idle();
140 start_critical_timings(); 142 start_critical_timings();
141 143
142 /* In many cases the interrupt that ended idle 144 /* In many cases the interrupt that ended idle
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index f61ccdd4934..1ea38775a6d 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_X86_MRST)		+= mrst.o
 obj-$(CONFIG_X86_MRST)		+= vrtc.o
 obj-$(CONFIG_EARLY_PRINTK_MRST)	+= early_printk_mrst.o
+obj-$(CONFIG_X86_MRST)		+= pmu.o
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
new file mode 100644
index 00000000000..9281da7d91b
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.c
@@ -0,0 +1,817 @@
1/*
2 * mrst/pmu.c - driver for MRST Power Management Unit
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#include <linux/cpuidle.h>
21#include <linux/debugfs.h>
22#include <linux/delay.h>
23#include <linux/interrupt.h>
24#include <linux/module.h>
25#include <linux/pci.h>
26#include <linux/seq_file.h>
27#include <linux/sfi.h>
28#include <asm/intel_scu_ipc.h>
29#include "pmu.h"
30
31#define IPCMSG_FW_REVISION 0xF4
32
33struct mrst_device {
34 u16 pci_dev_num; /* DEBUG only */
35 u16 lss;
36 u16 latest_request;
37 unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
38};
39
40/*
41 * complete list of MRST PCI devices
42 */
43static struct mrst_device mrst_devs[] = {
44/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
45/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
46/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
47/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
48/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
49/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
50/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
51/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
52/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
53/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
54/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
55/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
56/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
57/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
58/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
59/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
60/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
61/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
62/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
63/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
64/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
65/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
66
67/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
68
69/* 23 */ { 0x4102, 0 }, /* Lincroft */
70/* 24 */ { 0x4110, 0 }, /* Lincroft */
71};
72
73/* n.b. We ignore PCI-id 0x815 in LSS9 b/c MeeGo has no driver for it */
74static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
75static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
76 0x0804, 0x0805, 0x080f, 0};
77
78/* handle concurrent SMP invocations of pmu_pci_set_power_state() */
79static spinlock_t mrst_pmu_power_state_lock;
80
81static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
82static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
83
84static int graphics_is_off;
85static int lss_s0i3_enabled;
86static bool mrst_pmu_s0i3_enable;
87
88/* debug counters */
89static u32 pmu_wait_ready_calls;
90static u32 pmu_wait_ready_udelays;
91static u32 pmu_wait_ready_udelays_max;
92static u32 pmu_wait_done_calls;
93static u32 pmu_wait_done_udelays;
94static u32 pmu_wait_done_udelays_max;
95static u32 pmu_set_power_state_entry;
96static u32 pmu_set_power_state_send_cmd;
97
98static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
99{
100 int index = 0;
101
102 if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
103 index = pci_dev_num - 0x800;
104 else if (pci_dev_num == 0x084F)
105 index = 22;
106 else if (pci_dev_num == 0x4102)
107 index = 23;
108 else if (pci_dev_num == 0x4110)
109 index = 24;
110
111 if (pci_dev_num != mrst_devs[index].pci_dev_num) {
112 WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
113 return 0;
114 }
115
116 return &mrst_devs[index];
117}
118
119/**
120 * mrst_pmu_invalid_cstates
121 * @dev: cpuidle_device
122 *
123 * Certain states are not appropriate for the governor to pick in some cases.
124 * This function will be called as the cpuidle_device's prepare callback and
125 * thus tells the governor to ignore such states when selecting the next
126 * state to enter.
127 */
128
129#define IDLE_STATE4_IS_C6 4
130#define IDLE_STATE5_IS_S0I3 5
131
132int mrst_pmu_invalid_cstates(void)
133{
134 int cpu = smp_processor_id();
135
136 /*
137 * Demote to C4 if the PMU is busy.
138 * Since LSS changes leave the busy bit clear...
139 * busy means either the PMU is waiting for an ACK-C6 that
140 * isn't coming due to an MWAIT that returned immediately;
141 * or we returned from S0i3 successfully, and the PMU
142 * is not done sending us interrupts.
143 */
144 if (pmu_read_busy_status())
145 return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
146
147 /*
148 * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
149 * or if device LSS is insufficient, or the GPU is active,
150 * or if it has been explicitly disabled.
151 */
152 if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
153 !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
154 return 1 << IDLE_STATE5_IS_S0I3;
155 else
156 return 0;
157}
158
159/*
160 * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
161 * DEBUG only.
162 */
163static void pmu_update_wake_counters(void)
164{
165 int lss;
166 u32 wake_status;
167
168 wake_status = pmu_read_wks();
169
170 for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
171 if (wake_status & (1 << lss))
172 wake_counters[lss]++;
173 }
174}
175
176int mrst_pmu_s0i3_entry(void)
177{
178 int status;
179
180 /* Clear any possible error conditions */
181 pmu_write_ics(0x300);
182
183 /* set wake control to current D-states */
184 pmu_write_wssc(S0I3_SSS_TARGET);
185
186 status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
187 pmu_update_wake_counters();
188 return status;
189}
190
191/* poll for a maximum of 5ms for the busy bit to clear */
192static int pmu_wait_ready(void)
193{
194 int udelays;
195
196 pmu_wait_ready_calls++;
197
198 for (udelays = 0; udelays < 500; ++udelays) {
199 if (udelays > pmu_wait_ready_udelays_max)
200 pmu_wait_ready_udelays_max = udelays;
201
202 if (pmu_read_busy_status() == 0)
203 return 0;
204
205 udelay(10);
206 pmu_wait_ready_udelays++;
207 }
208
209 /*
210 * if this fires, observe
211 * /sys/kernel/debug/mrst_pmu_wait_ready_calls
212 * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
213 */
214 WARN_ONCE(1, "SCU not ready for 5ms");
215 return -EBUSY;
216}
217/* poll for a maximum of 50ms for the busy bit to clear */
218static int pmu_wait_done(void)
219{
220 int udelays;
221
222 pmu_wait_done_calls++;
223
224 for (udelays = 0; udelays < 500; ++udelays) {
225 if (udelays > pmu_wait_done_udelays_max)
226 pmu_wait_done_udelays_max = udelays;
227
228 if (pmu_read_busy_status() == 0)
229 return 0;
230
231 udelay(100);
232 pmu_wait_done_udelays++;
233 }
234
235 /*
236 * if this fires, observe
237 * /sys/kernel/debug/mrst_pmu_wait_done_calls
238 * /sys/kernel/debug/mrst_pmu_wait_done_udelays
239 */
240 WARN_ONCE(1, "SCU not done for 50ms");
241 return -EBUSY;
242}
243
244u32 mrst_pmu_msi_is_disabled(void)
245{
246 return pmu_msi_is_disabled();
247}
248
249void mrst_pmu_enable_msi(void)
250{
251 pmu_msi_enable();
252}
253
254/**
255 * pmu_irq - pmu driver interrupt handler
256 * Context: interrupt context
257 */
258static irqreturn_t pmu_irq(int irq, void *dummy)
259{
260 union pmu_pm_ics pmu_ics;
261
262 pmu_ics.value = pmu_read_ics();
263
264 if (!pmu_ics.bits.pending)
265 return IRQ_NONE;
266
267 switch (pmu_ics.bits.cause) {
268 case INT_SPURIOUS:
269 case INT_CMD_DONE:
270 case INT_CMD_ERR:
271 case INT_WAKE_RX:
272 case INT_SS_ERROR:
273 case INT_S0IX_MISS:
274 case INT_NO_ACKC6:
275 pmu_irq_stats[pmu_ics.bits.cause]++;
276 break;
277 default:
278 pmu_irq_stats[INT_INVALID]++;
279 }
280
281 pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
282
283 return IRQ_HANDLED;
284}
285
286/*
287 * Translate PCI power management to MRST LSS D-states
288 */
289static int pci_2_mrst_state(int lss, pci_power_t pci_state)
290{
291 switch (pci_state) {
292 case PCI_D0:
293 if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
294 return D0i1;
295 else
296 return D0;
297 case PCI_D1:
298 return D0i1;
299 case PCI_D2:
300 return D0i2;
301 case PCI_D3hot:
302 case PCI_D3cold:
303 return D0i3;
304 default:
305 WARN(1, "pci_state %d\n", pci_state);
306 return 0;
307 }
308}
309
310static int pmu_issue_command(u32 pm_ssc)
311{
312 union pmu_pm_set_cfg_cmd_t command;
313
314 if (pmu_read_busy_status()) {
315 pr_debug("pmu is busy, Operation not permitted\n");
316 return -1;
317 }
318
319	/*
320	 * enable interrupts in the PMU so that an interrupt is
321	 * propagated when the ioc bit for a particular set
322	 * command is set
323	 */
324
325 pmu_irq_enable();
326
327 /* Configure the sub systems for pmu2 */
328
329 pmu_write_ssc(pm_ssc);
330
331	/*
332	 * Send the set config command to the PMU; it is configured
333	 * for mode CM_IMMEDIATE and hence with no trigger
334	 */
335
336 command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
337 command.pmu2_params.d_param.cfg_delay = 0;
338 command.pmu2_params.d_param.rsvd = 0;
339
340 /* construct the command to send SET_CFG to particular PMU */
341 command.pmu2_params.d_param.cmd = SET_CFG_CMD;
342 command.pmu2_params.d_param.ioc = 0;
343 command.pmu2_params.d_param.mode_id = 0;
344 command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
345
346 /* write the value of PM_CMD into particular PMU */
347 pr_debug("pmu command being written %x\n",
348 command.pmu_pm_set_cfg_cmd_value);
349
350 pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
351
352 return 0;
353}
354
355static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
356{
357 u16 existing_request;
358 int i;
359
360 for (i = 0; ids[i]; ++i) {
361 struct mrst_device *mrst_dev;
362
363 mrst_dev = pci_id_2_mrst_dev(ids[i]);
364 if (unlikely(!mrst_dev))
365 continue;
366
367 existing_request = mrst_dev->latest_request;
368 if (existing_request < pci_state)
369 pci_state = existing_request;
370 }
371 return pci_state;
372}
373
374/**
375 * pmu_pci_set_power_state - Callback function is used by all the PCI devices
376 * for a platform specific device power on/shutdown.
377 */
378
379int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
380{
381 u32 old_sss, new_sss;
382 int status = 0;
383 struct mrst_device *mrst_dev;
384
385 pmu_set_power_state_entry++;
386
387 BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
388 BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
389
390 mrst_dev = pci_id_2_mrst_dev(pdev->device);
391 if (unlikely(!mrst_dev))
392 return -ENODEV;
393
394 mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
395
396 /* PMU driver calls self as part of PCI initialization, ignore */
397 if (pdev->device == PCI_DEV_ID_MRST_PMU)
398 return 0;
399
400 BUG_ON(!pmu_reg); /* SW bug if called before initialized */
401
402 spin_lock(&mrst_pmu_power_state_lock);
403
404 if (pdev->d3_delay) {
405 dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
406 pdev->d3_delay);
407 pdev->d3_delay = 0;
408 }
409 /*
410 * If Lincroft graphics, simply remember state
411 */
412 if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
413 && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
414 if (pci_state == PCI_D0)
415 graphics_is_off = 0;
416 else
417 graphics_is_off = 1;
418 goto ret;
419 }
420
421 if (!mrst_dev->lss)
422 goto ret; /* device with no LSS */
423
424 if (mrst_dev->latest_request == pci_state)
425 goto ret; /* no change */
426
427 mrst_dev->latest_request = pci_state; /* record latest request */
428
429 /*
430 * LSS9 and LSS10 contain multiple PCI devices.
431 * Use the lowest numbered (highest power) state in the LSS
432 */
433 if (mrst_dev->lss == 9)
434 pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
435 else if (mrst_dev->lss == 10)
436 pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
437
438 status = pmu_wait_ready();
439 if (status)
440 goto ret;
441
442 old_sss = pmu_read_sss();
443 new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
444 new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
445 mrst_dev->lss);
446
447 if (new_sss == old_sss)
448 goto ret; /* nothing to do */
449
450 pmu_set_power_state_send_cmd++;
451
452 status = pmu_issue_command(new_sss);
453
454 if (unlikely(status != 0)) {
455 dev_err(&pdev->dev, "Failed to Issue a PM command\n");
456 goto ret;
457 }
458
459 if (pmu_wait_done())
460 goto ret;
461
462 lss_s0i3_enabled =
463 ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
464ret:
465 spin_unlock(&mrst_pmu_power_state_lock);
466 return status;
467}
468
469#ifdef CONFIG_DEBUG_FS
470static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
471
472static inline const char *d0ix_name(int state)
473{
474 return d0ix_names[(int) state];
475}
476
477static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
478{
479 struct pci_dev *pdev = NULL;
480 u32 cur_pmsss;
481 int lss;
482
483 seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
484
485 cur_pmsss = pmu_read_sss();
486
487 seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
488
489 seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
490 seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
491
492 if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
493 seq_printf(s, "cpu0 is only cpu online\n");
494 else
495 seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
496
497 seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
498
499
500 for_each_pci_dev(pdev) {
501 int pos;
502 u16 pmcsr;
503 struct mrst_device *mrst_dev;
504 int i;
505
506 mrst_dev = pci_id_2_mrst_dev(pdev->device);
507
508 seq_printf(s, "%s %04x/%04X %-16.16s ",
509 dev_name(&pdev->dev),
510 pdev->vendor, pdev->device,
511 dev_driver_string(&pdev->dev));
512
513 if (unlikely (!mrst_dev)) {
514 seq_printf(s, " UNKNOWN\n");
515 continue;
516 }
517
518 if (mrst_dev->lss)
519 seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
520 d0ix_name(((cur_pmsss >>
521 (mrst_dev->lss * 2)) & 0x3)));
522 else
523 seq_printf(s, " ");
524
525 /* PCI PM config space setting */
526 pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
527 if (pos != 0) {
528 pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
529 seq_printf(s, "PCI-%-4s",
530 pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
531 } else {
532 seq_printf(s, " ");
533 }
534
535 seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
536 for (i = 0; i <= PCI_D3cold; ++i)
537 seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
538
539 if (mrst_dev->lss) {
540 unsigned int lssmask;
541
542 lssmask = SSMSK(D0i3, mrst_dev->lss);
543
544 if ((lssmask & S0I3_SSS_TARGET) &&
545 ((lssmask & cur_pmsss) !=
546 (lssmask & S0I3_SSS_TARGET)))
547 seq_printf(s , "[BLOCKS s0i3]");
548 }
549
550 seq_printf(s, "\n");
551 }
552 seq_printf(s, "Wake Counters:\n");
553 for (lss = 0; lss < MRST_NUM_LSS; ++lss)
554 seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
555
556 seq_printf(s, "Interrupt Counters:\n");
557 seq_printf(s,
558 "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
559 "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
560 "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
561 "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
562 pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
563 pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
564 pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
565 pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
566
567 seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
568 pmu_wait_ready_calls);
569 seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
570 pmu_wait_ready_udelays);
571 seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
572 pmu_wait_ready_udelays_max);
573 seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
574 pmu_wait_done_calls);
575 seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
576 pmu_wait_done_udelays);
577 seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
578 pmu_wait_done_udelays_max);
579 seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
580 pmu_set_power_state_entry);
581 seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
582 pmu_set_power_state_send_cmd);
583 seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
584
585 return 0;
586}
587
588static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
589{
590 return single_open(file, debug_mrst_pmu_show, NULL);
591}
592
593static const struct file_operations devices_state_operations = {
594 .open = debug_mrst_pmu_open,
595 .read = seq_read,
596 .llseek = seq_lseek,
597 .release = single_release,
598};
599#endif /* DEBUG_FS */
600
601/*
602 * Validate SCU PCI shim PCI vendor capability byte
603 * against LSS hard-coded in mrst_devs[] above.
604 * DEBUG only.
605 */
606static void pmu_scu_firmware_debug(void)
607{
608 struct pci_dev *pdev = NULL;
609
610 for_each_pci_dev(pdev) {
611 struct mrst_device *mrst_dev;
612 u8 pci_config_lss;
613 int pos;
614
615 mrst_dev = pci_id_2_mrst_dev(pdev->device);
616 if (unlikely(!mrst_dev)) {
617 printk(KERN_ERR FW_BUG "pmu: Unknown "
618 "PCI device 0x%04X\n", pdev->device);
619 continue;
620 }
621
622 if (mrst_dev->lss == 0)
623 continue; /* no LSS in our table */
624
625 pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
626		if (pos == 0) {
627 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
628 "missing PCI Vendor Capability\n",
629 pdev->device);
630 continue;
631 }
632 pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
633 if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
634 printk(KERN_ERR FW_BUG "pmu: 0x%04X "
635 "invalid PCI Vendor Capability 0x%x "
636 " expected LSS 0x%X\n",
637 pdev->device, pci_config_lss, mrst_dev->lss);
638 continue;
639 }
640 pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
641
642 if (mrst_dev->lss == pci_config_lss)
643 continue;
644
645 printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
646 pdev->device, pci_config_lss, mrst_dev->lss);
647 }
648}
649
650/**
651 * pmu_probe
652 */
653static int __devinit pmu_probe(struct pci_dev *pdev,
654 const struct pci_device_id *pci_id)
655{
656 int ret;
657 struct mrst_pmu_reg *pmu;
658
659 /* Init the device */
660 ret = pci_enable_device(pdev);
661 if (ret) {
662 dev_err(&pdev->dev, "Unable to Enable PCI device\n");
663 return ret;
664 }
665
666 ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
667 if (ret < 0) {
668 dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
669 goto out_err1;
670 }
671
672 /* Map the memory of PMU reg base */
673 pmu = pci_iomap(pdev, 0, 0);
674 if (!pmu) {
675 dev_err(&pdev->dev, "Unable to map the PMU address space\n");
676 ret = -ENOMEM;
677 goto out_err2;
678 }
679
680#ifdef CONFIG_DEBUG_FS
681 /* /sys/kernel/debug/mrst_pmu */
682 (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
683 NULL, NULL, &devices_state_operations);
684#endif
685 pmu_reg = pmu; /* success */
686
687 if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
688 dev_err(&pdev->dev, "Registering isr has failed\n");
689 ret = -1;
690 goto out_err3;
691 }
692
693 pmu_scu_firmware_debug();
694
695 pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
696
697 pmu_wait_ready();
698
699 pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
700 pmu_write_cmd(0x201);
701
702 spin_lock_init(&mrst_pmu_power_state_lock);
703
704 /* Enable the hardware interrupt */
705 pmu_irq_enable();
706 return 0;
707
708out_err3:
709 free_irq(pdev->irq, NULL);
710 pci_iounmap(pdev, pmu_reg);
711 pmu_reg = NULL;
712out_err2:
713 pci_release_region(pdev, 0);
714out_err1:
715 pci_disable_device(pdev);
716 return ret;
717}
718
719static void __devexit pmu_remove(struct pci_dev *pdev)
720{
721 dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
722
723 /* Freeing up the irq */
724 free_irq(pdev->irq, NULL);
725
726 pci_iounmap(pdev, pmu_reg);
727 pmu_reg = NULL;
728
729 /* disable the current PCI device */
730 pci_release_region(pdev, 0);
731 pci_disable_device(pdev);
732}
733
734static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
735 { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
736 { }
737};
738
739MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
740
741static struct pci_driver driver = {
742 .name = MRST_PMU_DRV_NAME,
743 .id_table = pmu_pci_ids,
744 .probe = pmu_probe,
745 .remove = __devexit_p(pmu_remove),
746};
747
748/**
749 * pmu_pci_register - register the PMU driver as PCI device
750 */
751static int __init pmu_pci_register(void)
752{
753 return pci_register_driver(&driver);
754}
755
756/* Register and probe via fs_initcall() to precede device_initcall() */
757fs_initcall(pmu_pci_register);
758
759static void __exit mid_pci_cleanup(void)
760{
761 pci_unregister_driver(&driver);
762}
763
764static int ia_major;
765static int ia_minor;
766
767static int pmu_sfi_parse_oem(struct sfi_table_header *table)
768{
769 struct sfi_table_simple *sb;
770
771 sb = (struct sfi_table_simple *)table;
772 ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
773 ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
774 printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
775 ia_major, ia_minor);
776
777 return 0;
778}
779
780static int __init scu_fw_check(void)
781{
782 int ret;
783 u32 fw_version;
784
785 if (!pmu_reg)
786 return 0; /* this driver didn't probe-out */
787
788 sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
789
790 if (ia_major < 0x6005 || ia_minor < 0x1525) {
791 WARN(1, "mrst_pmu: IA FW version too old\n");
792 return -1;
793 }
794
795 ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
796 &fw_version, 1);
797
798 if (ret) {
799 WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
800 } else {
801 int scu_major = (fw_version >> 8) & 0xFF;
802 int scu_minor = (fw_version >> 0) & 0xFF;
803
804 printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
805
806 if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
807 printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
808 mrst_pmu_s0i3_enable = true;
809 } else {
810 WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
811 scu_major, scu_minor);
812 }
813 }
814 return 0;
815}
816late_initcall(scu_fw_check);
817module_exit(mid_pci_cleanup);
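LSS 9 and 10 are shared by several PCI functions, so the driver may only request the deepest state every sharer has agreed to; pmu_min_lss_pci_req() implements that as a minimum over the devices' latest_request values. A small C illustration of the rule (hypothetical request array, numeric PCI_D* ordering as in the code above):

	/* Shared-LSS rule: the LSS runs at the shallowest (lowest-numbered,
	 * highest-power) state any device sharing it still requests. */
	static int lss_target_state(const int *requests, int n, int wanted)
	{
		int i, state = wanted;

		for (i = 0; i < n; i++)
			if (requests[i] < state)
				state = requests[i];	/* someone needs more power */
		return state;
	}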
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
new file mode 100644
index 00000000000..bfbfe64b167
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.h
@@ -0,0 +1,234 @@
1/*
2 * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
3 *
4 * Copyright (c) 2011, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#ifndef _MRST_PMU_H_
21#define _MRST_PMU_H_
22
23#define PCI_DEV_ID_MRST_PMU 0x0810
24#define MRST_PMU_DRV_NAME "mrst_pmu"
25#define PCI_SUB_CLASS_MASK 0xFF00
26
27#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
28#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
29
30#define SUB_SYS_ALL_D0I1 0x01155555
31#define S0I3_WAKE_SOURCES 0x00001FFF
32
33#define PM_S0I3_COMMAND \
34 ((0 << 31) | /* Reserved */ \
35 (0 << 30) | /* Core must be idle */ \
36 (0xc2 << 22) | /* ACK C6 trigger */ \
37 (3 << 19) | /* Trigger on DMI message */ \
38 (3 << 16) | /* Enter S0i3 */ \
39 (0 << 13) | /* Numeric mode ID (sw) */ \
40 (3 << 9) | /* Trigger mode */ \
41 (0 << 8) | /* Do not interrupt */ \
42 (1 << 0)) /* Set configuration */
43
44#define LSS_DMI 0
45#define LSS_SD_HC0 1
46#define LSS_SD_HC1 2
47#define LSS_NAND 3
48#define LSS_IMAGING 4
49#define LSS_SECURITY 5
50#define LSS_DISPLAY 6
51#define LSS_USB_HC 7
52#define LSS_USB_OTG 8
53#define LSS_AUDIO 9
54#define LSS_AUDIO_LPE 9
55#define LSS_AUDIO_SSP 9
56#define LSS_I2C0 10
57#define LSS_I2C1 10
58#define LSS_I2C2 10
59#define LSS_KBD 10
60#define LSS_SPI0 10
61#define LSS_SPI1 10
62#define LSS_SPI2 10
63#define LSS_GPIO 10
64#define LSS_SRAM 11 /* used by SCU, do not touch */
65#define LSS_SD_HC2 12
66/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
67#define MRST_NUM_LSS 13
68
69#define MIN(a, b) (((a) < (b)) ? (a) : (b))
70
71#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
72#define D0 0
73#define D0i1 1
74#define D0i2 2
75#define D0i3 3
76
77#define S0I3_SSS_TARGET ( \
78 SSMSK(D0i1, LSS_DMI) | \
79 SSMSK(D0i3, LSS_SD_HC0) | \
80 SSMSK(D0i3, LSS_SD_HC1) | \
81 SSMSK(D0i3, LSS_NAND) | \
82 SSMSK(D0i3, LSS_SD_HC2) | \
83 SSMSK(D0i3, LSS_IMAGING) | \
84 SSMSK(D0i3, LSS_SECURITY) | \
85 SSMSK(D0i3, LSS_DISPLAY) | \
86 SSMSK(D0i3, LSS_USB_HC) | \
87 SSMSK(D0i3, LSS_USB_OTG) | \
88 SSMSK(D0i3, LSS_AUDIO) | \
89 SSMSK(D0i1, LSS_I2C0))
90
91/*
92 * D0i1 on Langwell is Autonomous Clock Gating (ACG).
93 * Enable ACG on every LSS except camera and audio
94 */
95#define D0I1_ACG_SSS_TARGET \
96 (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
97
98enum cm_mode {
99 CM_NOP, /* ignore the config mode value */
100 CM_IMMEDIATE,
101 CM_DELAY,
102 CM_TRIGGER,
103 CM_INVALID
104};
105
106enum sys_state {
107 SYS_STATE_S0I0,
108 SYS_STATE_S0I1,
109 SYS_STATE_S0I2,
110 SYS_STATE_S0I3,
111 SYS_STATE_S3,
112 SYS_STATE_S5
113};
114
115#define SET_CFG_CMD 1
116
117enum int_status {
118 INT_SPURIOUS = 0,
119 INT_CMD_DONE = 1,
120 INT_CMD_ERR = 2,
121 INT_WAKE_RX = 3,
122 INT_SS_ERROR = 4,
123 INT_S0IX_MISS = 5,
124 INT_NO_ACKC6 = 6,
125 INT_INVALID = 7,
126};
127
128/* PMU register interface */
129static struct mrst_pmu_reg {
130 u32 pm_sts; /* 0x00 */
131 u32 pm_cmd; /* 0x04 */
132 u32 pm_ics; /* 0x08 */
133 u32 _resv1; /* 0x0C */
134 u32 pm_wkc[2]; /* 0x10 */
135 u32 pm_wks[2]; /* 0x18 */
136 u32 pm_ssc[4]; /* 0x20 */
137 u32 pm_sss[4]; /* 0x30 */
138 u32 pm_wssc[4]; /* 0x40 */
139 u32 pm_c3c4; /* 0x50 */
140 u32 pm_c5c6; /* 0x54 */
141 u32 pm_msi_disable; /* 0x58 */
142} *pmu_reg;
143
144static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
145static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
146static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
147static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
148
149static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
150static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
151static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
152static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
153static inline void pmu_write_wssc(u32 arg)
154 { writel(arg, &pmu_reg->pm_wssc[0]); }
155
156static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
157static inline u32 pmu_msi_is_disabled(void)
158 { return readl(&pmu_reg->pm_msi_disable); }
159
160union pmu_pm_ics {
161 struct {
162 u32 cause:8;
163 u32 enable:1;
164 u32 pending:1;
165 u32 reserved:22;
166 } bits;
167 u32 value;
168};
169
170static inline void pmu_irq_enable(void)
171{
172 union pmu_pm_ics pmu_ics;
173
174 pmu_ics.value = pmu_read_ics();
175 pmu_ics.bits.enable = 1;
176 pmu_write_ics(pmu_ics.value);
177}
178
179union pmu_pm_status {
180 struct {
181 u32 pmu_rev:8;
182 u32 pmu_busy:1;
183 u32 mode_id:4;
184 u32 Reserved:19;
185 } pmu_status_parts;
186 u32 pmu_status_value;
187};
188
189static inline int pmu_read_busy_status(void)
190{
191 union pmu_pm_status result;
192
193 result.pmu_status_value = pmu_read_sts();
194
195 return result.pmu_status_parts.pmu_busy;
196}
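/*
 * Illustrative sketch, not part of this patch: a caller would typically
 * poll the busy bit decoded above before issuing the next PM command.
 * The helper name below is hypothetical; timeout handling is left to
 * the caller.
 */
static inline void pmu_wait_not_busy_example(void)
{
	while (pmu_read_busy_status())
		cpu_relax();	/* PMU is still executing the previous command */
}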
197
198/* pmu set config parameters */
199struct cfg_delay_param_t {
200 u32 cmd:8;
201 u32 ioc:1;
202 u32 cfg_mode:4;
203 u32 mode_id:3;
204 u32 sys_state:3;
205 u32 cfg_delay:8;
206 u32 rsvd:5;
207};
208
209struct cfg_trig_param_t {
210 u32 cmd:8;
211 u32 ioc:1;
212 u32 cfg_mode:4;
213 u32 mode_id:3;
214 u32 sys_state:3;
215 u32 cfg_trig_type:3;
216 u32 cfg_trig_val:8;
217 u32 cmbi:1;
218 u32 rsvd1:1;
219};
220
221union pmu_pm_set_cfg_cmd_t {
222 union {
223 struct cfg_delay_param_t d_param;
224 struct cfg_trig_param_t t_param;
225 } pmu2_params;
226 u32 pmu_pm_set_cfg_cmd_value;
227};
228
229#ifdef FUTURE_PATCH
230extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
231#else
232static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
233#endif
234#endif
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 45e94aca5bc..3326204e251 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
15 grant-table.o suspend.o platform-pci-unplug.o \ 15 grant-table.o suspend.o platform-pci-unplug.o \
16 p2m.o 16 p2m.o
17 17
18obj-$(CONFIG_FUNCTION_TRACER) += trace.o 18obj-$(CONFIG_FTRACE) += trace.o
19 19
20obj-$(CONFIG_SMP) += smp.o 20obj-$(CONFIG_SMP) += smp.o
21obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o 21obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 60aeeb56948..df118a825f3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/memblock.h> 11#include <linux/memblock.h>
12#include <linux/cpuidle.h>
12 13
13#include <asm/elf.h> 14#include <asm/elf.h>
14#include <asm/vdso.h> 15#include <asm/vdso.h>
@@ -92,8 +93,6 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
92 if (end <= start) 93 if (end <= start)
93 return 0; 94 return 0;
94 95
95 printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
96 start, end);
97 for(pfn = start; pfn < end; pfn++) { 96 for(pfn = start; pfn < end; pfn++) {
98 unsigned long mfn = pfn_to_mfn(pfn); 97 unsigned long mfn = pfn_to_mfn(pfn);
99 98
@@ -106,14 +105,14 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
106 105
107 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 106 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
108 &reservation); 107 &reservation);
109 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", 108 WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
110 start, end, ret);
111 if (ret == 1) { 109 if (ret == 1) {
112 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 110 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
113 len++; 111 len++;
114 } 112 }
115 } 113 }
116 printk(KERN_CONT "%ld pages freed\n", len); 114 printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
115 start, end, len);
117 116
118 return len; 117 return len;
119} 118}
@@ -139,7 +138,7 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
139 if (last_end < max_addr) 138 if (last_end < max_addr)
140 released += xen_release_chunk(last_end, max_addr); 139 released += xen_release_chunk(last_end, max_addr);
141 140
142 printk(KERN_INFO "released %ld pages of unused memory\n", released); 141 printk(KERN_INFO "released %lu pages of unused memory\n", released);
143 return released; 142 return released;
144} 143}
145 144
@@ -426,7 +425,7 @@ void __init xen_arch_setup(void)
426#ifdef CONFIG_X86_32 425#ifdef CONFIG_X86_32
427 boot_cpu_data.hlt_works_ok = 1; 426 boot_cpu_data.hlt_works_ok = 1;
428#endif 427#endif
429 pm_idle = default_idle; 428 disable_cpuidle();
430 boot_option_idle_override = IDLE_HALT; 429 boot_option_idle_override = IDLE_HALT;
431 430
432 fiddle_vdso(); 431 fiddle_vdso();
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c
index 734beba2a08..520022d1a18 100644
--- a/arch/x86/xen/trace.c
+++ b/arch/x86/xen/trace.c
@@ -1,4 +1,5 @@
1#include <linux/ftrace.h> 1#include <linux/ftrace.h>
2#include <xen/interface/xen.h>
2 3
3#define N(x) [__HYPERVISOR_##x] = "("#x")" 4#define N(x) [__HYPERVISOR_##x] = "("#x")"
4static const char *xen_hypercall_names[] = { 5static const char *xen_hypercall_names[] = {
diff --git a/block/blk-core.c b/block/blk-core.c
index b850bedad22..b627558c461 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
1368 1368
1369static int __init fail_make_request_debugfs(void) 1369static int __init fail_make_request_debugfs(void)
1370{ 1370{
1371 return init_fault_attr_dentries(&fail_make_request, 1371 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
1372 "fail_make_request"); 1372 NULL, &fail_make_request);
1373
1374 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
1373} 1375}
1374 1376
1375late_initcall(fail_make_request_debugfs); 1377late_initcall(fail_make_request_debugfs);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4f0c06c7a33..78035488895 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q)
28 28
29static int __init fail_io_timeout_debugfs(void) 29static int __init fail_io_timeout_debugfs(void)
30{ 30{
31 return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); 31 struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
32 NULL, &fail_io_timeout);
33
34 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
32} 35}
33 36
34late_initcall(fail_io_timeout_debugfs); 37late_initcall(fail_io_timeout_debugfs);
diff --git a/crypto/md5.c b/crypto/md5.c
index 30efc7dad89..7febeaab923 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -21,99 +21,9 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/cryptohash.h>
24#include <asm/byteorder.h> 25#include <asm/byteorder.h>
25 26
26#define F1(x, y, z) (z ^ (x & (y ^ z)))
27#define F2(x, y, z) F1(z, x, y)
28#define F3(x, y, z) (x ^ y ^ z)
29#define F4(x, y, z) (y ^ (x | ~z))
30
31#define MD5STEP(f, w, x, y, z, in, s) \
32 (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
33
34static void md5_transform(u32 *hash, u32 const *in)
35{
36 u32 a, b, c, d;
37
38 a = hash[0];
39 b = hash[1];
40 c = hash[2];
41 d = hash[3];
42
43 MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
44 MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
45 MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
46 MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
47 MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
48 MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
49 MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
50 MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
51 MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
52 MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
53 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
54 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
55 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
56 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
57 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
58 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
59
60 MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
61 MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
62 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
63 MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
64 MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
65 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
66 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
67 MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
68 MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
69 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
70 MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
71 MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
72 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
73 MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
74 MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
75 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
76
77 MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
78 MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
79 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
80 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
81 MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
82 MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
83 MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
84 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
85 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
86 MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
87 MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
88 MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
89 MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
90 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
91 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
92 MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
93
94 MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
95 MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
96 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
97 MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
98 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
99 MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
100 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
101 MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
102 MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
103 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
104 MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
105 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
106 MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
107 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
108 MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
109 MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
110
111 hash[0] += a;
112 hash[1] += b;
113 hash[2] += c;
114 hash[3] += d;
115}
116
117/* XXX: this stuff can be optimized */ 27/* XXX: this stuff can be optimized */
118static inline void le32_to_cpu_array(u32 *buf, unsigned int words) 28static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
119{ 29{
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 73863d86f02..76dc02f1557 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE);
126 */ 126 */
127u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); 127u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE);
128 128
129/*
130 * Disable runtime checking and repair of values returned by control methods.
131 * Use only if the repair is causing a problem on a particular machine.
132 */
133u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE);
134
129/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ 135/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */
130 136
131struct acpi_table_fadt acpi_gbl_FADT; 137struct acpi_table_fadt acpi_gbl_FADT;
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index c7f743ca395..5552125d834 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -357,6 +357,7 @@ struct acpi_predefined_data {
357 char *pathname; 357 char *pathname;
358 const union acpi_predefined_info *predefined; 358 const union acpi_predefined_info *predefined;
359 union acpi_operand_object *parent_package; 359 union acpi_operand_object *parent_package;
360 struct acpi_namespace_node *node;
360 u32 flags; 361 u32 flags;
361 u8 node_flags; 362 u8 node_flags;
362}; 363};
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 94e73c97cf8..c445cca490e 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] =
468 {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, 468 {{"_SWS", 0, ACPI_RTYPE_INTEGER}},
469 {{"_TC1", 0, ACPI_RTYPE_INTEGER}}, 469 {{"_TC1", 0, ACPI_RTYPE_INTEGER}},
470 {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, 470 {{"_TC2", 0, ACPI_RTYPE_INTEGER}},
471 {{"_TDL", 0, ACPI_RTYPE_INTEGER}},
471 {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, 472 {{"_TIP", 1, ACPI_RTYPE_INTEGER}},
472 {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, 473 {{"_TIV", 1, ACPI_RTYPE_INTEGER}},
473 {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, 474 {{"_TMP", 0, ACPI_RTYPE_INTEGER}},
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 9fb03fa8ffd..c845c8089f3 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
193 } 193 }
194 194
195 /* 195 /*
196 * 1) We have a return value, but if one wasn't expected, just exit, this is 196 * Return value validation and possible repair.
197 * not a problem. For example, if the "Implicit Return" feature is
198 * enabled, methods will always return a value.
199 * 197 *
200 * 2) If the return value can be of any type, then we cannot perform any 198 * 1) Don't perform return value validation/repair if this feature
201 * validation, exit. 199 * has been disabled via a global option.
200 *
201 * 2) We have a return value, but if one wasn't expected, just exit,
202 * this is not a problem. For example, if the "Implicit Return"
203 * feature is enabled, methods will always return a value.
204 *
205 * 3) If the return value can be of any type, then we cannot perform
206 * any validation, just exit.
202 */ 207 */
203 if ((!predefined->info.expected_btypes) || 208 if (acpi_gbl_disable_auto_repair ||
209 (!predefined->info.expected_btypes) ||
204 (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) { 210 (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) {
205 goto cleanup; 211 goto cleanup;
206 } 212 }
@@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node,
212 goto cleanup; 218 goto cleanup;
213 } 219 }
214 data->predefined = predefined; 220 data->predefined = predefined;
221 data->node = node;
215 data->node_flags = node->flags; 222 data->node_flags = node->flags;
216 data->pathname = pathname; 223 data->pathname = pathname;
217 224
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 973883babee..024c4f263f8 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data,
503{ 503{
504 union acpi_operand_object *return_object = *return_object_ptr; 504 union acpi_operand_object *return_object = *return_object_ptr;
505 acpi_status status; 505 acpi_status status;
506 struct acpi_namespace_node *node;
507
508 /*
509 * We can only sort the _TSS return package if there is no _PSS in the
510 * same scope. This is because if _PSS is present, the ACPI specification
511 * dictates that the _TSS Power Dissipation field is to be ignored, and
512 * therefore some BIOSs leave garbage values in the _TSS Power field(s).
513 * In this case, it is best to just return the _TSS package as-is.
514 * (May, 2011)
515 */
516 status =
517 acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node);
518 if (ACPI_SUCCESS(status)) {
519 return (AE_OK);
520 }
506 521
507 status = acpi_ns_check_sorted_list(data, return_object, 5, 1, 522 status = acpi_ns_check_sorted_list(data, return_object, 5, 1,
508 ACPI_SORT_DESCENDING, 523 ACPI_SORT_DESCENDING,
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 48db0944ce4..62365f6075d 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index)
126 } 126 }
127 127
128 /* 128 /*
129 * Originally, we checked the table signature for "SSDT" or "PSDT" here. 129 * Validate the incoming table signature.
130 * Next, we added support for OEMx tables, signature "OEM". 130 *
131 * Valid tables were encountered with a null signature, so we've just 131 * 1) Originally, we checked the table signature for "SSDT" or "PSDT".
132 * given up on validating the signature, since it seems to be a waste 132 * 2) We added support for OEMx tables, signature "OEM".
 133 * of code. The original code was removed (05/2008). 134 * gave up on validating the signature (05/2008).
134 * gave up on validating the signature, (05/2008).
135 * 4) We encountered non-AML tables such as the MADT, which caused
136 * interpreter errors and kernel faults. So now, we once again allow
 137 * only "SSDT", "OEMx", and now also a null signature (05/2011).
134 */ 138 */
139 if ((table_desc->pointer->signature[0] != 0x00) &&
140 (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT))
141 && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) {
142 ACPI_ERROR((AE_INFO,
143 "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx",
144 acpi_ut_valid_acpi_name(*(u32 *)table_desc->
145 pointer->
146 signature) ? table_desc->
147 pointer->signature : "????",
148 *(u32 *)table_desc->pointer->signature));
149
150 return_ACPI_STATUS(AE_BAD_SIGNATURE);
151 }
135 152
136 (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); 153 (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
137 154
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index f739a70b1c7..c34aa51af4e 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -10,9 +10,11 @@ config ACPI_APEI
10 error injection. 10 error injection.
11 11
12config ACPI_APEI_GHES 12config ACPI_APEI_GHES
13 tristate "APEI Generic Hardware Error Source" 13 bool "APEI Generic Hardware Error Source"
14 depends on ACPI_APEI && X86 14 depends on ACPI_APEI && X86
15 select ACPI_HED 15 select ACPI_HED
16 select LLIST
17 select GENERIC_ALLOCATOR
16 help 18 help
17 Generic Hardware Error Source provides a way to report 19 Generic Hardware Error Source provides a way to report
18 platform hardware errors (such as that from chipset). It 20 platform hardware errors (such as that from chipset). It
@@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER
30 PCIe AER errors may be reported via APEI firmware first mode. 32 PCIe AER errors may be reported via APEI firmware first mode.
31 Turn on this option to enable the corresponding support. 33 Turn on this option to enable the corresponding support.
32 34
35config ACPI_APEI_MEMORY_FAILURE
36 bool "APEI memory error recovering support"
37 depends on ACPI_APEI && MEMORY_FAILURE
38 help
39 Memory errors may be reported via APEI firmware first mode.
40 Turn on this option to enable the memory recovering support.
41
33config ACPI_APEI_EINJ 42config ACPI_APEI_EINJ
34 tristate "APEI Error INJection (EINJ)" 43 tristate "APEI Error INJection (EINJ)"
35 depends on ACPI_APEI && DEBUG_FS 44 depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 4a904a4bf05..8041248fce9 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop);
157 * Interpret the specified action. Go through whole action table, 157 * Interpret the specified action. Go through whole action table,
158 * execute all instructions belong to the action. 158 * execute all instructions belong to the action.
159 */ 159 */
160int apei_exec_run(struct apei_exec_context *ctx, u8 action) 160int __apei_exec_run(struct apei_exec_context *ctx, u8 action,
161 bool optional)
161{ 162{
162 int rc; 163 int rc = -ENOENT;
163 u32 i, ip; 164 u32 i, ip;
164 struct acpi_whea_header *entry; 165 struct acpi_whea_header *entry;
165 apei_exec_ins_func_t run; 166 apei_exec_ins_func_t run;
@@ -198,9 +199,9 @@ rewind:
198 goto rewind; 199 goto rewind;
199 } 200 }
200 201
201 return 0; 202 return !optional && rc < 0 ? rc : 0;
202} 203}
203EXPORT_SYMBOL_GPL(apei_exec_run); 204EXPORT_SYMBOL_GPL(__apei_exec_run);
204 205
205typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, 206typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx,
206 struct acpi_whea_header *entry, 207 struct acpi_whea_header *entry,
@@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void)
603 return dapei; 604 return dapei;
604} 605}
605EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); 606EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
607
608int apei_osc_setup(void)
609{
610 static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
611 acpi_handle handle;
612 u32 capbuf[3];
613 struct acpi_osc_context context = {
614 .uuid_str = whea_uuid_str,
615 .rev = 1,
616 .cap.length = sizeof(capbuf),
617 .cap.pointer = capbuf,
618 };
619
620 capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
621 capbuf[OSC_SUPPORT_TYPE] = 0;
622 capbuf[OSC_CONTROL_TYPE] = 0;
623
624 if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
625 || ACPI_FAILURE(acpi_run_osc(handle, &context)))
626 return -EIO;
627 else {
628 kfree(context.ret.pointer);
629 return 0;
630 }
631}
632EXPORT_SYMBOL_GPL(apei_osc_setup);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index ef0581f2094..f57050e7a5e 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx)
50 return ctx->value; 50 return ctx->value;
51} 51}
52 52
53int apei_exec_run(struct apei_exec_context *ctx, u8 action); 53int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional);
54
55static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action)
56{
57 return __apei_exec_run(ctx, action, 0);
58}
59
 60/* It is optional for the firmware to provide the action */
61static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action)
62{
63 return __apei_exec_run(ctx, action, 1);
64}
54 65
55/* Common instruction implementation */ 66/* Common instruction implementation */
56 67
@@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx,
113 const struct acpi_hest_generic_status *estatus); 124 const struct acpi_hest_generic_status *estatus);
114int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); 125int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
115int apei_estatus_check(const struct acpi_hest_generic_status *estatus); 126int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
127
128int apei_osc_setup(void);
116#endif 129#endif
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index f74b2ea11f2..589b96c3870 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -46,7 +46,8 @@
46 * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the 46 * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
47 * EINJ table through an unpublished extension. Use with caution as 47 * EINJ table through an unpublished extension. Use with caution as
48 * most will ignore the parameter and make their own choice of address 48 * most will ignore the parameter and make their own choice of address
49 * for error injection. 49 * for error injection. This extension is used only if
 50 * the param_extension module parameter is specified.
50 */ 51 */
51struct einj_parameter { 52struct einj_parameter {
52 u64 type; 53 u64 type;
@@ -65,6 +66,9 @@ struct einj_parameter {
65 ((struct acpi_whea_header *)((char *)(tab) + \ 66 ((struct acpi_whea_header *)((char *)(tab) + \
66 sizeof(struct acpi_table_einj))) 67 sizeof(struct acpi_table_einj)))
67 68
69static bool param_extension;
70module_param(param_extension, bool, 0);
71
68static struct acpi_table_einj *einj_tab; 72static struct acpi_table_einj *einj_tab;
69 73
70static struct apei_resources einj_resources; 74static struct apei_resources einj_resources;
@@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
285 289
286 einj_exec_ctx_init(&ctx); 290 einj_exec_ctx_init(&ctx);
287 291
288 rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); 292 rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION);
289 if (rc) 293 if (rc)
290 return rc; 294 return rc;
291 apei_exec_ctx_set_input(&ctx, type); 295 apei_exec_ctx_set_input(&ctx, type);
@@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
323 rc = __einj_error_trigger(trigger_paddr); 327 rc = __einj_error_trigger(trigger_paddr);
324 if (rc) 328 if (rc)
325 return rc; 329 return rc;
326 rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); 330 rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION);
327 331
328 return rc; 332 return rc;
329} 333}
@@ -489,14 +493,6 @@ static int __init einj_init(void)
489 einj_debug_dir, NULL, &error_type_fops); 493 einj_debug_dir, NULL, &error_type_fops);
490 if (!fentry) 494 if (!fentry)
491 goto err_cleanup; 495 goto err_cleanup;
492 fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
493 einj_debug_dir, &error_param1);
494 if (!fentry)
495 goto err_cleanup;
496 fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
497 einj_debug_dir, &error_param2);
498 if (!fentry)
499 goto err_cleanup;
500 fentry = debugfs_create_file("error_inject", S_IWUSR, 496 fentry = debugfs_create_file("error_inject", S_IWUSR,
501 einj_debug_dir, NULL, &error_inject_fops); 497 einj_debug_dir, NULL, &error_inject_fops);
502 if (!fentry) 498 if (!fentry)
@@ -513,12 +509,23 @@ static int __init einj_init(void)
513 rc = apei_exec_pre_map_gars(&ctx); 509 rc = apei_exec_pre_map_gars(&ctx);
514 if (rc) 510 if (rc)
515 goto err_release; 511 goto err_release;
516 param_paddr = einj_get_parameter_address(); 512 if (param_extension) {
517 if (param_paddr) { 513 param_paddr = einj_get_parameter_address();
518 einj_param = ioremap(param_paddr, sizeof(*einj_param)); 514 if (param_paddr) {
519 rc = -ENOMEM; 515 einj_param = ioremap(param_paddr, sizeof(*einj_param));
520 if (!einj_param) 516 rc = -ENOMEM;
521 goto err_unmap; 517 if (!einj_param)
518 goto err_unmap;
519 fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
520 einj_debug_dir, &error_param1);
521 if (!fentry)
522 goto err_unmap;
523 fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR,
524 einj_debug_dir, &error_param2);
525 if (!fentry)
526 goto err_unmap;
527 } else
528 pr_warn(EINJ_PFX "Parameter extension is not supported.\n");
522 } 529 }
523 530
524 pr_info(EINJ_PFX "Error INJection is initialized.\n"); 531 pr_info(EINJ_PFX "Error INJection is initialized.\n");
@@ -526,6 +533,8 @@ static int __init einj_init(void)
526 return 0; 533 return 0;
527 534
528err_unmap: 535err_unmap:
536 if (einj_param)
537 iounmap(einj_param);
529 apei_exec_post_unmap_gars(&ctx); 538 apei_exec_post_unmap_gars(&ctx);
530err_release: 539err_release:
531 apei_resources_release(&einj_resources); 540 apei_resources_release(&einj_resources);
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index a4cfb64c86a..903549df809 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -33,7 +33,7 @@
33 33
34#define ERST_DBG_PFX "ERST DBG: " 34#define ERST_DBG_PFX "ERST DBG: "
35 35
36#define ERST_DBG_RECORD_LEN_MAX 4096 36#define ERST_DBG_RECORD_LEN_MAX 0x4000
37 37
38static void *erst_dbg_buf; 38static void *erst_dbg_buf;
39static unsigned int erst_dbg_buf_len; 39static unsigned int erst_dbg_buf_len;
@@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = {
213 213
214static __init int erst_dbg_init(void) 214static __init int erst_dbg_init(void)
215{ 215{
216 if (erst_disable) {
217 pr_info(ERST_DBG_PFX "ERST support is disabled.\n");
218 return -ENODEV;
219 }
216 return misc_register(&erst_dbg_dev); 220 return misc_register(&erst_dbg_dev);
217} 221}
218 222
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index e6cef8e1b53..2ca59dc69f7 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset)
642 int rc; 642 int rc;
643 643
644 erst_exec_ctx_init(&ctx); 644 erst_exec_ctx_init(&ctx);
645 rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); 645 rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
646 if (rc) 646 if (rc)
647 return rc; 647 return rc;
648 apei_exec_ctx_set_input(&ctx, offset); 648 apei_exec_ctx_set_input(&ctx, offset);
@@ -666,7 +666,7 @@ static int __erst_write_to_storage(u64 offset)
666 if (rc) 666 if (rc)
667 return rc; 667 return rc;
668 val = apei_exec_ctx_get_output(&ctx); 668 val = apei_exec_ctx_get_output(&ctx);
669 rc = apei_exec_run(&ctx, ACPI_ERST_END); 669 rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
670 if (rc) 670 if (rc)
671 return rc; 671 return rc;
672 672
@@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
681 int rc; 681 int rc;
682 682
683 erst_exec_ctx_init(&ctx); 683 erst_exec_ctx_init(&ctx);
684 rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); 684 rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
685 if (rc) 685 if (rc)
686 return rc; 686 return rc;
687 apei_exec_ctx_set_input(&ctx, offset); 687 apei_exec_ctx_set_input(&ctx, offset);
@@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset)
709 if (rc) 709 if (rc)
710 return rc; 710 return rc;
711 val = apei_exec_ctx_get_output(&ctx); 711 val = apei_exec_ctx_get_output(&ctx);
712 rc = apei_exec_run(&ctx, ACPI_ERST_END); 712 rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
713 if (rc) 713 if (rc)
714 return rc; 714 return rc;
715 715
@@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id)
724 int rc; 724 int rc;
725 725
726 erst_exec_ctx_init(&ctx); 726 erst_exec_ctx_init(&ctx);
727 rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); 727 rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
728 if (rc) 728 if (rc)
729 return rc; 729 return rc;
730 apei_exec_ctx_set_input(&ctx, record_id); 730 apei_exec_ctx_set_input(&ctx, record_id);
@@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id)
748 if (rc) 748 if (rc)
749 return rc; 749 return rc;
750 val = apei_exec_ctx_get_output(&ctx); 750 val = apei_exec_ctx_get_output(&ctx);
751 rc = apei_exec_run(&ctx, ACPI_ERST_END); 751 rc = apei_exec_run_optional(&ctx, ACPI_ERST_END);
752 if (rc) 752 if (rc)
753 return rc; 753 return rc;
754 754
@@ -932,8 +932,11 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
932static int erst_open_pstore(struct pstore_info *psi); 932static int erst_open_pstore(struct pstore_info *psi);
933static int erst_close_pstore(struct pstore_info *psi); 933static int erst_close_pstore(struct pstore_info *psi);
934static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, 934static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
935 struct timespec *time); 935 struct timespec *time, struct pstore_info *psi);
936static u64 erst_writer(enum pstore_type_id type, size_t size); 936static u64 erst_writer(enum pstore_type_id type, unsigned int part,
937 size_t size, struct pstore_info *psi);
938static int erst_clearer(enum pstore_type_id type, u64 id,
939 struct pstore_info *psi);
937 940
938static struct pstore_info erst_info = { 941static struct pstore_info erst_info = {
939 .owner = THIS_MODULE, 942 .owner = THIS_MODULE,
@@ -942,7 +945,7 @@ static struct pstore_info erst_info = {
942 .close = erst_close_pstore, 945 .close = erst_close_pstore,
943 .read = erst_reader, 946 .read = erst_reader,
944 .write = erst_writer, 947 .write = erst_writer,
945 .erase = erst_clear 948 .erase = erst_clearer
946}; 949};
947 950
948#define CPER_CREATOR_PSTORE \ 951#define CPER_CREATOR_PSTORE \
@@ -983,7 +986,7 @@ static int erst_close_pstore(struct pstore_info *psi)
983} 986}
984 987
985static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, 988static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
986 struct timespec *time) 989 struct timespec *time, struct pstore_info *psi)
987{ 990{
988 int rc; 991 int rc;
989 ssize_t len = 0; 992 ssize_t len = 0;
@@ -1037,7 +1040,8 @@ out:
1037 return (rc < 0) ? rc : (len - sizeof(*rcd)); 1040 return (rc < 0) ? rc : (len - sizeof(*rcd));
1038} 1041}
1039 1042
1040static u64 erst_writer(enum pstore_type_id type, size_t size) 1043static u64 erst_writer(enum pstore_type_id type, unsigned int part,
1044 size_t size, struct pstore_info *psi)
1041{ 1045{
1042 struct cper_pstore_record *rcd = (struct cper_pstore_record *) 1046 struct cper_pstore_record *rcd = (struct cper_pstore_record *)
1043 (erst_info.buf - sizeof(*rcd)); 1047 (erst_info.buf - sizeof(*rcd));
@@ -1080,6 +1084,12 @@ static u64 erst_writer(enum pstore_type_id type, size_t size)
1080 return rcd->hdr.record_id; 1084 return rcd->hdr.record_id;
1081} 1085}
1082 1086
1087static int erst_clearer(enum pstore_type_id type, u64 id,
1088 struct pstore_info *psi)
1089{
1090 return erst_clear(id);
1091}
1092
1083static int __init erst_init(void) 1093static int __init erst_init(void)
1084{ 1094{
1085 int rc = 0; 1095 int rc = 0;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index f703b288115..0784f99a466 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
12 * For more information about Generic Hardware Error Source, please 12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6 13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 * 14 *
15 * Copyright 2010 Intel Corp. 15 * Copyright 2010,2011 Intel Corp.
16 * Author: Huang Ying <ying.huang@intel.com> 16 * Author: Huang Ying <ying.huang@intel.com>
17 * 17 *
18 * This program is free software; you can redistribute it and/or 18 * This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h> 44#include <linux/vmalloc.h>
45#include <linux/irq_work.h>
46#include <linux/llist.h>
47#include <linux/genalloc.h>
45#include <acpi/apei.h> 48#include <acpi/apei.h>
46#include <acpi/atomicio.h> 49#include <acpi/atomicio.h>
47#include <acpi/hed.h> 50#include <acpi/hed.h>
@@ -53,6 +56,30 @@
53#define GHES_PFX "GHES: " 56#define GHES_PFX "GHES: "
54 57
55#define GHES_ESTATUS_MAX_SIZE 65536 58#define GHES_ESTATUS_MAX_SIZE 65536
59#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
60
61#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
62
 63/* This is just an estimate for memory pool allocation */
64#define GHES_ESTATUS_CACHE_AVG_SIZE 512
65
66#define GHES_ESTATUS_CACHES_SIZE 4
67
68#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
 69/* Prevent too many caches from being allocated because of RCU */
70#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
71
72#define GHES_ESTATUS_CACHE_LEN(estatus_len) \
73 (sizeof(struct ghes_estatus_cache) + (estatus_len))
74#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
75 ((struct acpi_hest_generic_status *) \
76 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
77
78#define GHES_ESTATUS_NODE_LEN(estatus_len) \
79 (sizeof(struct ghes_estatus_node) + (estatus_len))
80#define GHES_ESTATUS_FROM_NODE(estatus_node) \
81 ((struct acpi_hest_generic_status *) \
82 ((struct ghes_estatus_node *)(estatus_node) + 1))
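/*
 * Illustrative sketch, not part of this patch: the CACHE/NODE macros
 * above use the common trailing-payload idiom -- a fixed header struct
 * is allocated together with a variable-length body that starts
 * immediately after it, so "header pointer + 1" yields the body.  The
 * demo_* names below are hypothetical.
 */
struct demo_hdr {
	u32 body_len;
};
#define DEMO_TOTAL_LEN(body_len) (sizeof(struct demo_hdr) + (body_len))
#define DEMO_BODY(hdr) ((void *)((struct demo_hdr *)(hdr) + 1))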
56 83
57/* 84/*
58 * One struct ghes is created for each generic hardware error source. 85 * One struct ghes is created for each generic hardware error source.
@@ -77,6 +104,22 @@ struct ghes {
77 }; 104 };
78}; 105};
79 106
107struct ghes_estatus_node {
108 struct llist_node llnode;
109 struct acpi_hest_generic *generic;
110};
111
112struct ghes_estatus_cache {
113 u32 estatus_len;
114 atomic_t count;
115 struct acpi_hest_generic *generic;
116 unsigned long long time_in;
117 struct rcu_head rcu;
118};
119
120int ghes_disable;
121module_param_named(disable, ghes_disable, bool, 0);
122
80static int ghes_panic_timeout __read_mostly = 30; 123static int ghes_panic_timeout __read_mostly = 30;
81 124
82/* 125/*
@@ -121,6 +164,22 @@ static struct vm_struct *ghes_ioremap_area;
121static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 164static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
122static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 165static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
123 166
167/*
 168 * printk is not safe in NMI context.  So in the NMI handler we allocate
 169 * the required memory from a lock-less memory allocator
 170 * (ghes_estatus_pool), save the estatus into it, put it on a lock-less
 171 * list (ghes_estatus_llist), and then defer the printk to IRQ context
 172 * via irq_work (ghes_proc_irq_work).  ghes_estatus_pool_size_request
 173 * records the pool size required by all NMI error sources.
174 */
175static struct gen_pool *ghes_estatus_pool;
176static unsigned long ghes_estatus_pool_size_request;
177static struct llist_head ghes_estatus_llist;
178static struct irq_work ghes_proc_irq_work;
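/*
 * Illustrative sketch, not part of this patch: the NMI-to-IRQ deferral
 * pattern described in the comment above, reduced to its essentials.
 * All demo_* names are hypothetical; the pool is assumed to have been
 * created and populated at init time, e.g.:
 *
 *	demo_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
 *	gen_pool_add(demo_pool, __get_free_page(GFP_KERNEL), PAGE_SIZE, -1);
 *	init_irq_work(&demo_work, demo_work_func);
 */
struct demo_node {
	struct llist_node llnode;
	int payload;
};

static struct gen_pool *demo_pool;
static struct llist_head demo_llist;
static struct irq_work demo_work;

/* NMI context: no locks, no printk, no regular allocator. */
static void demo_report_from_nmi(int payload)
{
	struct demo_node *node;

	node = (void *)gen_pool_alloc(demo_pool, sizeof(*node));
	if (!node)
		return;	/* drop the record rather than risk a deadlock */
	node->payload = payload;
	llist_add(&node->llnode, &demo_llist);
	irq_work_queue(&demo_work);
}

/* IRQ context, where printk is safe again. */
static void demo_work_func(struct irq_work *work)
{
	struct llist_node *llnode = llist_del_all(&demo_llist);

	while (llnode) {
		struct demo_node *node =
			llist_entry(llnode, struct demo_node, llnode);

		llnode = llnode->next;
		pr_info("deferred payload %d\n", node->payload);
		gen_pool_free(demo_pool, (unsigned long)node, sizeof(*node));
	}
}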
179
180struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
181static atomic_t ghes_estatus_cache_alloced;
182
124static int ghes_ioremap_init(void) 183static int ghes_ioremap_init(void)
125{ 184{
126 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, 185 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -180,6 +239,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
180 __flush_tlb_one(vaddr); 239 __flush_tlb_one(vaddr);
181} 240}
182 241
242static int ghes_estatus_pool_init(void)
243{
244 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
245 if (!ghes_estatus_pool)
246 return -ENOMEM;
247 return 0;
248}
249
250static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
251 struct gen_pool_chunk *chunk,
252 void *data)
253{
254 free_page(chunk->start_addr);
255}
256
257static void ghes_estatus_pool_exit(void)
258{
259 gen_pool_for_each_chunk(ghes_estatus_pool,
260 ghes_estatus_pool_free_chunk_page, NULL);
261 gen_pool_destroy(ghes_estatus_pool);
262}
263
264static int ghes_estatus_pool_expand(unsigned long len)
265{
266 unsigned long i, pages, size, addr;
267 int ret;
268
269 ghes_estatus_pool_size_request += PAGE_ALIGN(len);
270 size = gen_pool_size(ghes_estatus_pool);
271 if (size >= ghes_estatus_pool_size_request)
272 return 0;
273 pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
274 for (i = 0; i < pages; i++) {
275 addr = __get_free_page(GFP_KERNEL);
276 if (!addr)
277 return -ENOMEM;
278 ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
279 if (ret)
280 return ret;
281 }
282
283 return 0;
284}
285
286static void ghes_estatus_pool_shrink(unsigned long len)
287{
288 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
289}
290
183static struct ghes *ghes_new(struct acpi_hest_generic *generic) 291static struct ghes *ghes_new(struct acpi_hest_generic *generic)
184{ 292{
185 struct ghes *ghes; 293 struct ghes *ghes;
@@ -341,43 +449,196 @@ static void ghes_clear_estatus(struct ghes *ghes)
341 ghes->flags &= ~GHES_TO_CLEAR; 449 ghes->flags &= ~GHES_TO_CLEAR;
342} 450}
343 451
344static void ghes_do_proc(struct ghes *ghes) 452static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
345{ 453{
346 int sev, processed = 0; 454 int sev, sec_sev;
347 struct acpi_hest_generic_data *gdata; 455 struct acpi_hest_generic_data *gdata;
348 456
349 sev = ghes_severity(ghes->estatus->error_severity); 457 sev = ghes_severity(estatus->error_severity);
350 apei_estatus_for_each_section(ghes->estatus, gdata) { 458 apei_estatus_for_each_section(estatus, gdata) {
351#ifdef CONFIG_X86_MCE 459 sec_sev = ghes_severity(gdata->error_severity);
352 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 460 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
353 CPER_SEC_PLATFORM_MEM)) { 461 CPER_SEC_PLATFORM_MEM)) {
354 apei_mce_report_mem_error( 462 struct cper_sec_mem_err *mem_err;
355 sev == GHES_SEV_CORRECTED, 463 mem_err = (struct cper_sec_mem_err *)(gdata+1);
356 (struct cper_sec_mem_err *)(gdata+1)); 464#ifdef CONFIG_X86_MCE
357 processed = 1; 465 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
358 } 466 mem_err);
359#endif 467#endif
468#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
469 if (sev == GHES_SEV_RECOVERABLE &&
470 sec_sev == GHES_SEV_RECOVERABLE &&
471 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
472 unsigned long pfn;
473 pfn = mem_err->physical_addr >> PAGE_SHIFT;
474 memory_failure_queue(pfn, 0, 0);
475 }
476#endif
477 }
360 } 478 }
361} 479}
362 480
363static void ghes_print_estatus(const char *pfx, struct ghes *ghes) 481static void __ghes_print_estatus(const char *pfx,
482 const struct acpi_hest_generic *generic,
483 const struct acpi_hest_generic_status *estatus)
364{ 484{
365 /* Not more than 2 messages every 5 seconds */
366 static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
367
368 if (pfx == NULL) { 485 if (pfx == NULL) {
369 if (ghes_severity(ghes->estatus->error_severity) <= 486 if (ghes_severity(estatus->error_severity) <=
370 GHES_SEV_CORRECTED) 487 GHES_SEV_CORRECTED)
371 pfx = KERN_WARNING HW_ERR; 488 pfx = KERN_WARNING HW_ERR;
372 else 489 else
373 pfx = KERN_ERR HW_ERR; 490 pfx = KERN_ERR HW_ERR;
374 } 491 }
375 if (__ratelimit(&ratelimit)) { 492 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
376 printk( 493 pfx, generic->header.source_id);
377 "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 494 apei_estatus_print(pfx, estatus);
378 pfx, ghes->generic->header.source_id); 495}
379 apei_estatus_print(pfx, ghes->estatus); 496
497static int ghes_print_estatus(const char *pfx,
498 const struct acpi_hest_generic *generic,
499 const struct acpi_hest_generic_status *estatus)
500{
501 /* Not more than 2 messages every 5 seconds */
502 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
503 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
504 struct ratelimit_state *ratelimit;
505
506 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
507 ratelimit = &ratelimit_corrected;
508 else
509 ratelimit = &ratelimit_uncorrected;
510 if (__ratelimit(ratelimit)) {
511 __ghes_print_estatus(pfx, generic, estatus);
512 return 1;
380 } 513 }
514 return 0;
515}
516
517/*
 518 * GHES error status reporting throttle: report more kinds of
 519 * errors, instead of just the most frequently occurring ones.
520 */
521static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
522{
523 u32 len;
524 int i, cached = 0;
525 unsigned long long now;
526 struct ghes_estatus_cache *cache;
527 struct acpi_hest_generic_status *cache_estatus;
528
529 len = apei_estatus_len(estatus);
530 rcu_read_lock();
531 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
532 cache = rcu_dereference(ghes_estatus_caches[i]);
533 if (cache == NULL)
534 continue;
535 if (len != cache->estatus_len)
536 continue;
537 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
538 if (memcmp(estatus, cache_estatus, len))
539 continue;
540 atomic_inc(&cache->count);
541 now = sched_clock();
542 if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
543 cached = 1;
544 break;
545 }
546 rcu_read_unlock();
547 return cached;
548}
549
550static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
551 struct acpi_hest_generic *generic,
552 struct acpi_hest_generic_status *estatus)
553{
554 int alloced;
555 u32 len, cache_len;
556 struct ghes_estatus_cache *cache;
557 struct acpi_hest_generic_status *cache_estatus;
558
559 alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
560 if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
561 atomic_dec(&ghes_estatus_cache_alloced);
562 return NULL;
563 }
564 len = apei_estatus_len(estatus);
565 cache_len = GHES_ESTATUS_CACHE_LEN(len);
566 cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
567 if (!cache) {
568 atomic_dec(&ghes_estatus_cache_alloced);
569 return NULL;
570 }
571 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
572 memcpy(cache_estatus, estatus, len);
573 cache->estatus_len = len;
574 atomic_set(&cache->count, 0);
575 cache->generic = generic;
576 cache->time_in = sched_clock();
577 return cache;
578}
579
580static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
581{
582 u32 len;
583
584 len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
585 len = GHES_ESTATUS_CACHE_LEN(len);
586 gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
587 atomic_dec(&ghes_estatus_cache_alloced);
588}
589
590static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
591{
592 struct ghes_estatus_cache *cache;
593
594 cache = container_of(head, struct ghes_estatus_cache, rcu);
595 ghes_estatus_cache_free(cache);
596}
597
598static void ghes_estatus_cache_add(
599 struct acpi_hest_generic *generic,
600 struct acpi_hest_generic_status *estatus)
601{
602 int i, slot = -1, count;
603 unsigned long long now, duration, period, max_period = 0;
604 struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
605
606 new_cache = ghes_estatus_cache_alloc(generic, estatus);
607 if (new_cache == NULL)
608 return;
609 rcu_read_lock();
610 now = sched_clock();
611 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
612 cache = rcu_dereference(ghes_estatus_caches[i]);
613 if (cache == NULL) {
614 slot = i;
615 slot_cache = NULL;
616 break;
617 }
618 duration = now - cache->time_in;
619 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
620 slot = i;
621 slot_cache = cache;
622 break;
623 }
624 count = atomic_read(&cache->count);
625 period = duration;
626 do_div(period, (count + 1));
627 if (period > max_period) {
628 max_period = period;
629 slot = i;
630 slot_cache = cache;
631 }
632 }
 633 /* new_cache must be put into the array only after its contents are written */
634 smp_wmb();
635 if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
636 slot_cache, new_cache) == slot_cache) {
637 if (slot_cache)
638 call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
639 } else
640 ghes_estatus_cache_free(new_cache);
641 rcu_read_unlock();
381} 642}
382 643
383static int ghes_proc(struct ghes *ghes) 644static int ghes_proc(struct ghes *ghes)
@@ -387,9 +648,11 @@ static int ghes_proc(struct ghes *ghes)
387 rc = ghes_read_estatus(ghes, 0); 648 rc = ghes_read_estatus(ghes, 0);
388 if (rc) 649 if (rc)
389 goto out; 650 goto out;
390 ghes_print_estatus(NULL, ghes); 651 if (!ghes_estatus_cached(ghes->estatus)) {
391 ghes_do_proc(ghes); 652 if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
392 653 ghes_estatus_cache_add(ghes->generic, ghes->estatus);
654 }
655 ghes_do_proc(ghes->estatus);
393out: 656out:
394 ghes_clear_estatus(ghes); 657 ghes_clear_estatus(ghes);
395 return 0; 658 return 0;
@@ -447,6 +710,45 @@ static int ghes_notify_sci(struct notifier_block *this,
447 return ret; 710 return ret;
448} 711}
449 712
713static void ghes_proc_in_irq(struct irq_work *irq_work)
714{
715 struct llist_node *llnode, *next, *tail = NULL;
716 struct ghes_estatus_node *estatus_node;
717 struct acpi_hest_generic *generic;
718 struct acpi_hest_generic_status *estatus;
719 u32 len, node_len;
720
721 /*
 722 * The estatus entries on the list are in reverse time order;
 723 * restore them to proper order.
724 */
725 llnode = llist_del_all(&ghes_estatus_llist);
726 while (llnode) {
727 next = llnode->next;
728 llnode->next = tail;
729 tail = llnode;
730 llnode = next;
731 }
732 llnode = tail;
733 while (llnode) {
734 next = llnode->next;
735 estatus_node = llist_entry(llnode, struct ghes_estatus_node,
736 llnode);
737 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
738 len = apei_estatus_len(estatus);
739 node_len = GHES_ESTATUS_NODE_LEN(len);
740 ghes_do_proc(estatus);
741 if (!ghes_estatus_cached(estatus)) {
742 generic = estatus_node->generic;
743 if (ghes_print_estatus(NULL, generic, estatus))
744 ghes_estatus_cache_add(generic, estatus);
745 }
746 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
747 node_len);
748 llnode = next;
749 }
750}
751
450static int ghes_notify_nmi(struct notifier_block *this, 752static int ghes_notify_nmi(struct notifier_block *this,
451 unsigned long cmd, void *data) 753 unsigned long cmd, void *data)
452{ 754{
@@ -476,7 +778,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
476 778
477 if (sev_global >= GHES_SEV_PANIC) { 779 if (sev_global >= GHES_SEV_PANIC) {
478 oops_begin(); 780 oops_begin();
479 ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); 781 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
782 ghes_global->estatus);
480 /* reboot to log the error! */ 783 /* reboot to log the error! */
481 if (panic_timeout == 0) 784 if (panic_timeout == 0)
482 panic_timeout = ghes_panic_timeout; 785 panic_timeout = ghes_panic_timeout;
@@ -484,12 +787,34 @@ static int ghes_notify_nmi(struct notifier_block *this,
484 } 787 }
485 788
486 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 789 list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
790#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
791 u32 len, node_len;
792 struct ghes_estatus_node *estatus_node;
793 struct acpi_hest_generic_status *estatus;
794#endif
487 if (!(ghes->flags & GHES_TO_CLEAR)) 795 if (!(ghes->flags & GHES_TO_CLEAR))
488 continue; 796 continue;
489 /* Do not print estatus because printk is not NMI safe */ 797#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
490 ghes_do_proc(ghes); 798 if (ghes_estatus_cached(ghes->estatus))
799 goto next;
800 /* Save estatus for further processing in IRQ context */
801 len = apei_estatus_len(ghes->estatus);
802 node_len = GHES_ESTATUS_NODE_LEN(len);
803 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
804 node_len);
805 if (estatus_node) {
806 estatus_node->generic = ghes->generic;
807 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
808 memcpy(estatus, ghes->estatus, len);
809 llist_add(&estatus_node->llnode, &ghes_estatus_llist);
810 }
811next:
812#endif
491 ghes_clear_estatus(ghes); 813 ghes_clear_estatus(ghes);
492 } 814 }
815#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
816 irq_work_queue(&ghes_proc_irq_work);
817#endif
493 818
494out: 819out:
495 raw_spin_unlock(&ghes_nmi_lock); 820 raw_spin_unlock(&ghes_nmi_lock);
@@ -504,10 +829,26 @@ static struct notifier_block ghes_notifier_nmi = {
504 .notifier_call = ghes_notify_nmi, 829 .notifier_call = ghes_notify_nmi,
505}; 830};
506 831
832static unsigned long ghes_esource_prealloc_size(
833 const struct acpi_hest_generic *generic)
834{
835 unsigned long block_length, prealloc_records, prealloc_size;
836
837 block_length = min_t(unsigned long, generic->error_block_length,
838 GHES_ESTATUS_MAX_SIZE);
839 prealloc_records = max_t(unsigned long,
840 generic->records_to_preallocate, 1);
841 prealloc_size = min_t(unsigned long, block_length * prealloc_records,
842 GHES_ESOURCE_PREALLOC_MAX_SIZE);
843
844 return prealloc_size;
845}
846
507static int __devinit ghes_probe(struct platform_device *ghes_dev) 847static int __devinit ghes_probe(struct platform_device *ghes_dev)
508{ 848{
509 struct acpi_hest_generic *generic; 849 struct acpi_hest_generic *generic;
510 struct ghes *ghes = NULL; 850 struct ghes *ghes = NULL;
851 unsigned long len;
511 int rc = -EINVAL; 852 int rc = -EINVAL;
512 853
513 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 854 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -573,6 +914,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
573 mutex_unlock(&ghes_list_mutex); 914 mutex_unlock(&ghes_list_mutex);
574 break; 915 break;
575 case ACPI_HEST_NOTIFY_NMI: 916 case ACPI_HEST_NOTIFY_NMI:
917 len = ghes_esource_prealloc_size(generic);
918 ghes_estatus_pool_expand(len);
576 mutex_lock(&ghes_list_mutex); 919 mutex_lock(&ghes_list_mutex);
577 if (list_empty(&ghes_nmi)) 920 if (list_empty(&ghes_nmi))
578 register_die_notifier(&ghes_notifier_nmi); 921 register_die_notifier(&ghes_notifier_nmi);
@@ -597,6 +940,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
597{ 940{
598 struct ghes *ghes; 941 struct ghes *ghes;
599 struct acpi_hest_generic *generic; 942 struct acpi_hest_generic *generic;
943 unsigned long len;
600 944
601 ghes = platform_get_drvdata(ghes_dev); 945 ghes = platform_get_drvdata(ghes_dev);
602 generic = ghes->generic; 946 generic = ghes->generic;
@@ -627,6 +971,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
627 * freed after NMI handler finishes. 971 * freed after NMI handler finishes.
628 */ 972 */
629 synchronize_rcu(); 973 synchronize_rcu();
974 len = ghes_esource_prealloc_size(generic);
975 ghes_estatus_pool_shrink(len);
630 break; 976 break;
631 default: 977 default:
632 BUG(); 978 BUG();
@@ -662,15 +1008,43 @@ static int __init ghes_init(void)
662 return -EINVAL; 1008 return -EINVAL;
663 } 1009 }
664 1010
1011 if (ghes_disable) {
1012 pr_info(GHES_PFX "GHES is not enabled!\n");
1013 return -EINVAL;
1014 }
1015
1016 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1017
665 rc = ghes_ioremap_init(); 1018 rc = ghes_ioremap_init();
666 if (rc) 1019 if (rc)
667 goto err; 1020 goto err;
668 1021
669 rc = platform_driver_register(&ghes_platform_driver); 1022 rc = ghes_estatus_pool_init();
670 if (rc) 1023 if (rc)
671 goto err_ioremap_exit; 1024 goto err_ioremap_exit;
672 1025
1026 rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
1027 GHES_ESTATUS_CACHE_ALLOCED_MAX);
1028 if (rc)
1029 goto err_pool_exit;
1030
1031 rc = platform_driver_register(&ghes_platform_driver);
1032 if (rc)
1033 goto err_pool_exit;
1034
1035 rc = apei_osc_setup();
1036 if (rc == 0 && osc_sb_apei_support_acked)
1037 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1038 else if (rc == 0 && !osc_sb_apei_support_acked)
1039 pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1040 else if (rc && osc_sb_apei_support_acked)
1041 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1042 else
1043 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1044
673 return 0; 1045 return 0;
1046err_pool_exit:
1047 ghes_estatus_pool_exit();
674err_ioremap_exit: 1048err_ioremap_exit:
675 ghes_ioremap_exit(); 1049 ghes_ioremap_exit();
676err: 1050err:
@@ -680,6 +1054,7 @@ err:
680static void __exit ghes_exit(void) 1054static void __exit ghes_exit(void)
681{ 1055{
682 platform_driver_unregister(&ghes_platform_driver); 1056 platform_driver_unregister(&ghes_platform_driver);
1057 ghes_estatus_pool_exit();
683 ghes_ioremap_exit(); 1058 ghes_ioremap_exit();
684} 1059}
685 1060
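The ghes_notify_nmi() hunk above is the heart of this series: printk() is not NMI-safe, so rather than processing the error status block inside the NMI handler, the handler copies it into a node taken from a pre-sized gen_pool (NMI-safe on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG architectures), pushes the node onto a lock-free llist, and queues an irq_work so the record can be handled later in IRQ context. A minimal sketch of that NMI-to-IRQ handoff pattern follows; the err_node/err_pool/err_from_nmi names are hypothetical, and the pool is assumed to have been created and populated at init time, as ghes_estatus_pool_init()/ghes_estatus_pool_expand() do above.

#include <linux/kernel.h>
#include <linux/genalloc.h>
#include <linux/irq_work.h>
#include <linux/llist.h>

struct err_node {
	struct llist_node llnode;
	u32 data;			/* stand-in for the saved estatus */
};

static struct gen_pool *err_pool;	/* pre-populated, NMI-safe allocator */
static LLIST_HEAD(err_llist);
static struct irq_work err_irq_work;	/* init_irq_work(&err_irq_work, err_proc_in_irq) */

/* IRQ context: printk() and the normal allocators are usable again. */
static void err_proc_in_irq(struct irq_work *work)
{
	struct llist_node *llnode = llist_del_all(&err_llist);

	while (llnode) {
		struct err_node *node =
			llist_entry(llnode, struct err_node, llnode);

		llnode = llnode->next;	/* advance before freeing the node */
		pr_info("deferred error record: %u\n", node->data);
		gen_pool_free(err_pool, (unsigned long)node, sizeof(*node));
	}
}

/* NMI context: no locks, no printk(); stash the record and signal. */
static void err_from_nmi(u32 data)
{
	struct err_node *node;

	node = (void *)gen_pool_alloc(err_pool, sizeof(*node));
	if (!node)
		return;			/* pool exhausted: drop the record */
	node->data = data;
	llist_add(&node->llnode, &err_llist);
	irq_work_queue(&err_irq_work);
}

Note that the IRQ side reads llnode->next before releasing each node back to the pool, since the memory must not be touched after gen_pool_free().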
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 181bc2f7bb7..05fee06f4d6 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -231,16 +231,17 @@ void __init acpi_hest_init(void)
231 goto err; 231 goto err;
232 } 232 }
233 233
234 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); 234 if (!ghes_disable) {
235 if (rc) 235 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
236 goto err; 236 if (rc)
237 237 goto err;
238 rc = hest_ghes_dev_register(ghes_count); 238 rc = hest_ghes_dev_register(ghes_count);
239 if (!rc) { 239 if (rc)
240 pr_info(HEST_PFX "Table parsing has been initialized.\n"); 240 goto err;
241 return;
242 } 241 }
243 242
243 pr_info(HEST_PFX "Table parsing has been initialized.\n");
244 return;
244err: 245err:
245 hest_disable = 1; 246 hest_disable = 1;
246} 247}
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 2c661353e8f..7711d94a040 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -55,6 +55,9 @@
55#define ACPI_BATTERY_NOTIFY_INFO 0x81 55#define ACPI_BATTERY_NOTIFY_INFO 0x81
56#define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 56#define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82
57 57
58/* Battery power unit: 0 means mW, 1 means mA */
59#define ACPI_BATTERY_POWER_UNIT_MA 1
60
58#define _COMPONENT ACPI_BATTERY_COMPONENT 61#define _COMPONENT ACPI_BATTERY_COMPONENT
59 62
60ACPI_MODULE_NAME("battery"); 63ACPI_MODULE_NAME("battery");
@@ -91,16 +94,12 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids);
91enum { 94enum {
92 ACPI_BATTERY_ALARM_PRESENT, 95 ACPI_BATTERY_ALARM_PRESENT,
93 ACPI_BATTERY_XINFO_PRESENT, 96 ACPI_BATTERY_XINFO_PRESENT,
94 /* For buggy DSDTs that report negative 16-bit values for either
95 * charging or discharging current and/or report 0 as 65536
96 * due to bad math.
97 */
98 ACPI_BATTERY_QUIRK_SIGNED16_CURRENT,
99 ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, 97 ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY,
100}; 98};
101 99
102struct acpi_battery { 100struct acpi_battery {
103 struct mutex lock; 101 struct mutex lock;
102 struct mutex sysfs_lock;
104 struct power_supply bat; 103 struct power_supply bat;
105 struct acpi_device *device; 104 struct acpi_device *device;
106 struct notifier_block pm_nb; 105 struct notifier_block pm_nb;
@@ -301,7 +300,8 @@ static enum power_supply_property energy_battery_props[] = {
301#ifdef CONFIG_ACPI_PROCFS_POWER 300#ifdef CONFIG_ACPI_PROCFS_POWER
302inline char *acpi_battery_units(struct acpi_battery *battery) 301inline char *acpi_battery_units(struct acpi_battery *battery)
303{ 302{
304 return (battery->power_unit)?"mA":"mW"; 303 return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ?
304 "mA" : "mW";
305} 305}
306#endif 306#endif
307 307
@@ -461,9 +461,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
461 battery->update_time = jiffies; 461 battery->update_time = jiffies;
462 kfree(buffer.pointer); 462 kfree(buffer.pointer);
463 463
464 if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && 464 /* For buggy DSDTs that report negative 16-bit values for either
465 battery->rate_now != -1) 465 * charging or discharging current and/or report 0 as 65536
466 * due to bad math.
467 */
468 if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA &&
469 battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN &&
470 (s16)(battery->rate_now) < 0) {
466 battery->rate_now = abs((s16)battery->rate_now); 471 battery->rate_now = abs((s16)battery->rate_now);
472 printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate"
473 " invalid.\n");
474 }
467 475
468 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) 476 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)
469 && battery->capacity_now >= 0 && battery->capacity_now <= 100) 477 && battery->capacity_now >= 0 && battery->capacity_now <= 100)
@@ -544,7 +552,7 @@ static int sysfs_add_battery(struct acpi_battery *battery)
544{ 552{
545 int result; 553 int result;
546 554
547 if (battery->power_unit) { 555 if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) {
548 battery->bat.properties = charge_battery_props; 556 battery->bat.properties = charge_battery_props;
549 battery->bat.num_properties = 557 battery->bat.num_properties =
550 ARRAY_SIZE(charge_battery_props); 558 ARRAY_SIZE(charge_battery_props);
@@ -566,18 +574,16 @@ static int sysfs_add_battery(struct acpi_battery *battery)
566 574
567static void sysfs_remove_battery(struct acpi_battery *battery) 575static void sysfs_remove_battery(struct acpi_battery *battery)
568{ 576{
569 if (!battery->bat.dev) 577 mutex_lock(&battery->sysfs_lock);
578 if (!battery->bat.dev) {
579 mutex_unlock(&battery->sysfs_lock);
570 return; 580 return;
581 }
582
571 device_remove_file(battery->bat.dev, &alarm_attr); 583 device_remove_file(battery->bat.dev, &alarm_attr);
572 power_supply_unregister(&battery->bat); 584 power_supply_unregister(&battery->bat);
573 battery->bat.dev = NULL; 585 battery->bat.dev = NULL;
574} 586 mutex_unlock(&battery->sysfs_lock);
575
576static void acpi_battery_quirks(struct acpi_battery *battery)
577{
578 if (dmi_name_in_vendors("Acer") && battery->power_unit) {
579 set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags);
580 }
581} 587}
582 588
583/* 589/*
@@ -592,7 +598,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery)
592 * 598 *
593 * Handle this correctly so that they won't break userspace. 599 * Handle this correctly so that they won't break userspace.
594 */ 600 */
595static void acpi_battery_quirks2(struct acpi_battery *battery) 601static void acpi_battery_quirks(struct acpi_battery *battery)
596{ 602{
597 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) 603 if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags))
598 return ; 604 return ;
@@ -623,13 +629,15 @@ static int acpi_battery_update(struct acpi_battery *battery)
623 result = acpi_battery_get_info(battery); 629 result = acpi_battery_get_info(battery);
624 if (result) 630 if (result)
625 return result; 631 return result;
626 acpi_battery_quirks(battery);
627 acpi_battery_init_alarm(battery); 632 acpi_battery_init_alarm(battery);
628 } 633 }
629 if (!battery->bat.dev) 634 if (!battery->bat.dev) {
630 sysfs_add_battery(battery); 635 result = sysfs_add_battery(battery);
636 if (result)
637 return result;
638 }
631 result = acpi_battery_get_state(battery); 639 result = acpi_battery_get_state(battery);
632 acpi_battery_quirks2(battery); 640 acpi_battery_quirks(battery);
633 return result; 641 return result;
634} 642}
635 643
@@ -863,7 +871,7 @@ DECLARE_FILE_FUNCTIONS(alarm);
863 }, \ 871 }, \
864 } 872 }
865 873
866static struct battery_file { 874static const struct battery_file {
867 struct file_operations ops; 875 struct file_operations ops;
868 mode_t mode; 876 mode_t mode;
869 const char *name; 877 const char *name;
@@ -948,9 +956,12 @@ static int battery_notify(struct notifier_block *nb,
948 struct acpi_battery *battery = container_of(nb, struct acpi_battery, 956 struct acpi_battery *battery = container_of(nb, struct acpi_battery,
949 pm_nb); 957 pm_nb);
950 switch (mode) { 958 switch (mode) {
959 case PM_POST_HIBERNATION:
951 case PM_POST_SUSPEND: 960 case PM_POST_SUSPEND:
952 sysfs_remove_battery(battery); 961 if (battery->bat.dev) {
953 sysfs_add_battery(battery); 962 sysfs_remove_battery(battery);
963 sysfs_add_battery(battery);
964 }
954 break; 965 break;
955 } 966 }
956 967
@@ -972,28 +983,38 @@ static int acpi_battery_add(struct acpi_device *device)
972 strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS); 983 strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
973 device->driver_data = battery; 984 device->driver_data = battery;
974 mutex_init(&battery->lock); 985 mutex_init(&battery->lock);
986 mutex_init(&battery->sysfs_lock);
975 if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, 987 if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle,
976 "_BIX", &handle))) 988 "_BIX", &handle)))
977 set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); 989 set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags);
978 acpi_battery_update(battery); 990 result = acpi_battery_update(battery);
991 if (result)
992 goto fail;
979#ifdef CONFIG_ACPI_PROCFS_POWER 993#ifdef CONFIG_ACPI_PROCFS_POWER
980 result = acpi_battery_add_fs(device); 994 result = acpi_battery_add_fs(device);
981#endif 995#endif
982 if (!result) { 996 if (result) {
983 printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
984 ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
985 device->status.battery_present ? "present" : "absent");
986 } else {
987#ifdef CONFIG_ACPI_PROCFS_POWER 997#ifdef CONFIG_ACPI_PROCFS_POWER
988 acpi_battery_remove_fs(device); 998 acpi_battery_remove_fs(device);
989#endif 999#endif
990 kfree(battery); 1000 goto fail;
991 } 1001 }
992 1002
1003 printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
1004 ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
1005 device->status.battery_present ? "present" : "absent");
1006
993 battery->pm_nb.notifier_call = battery_notify; 1007 battery->pm_nb.notifier_call = battery_notify;
994 register_pm_notifier(&battery->pm_nb); 1008 register_pm_notifier(&battery->pm_nb);
995 1009
996 return result; 1010 return result;
1011
1012fail:
1013 sysfs_remove_battery(battery);
1014 mutex_destroy(&battery->lock);
1015 mutex_destroy(&battery->sysfs_lock);
1016 kfree(battery);
1017 return result;
997} 1018}
998 1019
999static int acpi_battery_remove(struct acpi_device *device, int type) 1020static int acpi_battery_remove(struct acpi_device *device, int type)
@@ -1009,6 +1030,7 @@ static int acpi_battery_remove(struct acpi_device *device, int type)
1009#endif 1030#endif
1010 sysfs_remove_battery(battery); 1031 sysfs_remove_battery(battery);
1011 mutex_destroy(&battery->lock); 1032 mutex_destroy(&battery->lock);
1033 mutex_destroy(&battery->sysfs_lock);
1012 kfree(battery); 1034 kfree(battery);
1013 return 0; 1035 return 0;
1014} 1036}
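The acpi_battery_get_state() hunk replaces the old Acer-only DMI quirk with a generic check: some DSDTs store a signed 16-bit (dis)charge rate in the unsigned rate_now field, so a rate the firmware meant as -10 mA arrives as 65526. Casting the low 16 bits back to s16 recovers the sign, and abs() yields the magnitude the power-supply layer expects. A standalone userspace illustration of the arithmetic, with a made-up value (two's-complement targets assumed, as on every platform the kernel supports):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

int main(void)
{
	uint32_t rate_now = 65526;	/* firmware meant -10 mA */

	if ((int16_t)rate_now < 0)	/* sign bit set in the low 16 bits */
		rate_now = abs((int16_t)rate_now);

	printf("%u mA\n", rate_now);	/* prints "10 mA" */
	return 0;
}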
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d1e06c182cd..437ddbf0c49 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -39,6 +39,7 @@
39#include <linux/pci.h> 39#include <linux/pci.h>
40#include <acpi/acpi_bus.h> 40#include <acpi/acpi_bus.h>
41#include <acpi/acpi_drivers.h> 41#include <acpi/acpi_drivers.h>
42#include <acpi/apei.h>
42#include <linux/dmi.h> 43#include <linux/dmi.h>
43#include <linux/suspend.h> 44#include <linux/suspend.h>
44 45
@@ -519,6 +520,7 @@ out_kfree:
519} 520}
520EXPORT_SYMBOL(acpi_run_osc); 521EXPORT_SYMBOL(acpi_run_osc);
521 522
523bool osc_sb_apei_support_acked;
522static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; 524static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
523static void acpi_bus_osc_support(void) 525static void acpi_bus_osc_support(void)
524{ 526{
@@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void)
541#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) 543#if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
542 capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; 544 capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT;
543#endif 545#endif
546
547 if (!ghes_disable)
548 capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT;
544 if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) 549 if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
545 return; 550 return;
546 if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) 551 if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) {
552 u32 *capbuf_ret = context.ret.pointer;
553 if (context.ret.length > OSC_SUPPORT_TYPE)
554 osc_sb_apei_support_acked =
555 capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT;
547 kfree(context.ret.pointer); 556 kfree(context.ret.pointer);
548 /* do we need to check the returned cap? Sounds no */ 557 }
558 /* do we need to check other returned cap? Sounds no */
549} 559}
550 560
551/* -------------------------------------------------------------------------- 561/* --------------------------------------------------------------------------
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index 1864ad3cf89..19a61136d84 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -77,7 +77,7 @@ struct dock_dependent_device {
77 struct list_head list; 77 struct list_head list;
78 struct list_head hotplug_list; 78 struct list_head hotplug_list;
79 acpi_handle handle; 79 acpi_handle handle;
80 struct acpi_dock_ops *ops; 80 const struct acpi_dock_ops *ops;
81 void *context; 81 void *context;
82}; 82};
83 83
@@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier);
589 * the dock driver after _DCK is executed. 589 * the dock driver after _DCK is executed.
590 */ 590 */
591int 591int
592register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, 592register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops,
593 void *context) 593 void *context)
594{ 594{
595 struct dock_dependent_device *dd; 595 struct dock_dependent_device *dd;
diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c
index 05b44201a61..22f918bacd3 100644
--- a/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf,
92 return count; 92 return count;
93} 93}
94 94
95static struct file_operations acpi_ec_io_ops = { 95static const struct file_operations acpi_ec_io_ops = {
96 .owner = THIS_MODULE, 96 .owner = THIS_MODULE,
97 .open = acpi_ec_open_io, 97 .open = acpi_ec_open_io,
98 .read = acpi_ec_read_io, 98 .read = acpi_ec_read_io,
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 467479f07c1..0f0356ca1a9 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
110 return result; 110 return result;
111} 111}
112 112
113static struct thermal_cooling_device_ops fan_cooling_ops = { 113static const struct thermal_cooling_device_ops fan_cooling_ops = {
114 .get_max_state = fan_get_max_state, 114 .get_max_state = fan_get_max_state,
115 .get_cur_state = fan_get_cur_state, 115 .get_cur_state = fan_get_cur_state,
116 .set_cur_state = fan_set_cur_state, 116 .set_cur_state = fan_set_cur_state,
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 372f9b70f7f..fa32f584229 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported)
155{ 155{
156 if (!strcmp("Linux", interface)) { 156 if (!strcmp("Linux", interface)) {
157 157
158 printk(KERN_NOTICE FW_BUG PREFIX 158 printk_once(KERN_NOTICE FW_BUG PREFIX
159 "BIOS _OSI(Linux) query %s%s\n", 159 "BIOS _OSI(Linux) query %s%s\n",
160 osi_linux.enable ? "honored" : "ignored", 160 osi_linux.enable ? "honored" : "ignored",
161 osi_linux.cmdline ? " via cmdline" : 161 osi_linux.cmdline ? " via cmdline" :
@@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args)
237#endif 237#endif
238} 238}
239 239
240#ifdef CONFIG_KEXEC
241static unsigned long acpi_rsdp;
242static int __init setup_acpi_rsdp(char *arg)
243{
244 acpi_rsdp = simple_strtoul(arg, NULL, 16);
245 return 0;
246}
247early_param("acpi_rsdp", setup_acpi_rsdp);
248#endif
249
240acpi_physical_address __init acpi_os_get_root_pointer(void) 250acpi_physical_address __init acpi_os_get_root_pointer(void)
241{ 251{
252#ifdef CONFIG_KEXEC
253 if (acpi_rsdp)
254 return acpi_rsdp;
255#endif
256
242 if (efi_enabled) { 257 if (efi_enabled) {
243 if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) 258 if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
244 return efi.acpi20; 259 return efi.acpi20;
@@ -1083,7 +1098,13 @@ struct osi_setup_entry {
1083 bool enable; 1098 bool enable;
1084}; 1099};
1085 1100
1086static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; 1101static struct osi_setup_entry __initdata
1102 osi_setup_entries[OSI_STRING_ENTRIES_MAX] = {
1103 {"Module Device", true},
1104 {"Processor Device", true},
1105 {"3.0 _SCP Extensions", true},
1106 {"Processor Aggregator Device", true},
1107};
1087 1108
1088void __init acpi_osi_setup(char *str) 1109void __init acpi_osi_setup(char *str)
1089{ 1110{
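Two unrelated conveniences land in osl.c. The acpi_rsdp= early parameter lets a kernel that cannot call EFI services (typically a kexec/kdump crash kernel) be told the physical RSDP address directly, short-circuiting acpi_os_get_root_pointer(). The pre-populated osi_setup_entries move ACPICA's long-standing default _OSI strings into the table so they can now be disabled from the command line (e.g. acpi_osi="!Module Device"). A usage sketch for the former; the address below is made up, and on EFI systems the first kernel's value can typically be read from /sys/firmware/efi/systab:

	vmlinuz ... acpi_rsdp=0x7b7fe014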
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index f907cfbfa13..7f9eba9a0b0 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus)
303/* -------------------------------------------------------------------------- 303/* --------------------------------------------------------------------------
304 PCI Interrupt Routing Support 304 PCI Interrupt Routing Support
305 -------------------------------------------------------------------------- */ 305 -------------------------------------------------------------------------- */
306#ifdef CONFIG_X86_IO_APIC
307extern int noioapicquirk;
308extern int noioapicreroute;
309
310static int bridge_has_boot_interrupt_variant(struct pci_bus *bus)
311{
312 struct pci_bus *bus_it;
313
314 for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) {
315 if (!bus_it->self)
316 return 0;
317 if (bus_it->self->irq_reroute_variant)
318 return bus_it->self->irq_reroute_variant;
319 }
320 return 0;
321}
322
323/*
324 * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ
325 * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does
326 * during interrupt handling). When this INTx generation cannot be disabled,
327 * we reroute these interrupts to their legacy equivalent to get rid of
328 * spurious interrupts.
329 */
330static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
331 struct acpi_prt_entry *entry)
332{
333 if (noioapicquirk || noioapicreroute) {
334 return 0;
335 } else {
336 switch (bridge_has_boot_interrupt_variant(dev->bus)) {
337 case 0:
338 /* no rerouting necessary */
339 return 0;
340 case INTEL_IRQ_REROUTE_VARIANT:
341 /*
342 * Remap according to INTx routing table in 6700PXH
343 * specs, intel order number 302628-002, section
344 * 2.15.2. Other chipsets (80332, ...) have the same
345 * mapping and are handled here as well.
346 */
347 dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy "
348 "IRQ %d\n", entry->index,
349 (entry->index % 4) + 16);
350 entry->index = (entry->index % 4) + 16;
351 return 1;
352 default:
353 dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy "
354 "IRQ: unknown mapping\n", entry->index);
355 return -1;
356 }
357 }
358}
359#endif /* CONFIG_X86_IO_APIC */
360
306static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) 361static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
307{ 362{
308 struct acpi_prt_entry *entry; 363 struct acpi_prt_entry *entry;
@@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
311 366
312 entry = acpi_pci_irq_find_prt_entry(dev, pin); 367 entry = acpi_pci_irq_find_prt_entry(dev, pin);
313 if (entry) { 368 if (entry) {
369#ifdef CONFIG_X86_IO_APIC
370 acpi_reroute_boot_interrupt(dev, entry);
371#endif /* CONFIG_X86_IO_APIC */
314 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", 372 ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n",
315 pci_name(dev), pin_name(pin))); 373 pci_name(dev), pin_name(pin)));
316 return entry; 374 return entry;
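The remap in acpi_reroute_boot_interrupt() is just (entry->index % 4) + 16: however far the _PRT entry was swizzled through the bridge hierarchy, the four INTx lines fold back onto the 6700PXH's legacy IRQs 16-19. A worked example of the mapping, as throwaway userspace code for illustration only:

#include <stdio.h>

int main(void)
{
	/* Fold swizzled boot-interrupt indexes onto legacy IRQs 16-19,
	 * as acpi_reroute_boot_interrupt() does for the Intel 6700PXH. */
	int index;

	for (index = 16; index < 24; index++)
		printf("index %d -> legacy IRQ %d\n",
		       index, (index % 4) + 16);
	return 0;
}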
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d06078d660a..2672c798272 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
485 root->secondary.end = 0xFF; 485 root->secondary.end = 0xFF;
486 printk(KERN_WARNING FW_BUG PREFIX 486 printk(KERN_WARNING FW_BUG PREFIX
487 "no secondary bus range in _CRS\n"); 487 "no secondary bus range in _CRS\n");
488 status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); 488 status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN,
489 NULL, &bus);
489 if (ACPI_SUCCESS(status)) 490 if (ACPI_SUCCESS(status))
490 root->secondary.start = bus; 491 root->secondary.start = bus;
491 else if (status == AE_NOT_FOUND) 492 else if (status == AE_NOT_FOUND)
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 79cb6533289..870550d6a4b 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev,
244 return result; 244 return result;
245} 245}
246 246
247struct thermal_cooling_device_ops processor_cooling_ops = { 247const struct thermal_cooling_device_ops processor_cooling_ops = {
248 .get_max_state = processor_get_max_state, 248 .get_max_state = processor_get_max_state,
249 .get_cur_state = processor_get_cur_state, 249 .get_cur_state = processor_get_cur_state,
250 .set_cur_state = processor_set_cur_state, 250 .set_cur_state = processor_set_cur_state,
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 50658ff887d..6e36d0c0057 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -130,6 +130,9 @@ struct acpi_sbs {
130 130
131#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) 131#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
132 132
133static int acpi_sbs_remove(struct acpi_device *device, int type);
134static int acpi_battery_get_state(struct acpi_battery *battery);
135
133static inline int battery_scale(int log) 136static inline int battery_scale(int log)
134{ 137{
135 int scale = 1; 138 int scale = 1;
@@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
195 198
196 if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) 199 if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT)
197 return -ENODEV; 200 return -ENODEV;
201
202 acpi_battery_get_state(battery);
198 switch (psp) { 203 switch (psp) {
199 case POWER_SUPPLY_PROP_STATUS: 204 case POWER_SUPPLY_PROP_STATUS:
200 if (battery->rate_now < 0) 205 if (battery->rate_now < 0)
@@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy,
225 case POWER_SUPPLY_PROP_POWER_NOW: 230 case POWER_SUPPLY_PROP_POWER_NOW:
226 val->intval = abs(battery->rate_now) * 231 val->intval = abs(battery->rate_now) *
227 acpi_battery_ipscale(battery) * 1000; 232 acpi_battery_ipscale(battery) * 1000;
233 val->intval *= (acpi_battery_mode(battery)) ?
234 (battery->voltage_now *
235 acpi_battery_vscale(battery) / 1000) : 1;
228 break; 236 break;
229 case POWER_SUPPLY_PROP_CURRENT_AVG: 237 case POWER_SUPPLY_PROP_CURRENT_AVG:
230 case POWER_SUPPLY_PROP_POWER_AVG: 238 case POWER_SUPPLY_PROP_POWER_AVG:
231 val->intval = abs(battery->rate_avg) * 239 val->intval = abs(battery->rate_avg) *
232 acpi_battery_ipscale(battery) * 1000; 240 acpi_battery_ipscale(battery) * 1000;
241 val->intval *= (acpi_battery_mode(battery)) ?
242 (battery->voltage_now *
243 acpi_battery_vscale(battery) / 1000) : 1;
233 break; 244 break;
234 case POWER_SUPPLY_PROP_CAPACITY: 245 case POWER_SUPPLY_PROP_CAPACITY:
235 val->intval = battery->state_of_charge; 246 val->intval = battery->state_of_charge;
@@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context)
903 } 914 }
904} 915}
905 916
906static int acpi_sbs_remove(struct acpi_device *device, int type);
907
908static int acpi_sbs_add(struct acpi_device *device) 917static int acpi_sbs_add(struct acpi_device *device)
909{ 918{
910 struct acpi_sbs *sbs; 919 struct acpi_sbs *sbs;
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 6c949602cbd..3ed80b2ca90 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
428 DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), 428 DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"),
429 }, 429 },
430 }, 430 },
431 {
432 .callback = init_old_suspend_ordering,
433 .ident = "Asus A8N-SLI DELUXE",
434 .matches = {
435 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
436 DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"),
437 },
438 },
439 {
440 .callback = init_old_suspend_ordering,
441 .ident = "Asus A8N-SLI Premium",
442 .matches = {
443 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
444 DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"),
445 },
446 },
431 {}, 447 {},
432}; 448};
433#endif /* CONFIG_SUSPEND */ 449#endif /* CONFIG_SUSPEND */
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 77255f250db..c538d0ef10f 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp)
149 return result; 149 return result;
150} 150}
151 151
152static struct kernel_param_ops param_ops_debug_layer = { 152static const struct kernel_param_ops param_ops_debug_layer = {
153 .set = param_set_uint, 153 .set = param_set_uint,
154 .get = param_get_debug_layer, 154 .get = param_get_debug_layer,
155}; 155};
156 156
157static struct kernel_param_ops param_ops_debug_level = { 157static const struct kernel_param_ops param_ops_debug_level = {
158 .set = param_set_uint, 158 .set = param_set_uint,
159 .get = param_get_debug_level, 159 .get = param_get_debug_level,
160}; 160};
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 2607e17b520..48fbc647b17 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal,
812 thermal_zone_unbind_cooling_device); 812 thermal_zone_unbind_cooling_device);
813} 813}
814 814
815static struct thermal_zone_device_ops acpi_thermal_zone_ops = { 815static const struct thermal_zone_device_ops acpi_thermal_zone_ops = {
816 .bind = acpi_thermal_bind_cooling_device, 816 .bind = acpi_thermal_bind_cooling_device,
817 .unbind = acpi_thermal_unbind_cooling_device, 817 .unbind = acpi_thermal_unbind_cooling_device,
818 .get_temp = thermal_get_temp, 818 .get_temp = thermal_get_temp,
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index ada4b4d9bdc..08a44b532f7 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st
307 return acpi_video_device_lcd_set_level(video, level); 307 return acpi_video_device_lcd_set_level(video, level);
308} 308}
309 309
310static struct thermal_cooling_device_ops video_cooling_ops = { 310static const struct thermal_cooling_device_ops video_cooling_ops = {
311 .get_max_state = video_get_max_state, 311 .get_max_state = video_get_max_state,
312 .get_cur_state = video_get_cur_state, 312 .get_cur_state = video_get_cur_state,
313 .set_cur_state = video_set_cur_state, 313 .set_cur_state = video_set_cur_state,
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index e0a5b555cee..bb7c5f1085c 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data)
218 ata_acpi_uevent(dev->link->ap, dev, event); 218 ata_acpi_uevent(dev->link->ap, dev, event);
219} 219}
220 220
221static struct acpi_dock_ops ata_acpi_dev_dock_ops = { 221static const struct acpi_dock_ops ata_acpi_dev_dock_ops = {
222 .handler = ata_acpi_dev_notify_dock, 222 .handler = ata_acpi_dev_notify_dock,
223 .uevent = ata_acpi_dev_uevent, 223 .uevent = ata_acpi_dev_uevent,
224}; 224};
225 225
226static struct acpi_dock_ops ata_acpi_ap_dock_ops = { 226static const struct acpi_dock_ops ata_acpi_ap_dock_ops = {
227 .handler = ata_acpi_ap_notify_dock, 227 .handler = ata_acpi_ap_notify_dock,
228 .uevent = ata_acpi_ap_uevent, 228 .uevent = ata_acpi_ap_uevent,
229}; 229};
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index b89fffc1d77..33e1bed68fd 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -166,7 +166,7 @@ static int create_path(const char *nodepath)
166{ 166{
167 char *path; 167 char *path;
168 char *s; 168 char *s;
169 int err; 169 int err = 0;
170 170
171 /* parent directories do not exist, create them */ 171 /* parent directories do not exist, create them */
172 path = kstrdup(nodepath, GFP_KERNEL); 172 path = kstrdup(nodepath, GFP_KERNEL);
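The err = 0 initialization matters because create_path() only assigns err inside its directory-creation loop; for a node name with no parent directories to create, the old code returned an uninitialized value. Starting from zero makes the no-op case report success.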
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index be8714aa9dd..e18566a0fed 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -80,7 +80,6 @@ static void genpd_set_active(struct generic_pm_domain *genpd)
80int pm_genpd_poweron(struct generic_pm_domain *genpd) 80int pm_genpd_poweron(struct generic_pm_domain *genpd)
81{ 81{
82 struct generic_pm_domain *parent = genpd->parent; 82 struct generic_pm_domain *parent = genpd->parent;
83 DEFINE_WAIT(wait);
84 int ret = 0; 83 int ret = 0;
85 84
86 start: 85 start:
@@ -112,7 +111,7 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd)
112 } 111 }
113 112
114 if (genpd->power_on) { 113 if (genpd->power_on) {
115 int ret = genpd->power_on(genpd); 114 ret = genpd->power_on(genpd);
116 if (ret) 115 if (ret)
117 goto out; 116 goto out;
118 } 117 }
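Dropping the inner int ret declaration fixes a shadowing bug: a failure from genpd->power_on() landed in the block-local variable, so the goto out path still returned the outer ret of 0 and the caller never saw the error. The DEFINE_WAIT() removal is plain dead-code cleanup.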
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 8dc247c974a..acb3f83b807 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -226,11 +226,17 @@ static int rpm_idle(struct device *dev, int rpmflags)
226 callback = NULL; 226 callback = NULL;
227 227
228 if (callback) { 228 if (callback) {
229 spin_unlock_irq(&dev->power.lock); 229 if (dev->power.irq_safe)
230 spin_unlock(&dev->power.lock);
231 else
232 spin_unlock_irq(&dev->power.lock);
230 233
231 callback(dev); 234 callback(dev);
232 235
233 spin_lock_irq(&dev->power.lock); 236 if (dev->power.irq_safe)
237 spin_lock(&dev->power.lock);
238 else
239 spin_lock_irq(&dev->power.lock);
234 } 240 }
235 241
236 dev->power.idle_notification = false; 242 dev->power.idle_notification = false;
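The conditional unlock in rpm_idle() supports devices marked with pm_runtime_irq_safe(): their runtime-PM callbacks may be invoked with interrupts disabled, so the core must drop only the spinlock and must not re-enable interrupts around the callback. A minimal sketch of a driver opting in; the foo driver is hypothetical, but pm_runtime_irq_safe() is the real API:

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

static int foo_probe(struct platform_device *pdev)
{
	/* Promise that this device's runtime-PM callbacks neither
	 * sleep nor re-enable interrupts; the core may then invoke
	 * them from atomic context, taking the spin_unlock() path in
	 * the hunk above instead of spin_unlock_irq(). */
	pm_runtime_irq_safe(&pdev->dev);
	pm_runtime_enable(&pdev->dev);
	return 0;
}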
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 49502bc5360..423fd56bf61 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -616,5 +616,16 @@ config MSM_SMD_PKT
616 Enables userspace clients to read and write to some packet SMD 616 Enables userspace clients to read and write to some packet SMD
617 ports via device interface for MSM chipset. 617 ports via device interface for MSM chipset.
618 618
619config TILE_SROM
620 bool "Character-device access via hypervisor to the Tilera SPI ROM"
621 depends on TILE
622 default y
623 ---help---
624 This device provides character-level read-write access
625 to the SROM, typically via the "0", "1", and "2" devices
626 in /dev/srom/. The Tilera hypervisor makes the flash
627 device appear much like a simple EEPROM, and knows
628 how to partition a single ROM for multiple purposes.
629
619endmenu 630endmenu
620 631
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 7a00672bd85..32762ba769c 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o
63 63
64obj-$(CONFIG_JS_RTC) += js-rtc.o 64obj-$(CONFIG_JS_RTC) += js-rtc.o
65js-rtc-y = rtc.o 65js-rtc-y = rtc.o
66
67obj-$(CONFIG_TILE_SROM) += tile-srom.o
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index fca0c51bbc9..810aff9e750 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c
@@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev)
147 cxt->phys_addr = pdata->mem_address; 147 cxt->phys_addr = pdata->mem_address;
148 cxt->record_size = pdata->record_size; 148 cxt->record_size = pdata->record_size;
149 cxt->dump_oops = pdata->dump_oops; 149 cxt->dump_oops = pdata->dump_oops;
150 /*
151 * Update the module parameter variables as well so they are visible
152 * through /sys/module/ramoops/parameters/
153 */
154 mem_size = pdata->mem_size;
155 mem_address = pdata->mem_address;
156 record_size = pdata->record_size;
157 dump_oops = pdata->dump_oops;
150 158
151 if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { 159 if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
152 pr_err("request mem region failed\n"); 160 pr_err("request mem region failed\n");
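With the mirrored assignments above, the effective ramoops configuration is visible at runtime regardless of whether it came from the command line or from board platform data, e.g. via cat /sys/module/ramoops/parameters/mem_size.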
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 729281961f2..c35a785005b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1300,345 +1300,14 @@ ctl_table random_table[] = {
1300}; 1300};
1301#endif /* CONFIG_SYSCTL */ 1301#endif /* CONFIG_SYSCTL */
1302 1302
1303/******************************************************************** 1303static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
1304 *
1305 * Random functions for networking
1306 *
1307 ********************************************************************/
1308
1309/*
1310 * TCP initial sequence number picking. This uses the random number
1311 * generator to pick an initial secret value. This value is hashed
1312 * along with the TCP endpoint information to provide a unique
1313 * starting point for each pair of TCP endpoints. This defeats
1314 * attacks which rely on guessing the initial TCP sequence number.
1315 * This algorithm was suggested by Steve Bellovin.
1316 *
1317 * Using a very strong hash was taking an appreciable amount of the total
1318 * TCP connection establishment time, so this is a weaker hash,
1319 * compensated for by changing the secret periodically.
1320 */
1321
1322/* F, G and H are basic MD4 functions: selection, majority, parity */
1323#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
1324#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
1325#define H(x, y, z) ((x) ^ (y) ^ (z))
1326
1327/*
1328 * The generic round function. The application is so specific that
1329 * we don't bother protecting all the arguments with parens, as is generally
1330 * good macro practice, in favor of extra legibility.
1331 * Rotation is separate from addition to prevent recomputation
1332 */
1333#define ROUND(f, a, b, c, d, x, s) \
1334 (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
1335#define K1 0
1336#define K2 013240474631UL
1337#define K3 015666365641UL
1338
1339#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1340
1341static __u32 twothirdsMD4Transform(__u32 const buf[4], __u32 const in[12])
1342{
1343 __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
1344
1345 /* Round 1 */
1346 ROUND(F, a, b, c, d, in[ 0] + K1, 3);
1347 ROUND(F, d, a, b, c, in[ 1] + K1, 7);
1348 ROUND(F, c, d, a, b, in[ 2] + K1, 11);
1349 ROUND(F, b, c, d, a, in[ 3] + K1, 19);
1350 ROUND(F, a, b, c, d, in[ 4] + K1, 3);
1351 ROUND(F, d, a, b, c, in[ 5] + K1, 7);
1352 ROUND(F, c, d, a, b, in[ 6] + K1, 11);
1353 ROUND(F, b, c, d, a, in[ 7] + K1, 19);
1354 ROUND(F, a, b, c, d, in[ 8] + K1, 3);
1355 ROUND(F, d, a, b, c, in[ 9] + K1, 7);
1356 ROUND(F, c, d, a, b, in[10] + K1, 11);
1357 ROUND(F, b, c, d, a, in[11] + K1, 19);
1358
1359 /* Round 2 */
1360 ROUND(G, a, b, c, d, in[ 1] + K2, 3);
1361 ROUND(G, d, a, b, c, in[ 3] + K2, 5);
1362 ROUND(G, c, d, a, b, in[ 5] + K2, 9);
1363 ROUND(G, b, c, d, a, in[ 7] + K2, 13);
1364 ROUND(G, a, b, c, d, in[ 9] + K2, 3);
1365 ROUND(G, d, a, b, c, in[11] + K2, 5);
1366 ROUND(G, c, d, a, b, in[ 0] + K2, 9);
1367 ROUND(G, b, c, d, a, in[ 2] + K2, 13);
1368 ROUND(G, a, b, c, d, in[ 4] + K2, 3);
1369 ROUND(G, d, a, b, c, in[ 6] + K2, 5);
1370 ROUND(G, c, d, a, b, in[ 8] + K2, 9);
1371 ROUND(G, b, c, d, a, in[10] + K2, 13);
1372
1373 /* Round 3 */
1374 ROUND(H, a, b, c, d, in[ 3] + K3, 3);
1375 ROUND(H, d, a, b, c, in[ 7] + K3, 9);
1376 ROUND(H, c, d, a, b, in[11] + K3, 11);
1377 ROUND(H, b, c, d, a, in[ 2] + K3, 15);
1378 ROUND(H, a, b, c, d, in[ 6] + K3, 3);
1379 ROUND(H, d, a, b, c, in[10] + K3, 9);
1380 ROUND(H, c, d, a, b, in[ 1] + K3, 11);
1381 ROUND(H, b, c, d, a, in[ 5] + K3, 15);
1382 ROUND(H, a, b, c, d, in[ 9] + K3, 3);
1383 ROUND(H, d, a, b, c, in[ 0] + K3, 9);
1384 ROUND(H, c, d, a, b, in[ 4] + K3, 11);
1385 ROUND(H, b, c, d, a, in[ 8] + K3, 15);
1386
1387 return buf[1] + b; /* "most hashed" word */
1388 /* Alternative: return sum of all words? */
1389}
1390#endif
1391
1392#undef ROUND
1393#undef F
1394#undef G
1395#undef H
1396#undef K1
1397#undef K2
1398#undef K3
1399
1400/* This should not be decreased so low that ISNs wrap too fast. */
1401#define REKEY_INTERVAL (300 * HZ)
1402/*
1403 * Bit layout of the tcp sequence numbers (before adding current time):
1404 * bit 24-31: increased after every key exchange
1405 * bit 0-23: hash(source,dest)
1406 *
1407 * The implementation is similar to the algorithm described
1408 * in the Appendix of RFC 1185, except that
1409 * - it uses a 1 MHz clock instead of a 250 kHz clock
1410 * - it performs a rekey every 5 minutes, which is equivalent
 1411 * to a (source,dest) tuple dependent forward jump of the
1412 * clock by 0..2^(HASH_BITS+1)
1413 *
1414 * Thus the average ISN wraparound time is 68 minutes instead of
1415 * 4.55 hours.
1416 *
1417 * SMP cleanup and lock avoidance with poor man's RCU.
1418 * Manfred Spraul <manfred@colorfullife.com>
1419 *
1420 */
1421#define COUNT_BITS 8
1422#define COUNT_MASK ((1 << COUNT_BITS) - 1)
1423#define HASH_BITS 24
1424#define HASH_MASK ((1 << HASH_BITS) - 1)
1425 1304
1426static struct keydata { 1305static int __init random_int_secret_init(void)
1427 __u32 count; /* already shifted to the final position */
1428 __u32 secret[12];
1429} ____cacheline_aligned ip_keydata[2];
1430
1431static unsigned int ip_cnt;
1432
1433static void rekey_seq_generator(struct work_struct *work);
1434
1435static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator);
1436
1437/*
1438 * Lock avoidance:
1439 * The ISN generation runs lockless - it's just a hash over random data.
1440 * State changes happen every 5 minutes when the random key is replaced.
1441 * Synchronization is performed by having two copies of the hash function
1442 * state and rekey_seq_generator always updates the inactive copy.
1443 * The copy is then activated by updating ip_cnt.
1444 * The implementation breaks down if someone blocks the thread
1445 * that processes SYN requests for more than 5 minutes. Should never
1446 * happen, and even if that happens only a not perfectly compliant
1447 * ISN is generated, nothing fatal.
1448 */
1449static void rekey_seq_generator(struct work_struct *work)
1450{ 1306{
1451 struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)]; 1307 get_random_bytes(random_int_secret, sizeof(random_int_secret));
1452
1453 get_random_bytes(keyptr->secret, sizeof(keyptr->secret));
1454 keyptr->count = (ip_cnt & COUNT_MASK) << HASH_BITS;
1455 smp_wmb();
1456 ip_cnt++;
1457 schedule_delayed_work(&rekey_work,
1458 round_jiffies_relative(REKEY_INTERVAL));
1459}
1460
1461static inline struct keydata *get_keyptr(void)
1462{
1463 struct keydata *keyptr = &ip_keydata[ip_cnt & 1];
1464
1465 smp_rmb();
1466
1467 return keyptr;
1468}
1469
1470static __init int seqgen_init(void)
1471{
1472 rekey_seq_generator(NULL);
1473 return 0; 1308 return 0;
1474} 1309}
1475late_initcall(seqgen_init); 1310late_initcall(random_int_secret_init);
1476
1477#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1478__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
1479 __be16 sport, __be16 dport)
1480{
1481 __u32 seq;
1482 __u32 hash[12];
1483 struct keydata *keyptr = get_keyptr();
1484
1485 /* The procedure is the same as for IPv4, but addresses are longer.
1486 * Thus we must use twothirdsMD4Transform.
1487 */
1488
1489 memcpy(hash, saddr, 16);
1490 hash[4] = ((__force u16)sport << 16) + (__force u16)dport;
1491 memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7);
1492
1493 seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK;
1494 seq += keyptr->count;
1495
1496 seq += ktime_to_ns(ktime_get_real());
1497
1498 return seq;
1499}
1500EXPORT_SYMBOL(secure_tcpv6_sequence_number);
1501#endif
1502
1503/* The code below is shamelessly stolen from secure_tcp_sequence_number().
1504 * All blames to Andrey V. Savochkin <saw@msu.ru>.
1505 */
1506__u32 secure_ip_id(__be32 daddr)
1507{
1508 struct keydata *keyptr;
1509 __u32 hash[4];
1510
1511 keyptr = get_keyptr();
1512
1513 /*
1514 * Pick a unique starting offset for each IP destination.
1515 * The dest ip address is placed in the starting vector,
1516 * which is then hashed with random data.
1517 */
1518 hash[0] = (__force __u32)daddr;
1519 hash[1] = keyptr->secret[9];
1520 hash[2] = keyptr->secret[10];
1521 hash[3] = keyptr->secret[11];
1522
1523 return half_md4_transform(hash, keyptr->secret);
1524}
1525
1526__u32 secure_ipv6_id(const __be32 daddr[4])
1527{
1528 const struct keydata *keyptr;
1529 __u32 hash[4];
1530
1531 keyptr = get_keyptr();
1532
1533 hash[0] = (__force __u32)daddr[0];
1534 hash[1] = (__force __u32)daddr[1];
1535 hash[2] = (__force __u32)daddr[2];
1536 hash[3] = (__force __u32)daddr[3];
1537
1538 return half_md4_transform(hash, keyptr->secret);
1539}
1540
1541#ifdef CONFIG_INET
1542
1543__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
1544 __be16 sport, __be16 dport)
1545{
1546 __u32 seq;
1547 __u32 hash[4];
1548 struct keydata *keyptr = get_keyptr();
1549
1550 /*
1551 * Pick a unique starting offset for each TCP connection endpoints
1552 * (saddr, daddr, sport, dport).
1553 * Note that the words are placed into the starting vector, which is
1554 * then mixed with a partial MD4 over random data.
1555 */
1556 hash[0] = (__force u32)saddr;
1557 hash[1] = (__force u32)daddr;
1558 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
1559 hash[3] = keyptr->secret[11];
1560
1561 seq = half_md4_transform(hash, keyptr->secret) & HASH_MASK;
1562 seq += keyptr->count;
1563 /*
1564 * As close as possible to RFC 793, which
1565 * suggests using a 250 kHz clock.
1566 * Further reading shows this assumes 2 Mb/s networks.
1567 * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
1568 * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but
1569 * we also need to limit the resolution so that the u32 seq
1570 * overlaps less than one time per MSL (2 minutes).
1571 * Choosing a clock of 64 ns period is OK. (period of 274 s)
1572 */
1573 seq += ktime_to_ns(ktime_get_real()) >> 6;
1574
1575 return seq;
1576}
1577
1578/* Generate secure starting point for ephemeral IPV4 transport port search */
1579u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
1580{
1581 struct keydata *keyptr = get_keyptr();
1582 u32 hash[4];
1583
1584 /*
1585 * Pick a unique starting offset for each ephemeral port search
1586 * (saddr, daddr, dport) and 48bits of random data.
1587 */
1588 hash[0] = (__force u32)saddr;
1589 hash[1] = (__force u32)daddr;
1590 hash[2] = (__force u32)dport ^ keyptr->secret[10];
1591 hash[3] = keyptr->secret[11];
1592
1593 return half_md4_transform(hash, keyptr->secret);
1594}
1595EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
1596
1597#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1598u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
1599 __be16 dport)
1600{
1601 struct keydata *keyptr = get_keyptr();
1602 u32 hash[12];
1603
1604 memcpy(hash, saddr, 16);
1605 hash[4] = (__force u32)dport;
1606 memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7);
1607
1608 return twothirdsMD4Transform((const __u32 *)daddr, hash);
1609}
1610#endif
1611
1612#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
1613/* Similar to secure_tcp_sequence_number but generate a 48 bit value
1614 * bit's 32-47 increase every key exchange
1615 * 0-31 hash(source, dest)
1616 */
1617u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
1618 __be16 sport, __be16 dport)
1619{
1620 u64 seq;
1621 __u32 hash[4];
1622 struct keydata *keyptr = get_keyptr();
1623
1624 hash[0] = (__force u32)saddr;
1625 hash[1] = (__force u32)daddr;
1626 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
1627 hash[3] = keyptr->secret[11];
1628
1629 seq = half_md4_transform(hash, keyptr->secret);
1630 seq |= ((u64)keyptr->count) << (32 - HASH_BITS);
1631
1632 seq += ktime_to_ns(ktime_get_real());
1633 seq &= (1ull << 48) - 1;
1634
1635 return seq;
1636}
1637EXPORT_SYMBOL(secure_dccp_sequence_number);
1638#endif
1639
1640#endif /* CONFIG_INET */
1641
1642 1311
1643/* 1312/*
1644 * Get a random word for internal kernel use only. Similar to urandom but 1313 * Get a random word for internal kernel use only. Similar to urandom but
@@ -1646,17 +1315,15 @@ EXPORT_SYMBOL(secure_dccp_sequence_number);
1646 * value is not cryptographically secure but for several uses the cost of 1315 * value is not cryptographically secure but for several uses the cost of
1647 * depleting entropy is too high 1316 * depleting entropy is too high
1648 */ 1317 */
1649DEFINE_PER_CPU(__u32 [4], get_random_int_hash); 1318DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
1650unsigned int get_random_int(void) 1319unsigned int get_random_int(void)
1651{ 1320{
1652 struct keydata *keyptr;
1653 __u32 *hash = get_cpu_var(get_random_int_hash); 1321 __u32 *hash = get_cpu_var(get_random_int_hash);
1654 int ret; 1322 unsigned int ret;
1655 1323
1656 keyptr = get_keyptr();
1657 hash[0] += current->pid + jiffies + get_cycles(); 1324 hash[0] += current->pid + jiffies + get_cycles();
1658 1325 md5_transform(hash, random_int_secret);
1659 ret = half_md4_transform(hash, keyptr->secret); 1326 ret = hash[0];
1660 put_cpu_var(get_random_int_hash); 1327 put_cpu_var(get_random_int_hash);
1661 1328
1662 return ret; 1329 return ret;
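This hunk removes the 5-minute-rekey MD4 machinery from random.c entirely: the per-protocol helpers deleted here (secure_tcp_sequence_number() and friends) were reimplemented with MD5 in net/core/secure_seq.c as part of this merge, and get_random_int() keeps only a boot-time random_int_secret plus per-CPU MD5 state. A sketch of a typical caller, with a hypothetical helper name:

#include <linux/random.h>
#include <linux/jiffies.h>

/* Typical get_random_int() use: cheap jitter for a retry deadline.
 * The result is not cryptographically strong; anything security-
 * sensitive should use get_random_bytes() instead. */
static unsigned long foo_jittered_deadline(void)
{
	return jiffies + HZ + (get_random_int() % HZ);
}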
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
new file mode 100644
index 00000000000..cf3ee008dca
--- /dev/null
+++ b/drivers/char/tile-srom.c
@@ -0,0 +1,481 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * SPI Flash ROM driver
15 *
16 * This source code is derived from code provided in "Linux Device
17 * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and
18 * Greg Kroah-Hartman, published by O'Reilly Media, Inc.
19 */
20
21#include <linux/module.h>
22#include <linux/moduleparam.h>
23#include <linux/init.h>
24#include <linux/kernel.h> /* printk() */
25#include <linux/slab.h> /* kmalloc() */
26#include <linux/fs.h> /* everything... */
27#include <linux/errno.h> /* error codes */
28#include <linux/types.h> /* size_t */
29#include <linux/proc_fs.h>
30#include <linux/fcntl.h> /* O_ACCMODE */
31#include <linux/aio.h>
32#include <linux/pagemap.h>
33#include <linux/hugetlb.h>
34#include <linux/uaccess.h>
35#include <linux/platform_device.h>
36#include <hv/hypervisor.h>
37#include <linux/ioctl.h>
38#include <linux/cdev.h>
39#include <linux/delay.h>
40#include <hv/drv_srom_intf.h>
41
42/*
43 * Size of our hypervisor I/O requests. We break up large transfers
44 * so that we don't spend large uninterrupted spans of time in the
45 * hypervisor. Erasing an SROM sector takes a significant fraction of
46 * a second, so if we allowed the user to, say, do one I/O to write the
47 * entire ROM, we'd get soft lockup timeouts, or worse.
48 */
49#define SROM_CHUNK_SIZE ((size_t)4096)
50
51/*
52 * When hypervisor is busy (e.g. erasing), poll the status periodically.
53 */
54
55/*
56 * Interval to poll the state in msec
57 */
58#define SROM_WAIT_TRY_INTERVAL 20
59
60/*
61 * Maximum times to poll the state
62 */
63#define SROM_MAX_WAIT_TRY_TIMES 1000
64
65struct srom_dev {
66 int hv_devhdl; /* Handle for hypervisor device */
67 u32 total_size; /* Size of this device */
68 u32 sector_size; /* Size of a sector */
69 u32 page_size; /* Size of a page */
70 struct mutex lock; /* Allow only one accessor at a time */
71};
72
73static int srom_major; /* Dynamic major by default */
74module_param(srom_major, int, 0);
75MODULE_AUTHOR("Tilera Corporation");
76MODULE_LICENSE("GPL");
77
78static int srom_devs; /* Number of SROM partitions */
79static struct cdev srom_cdev;
80static struct class *srom_class;
81static struct srom_dev *srom_devices;
82
83/*
84 * Handle calling the hypervisor and managing EAGAIN/EBUSY.
85 */
86
87static ssize_t _srom_read(int hv_devhdl, void *buf,
88 loff_t off, size_t count)
89{
90 int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
91 for (;;) {
92 retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf,
93 count, off);
94 if (retval >= 0)
95 return retval;
96 if (retval == HV_EAGAIN)
97 continue;
98 if (retval == HV_EBUSY && --retries > 0) {
99 msleep(SROM_WAIT_TRY_INTERVAL);
100 continue;
101 }
102 pr_err("_srom_read: error %d\n", retval);
103 return -EIO;
104 }
105}
106
107static ssize_t _srom_write(int hv_devhdl, const void *buf,
108 loff_t off, size_t count)
109{
110 int retval, retries = SROM_MAX_WAIT_TRY_TIMES;
111 for (;;) {
112 retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf,
113 count, off);
114 if (retval >= 0)
115 return retval;
116 if (retval == HV_EAGAIN)
117 continue;
118 if (retval == HV_EBUSY && --retries > 0) {
119 msleep(SROM_WAIT_TRY_INTERVAL);
120 continue;
121 }
122 pr_err("_srom_write: error %d\n", retval);
123 return -EIO;
124 }
125}
126
127/**
128 * srom_open() - Device open routine.
129 * @inode: Inode for this device.
130 * @filp: File for this specific open of the device.
131 *
132 * Returns zero, or an error code.
133 */
134static int srom_open(struct inode *inode, struct file *filp)
135{
136 filp->private_data = &srom_devices[iminor(inode)];
137 return 0;
138}
139
140
141/**
142 * srom_release() - Device release routine.
143 * @inode: Inode for this device.
144 * @filp: File for this specific open of the device.
145 *
146 * Returns zero, or an error code.
147 */
148static int srom_release(struct inode *inode, struct file *filp)
149{
150 struct srom_dev *srom = filp->private_data;
151 char dummy;
152
153 /* Make sure we've flushed anything written to the ROM. */
154 mutex_lock(&srom->lock);
155 if (srom->hv_devhdl >= 0)
156 _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1);
157 mutex_unlock(&srom->lock);
158
159 filp->private_data = NULL;
160
161 return 0;
162}
163
164
165/**
166 * srom_read() - Read data from the device.
167 * @filp: File for this specific open of the device.
168 * @buf: User's data buffer.
169 * @count: Number of bytes requested.
170 * @f_pos: File position.
171 *
172 * Returns number of bytes read, or an error code.
173 */
174static ssize_t srom_read(struct file *filp, char __user *buf,
175 size_t count, loff_t *f_pos)
176{
177 int retval = 0;
178 void *kernbuf;
179 struct srom_dev *srom = filp->private_data;
180
181 kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
182 if (!kernbuf)
183 return -ENOMEM;
184
185 if (mutex_lock_interruptible(&srom->lock)) {
186 retval = -ERESTARTSYS;
187 kfree(kernbuf);
188 return retval;
189 }
190
191 while (count) {
192 int hv_retval;
193 int bytes_this_pass = min(count, SROM_CHUNK_SIZE);
194
195 hv_retval = _srom_read(srom->hv_devhdl, kernbuf,
196 *f_pos, bytes_this_pass);
197 if (hv_retval > 0) {
198 if (copy_to_user(buf, kernbuf, hv_retval) != 0) {
199 retval = -EFAULT;
200 break;
201 }
		} else if (hv_retval <= 0) {
			if (retval == 0)
				retval = hv_retval;
			break;
		}

		retval += hv_retval;
		*f_pos += hv_retval;
		buf += hv_retval;
		count -= hv_retval;
	}

	mutex_unlock(&srom->lock);
	kfree(kernbuf);

	return retval;
}

/**
 * srom_write() - Write data to the device.
 * @filp: File for this specific open of the device.
 * @buf: User's data buffer.
 * @count: Number of bytes requested.
 * @f_pos: File position.
 *
 * Returns number of bytes written, or an error code.
 */
static ssize_t srom_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *f_pos)
{
	int retval = 0;
	void *kernbuf;
	struct srom_dev *srom = filp->private_data;

	kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL);
	if (!kernbuf)
		return -ENOMEM;

	if (mutex_lock_interruptible(&srom->lock)) {
		retval = -ERESTARTSYS;
		kfree(kernbuf);
		return retval;
	}

	while (count) {
		int hv_retval;
		int bytes_this_pass = min(count, SROM_CHUNK_SIZE);

		if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) {
			retval = -EFAULT;
			break;
		}

		hv_retval = _srom_write(srom->hv_devhdl, kernbuf,
					*f_pos, bytes_this_pass);
		if (hv_retval <= 0) {
			if (retval == 0)
				retval = hv_retval;
			break;
		}

		retval += hv_retval;
		*f_pos += hv_retval;
		buf += hv_retval;
		count -= hv_retval;
	}

	mutex_unlock(&srom->lock);
	kfree(kernbuf);

	return retval;
}

/* Provide our own implementation so we can use srom->total_size. */
loff_t srom_llseek(struct file *filp, loff_t offset, int origin)
{
	struct srom_dev *srom = filp->private_data;

	if (mutex_lock_interruptible(&srom->lock))
		return -ERESTARTSYS;

	switch (origin) {
	case SEEK_END:
		offset += srom->total_size;
		break;
	case SEEK_CUR:
		offset += filp->f_pos;
		break;
	}

	if (offset < 0 || offset > srom->total_size) {
		offset = -EINVAL;
	} else {
		filp->f_pos = offset;
		filp->f_version = 0;
	}

	mutex_unlock(&srom->lock);

	return offset;
}

static ssize_t total_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct srom_dev *srom = dev_get_drvdata(dev);
	return sprintf(buf, "%u\n", srom->total_size);
}

static ssize_t sector_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct srom_dev *srom = dev_get_drvdata(dev);
	return sprintf(buf, "%u\n", srom->sector_size);
}

static ssize_t page_show(struct device *dev,
			 struct device_attribute *attr, char *buf)
{
	struct srom_dev *srom = dev_get_drvdata(dev);
	return sprintf(buf, "%u\n", srom->page_size);
}

static struct device_attribute srom_dev_attrs[] = {
	__ATTR(total_size, S_IRUGO, total_show, NULL),
	__ATTR(sector_size, S_IRUGO, sector_show, NULL),
	__ATTR(page_size, S_IRUGO, page_show, NULL),
	__ATTR_NULL
};

static char *srom_devnode(struct device *dev, mode_t *mode)
{
	*mode = S_IRUGO | S_IWUSR;
	return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev));
}

/*
 * The fops
 */
static const struct file_operations srom_fops = {
	.owner =	THIS_MODULE,
	.llseek =	srom_llseek,
	.read =		srom_read,
	.write =	srom_write,
	.open =		srom_open,
	.release =	srom_release,
};

/**
 * srom_setup_minor() - Initialize per-minor information.
 * @srom: Per-device SROM state.
 * @index: Device to set up.
 */
static int srom_setup_minor(struct srom_dev *srom, int index)
{
	struct device *dev;
	int devhdl = srom->hv_devhdl;

	mutex_init(&srom->lock);

	if (_srom_read(devhdl, &srom->total_size,
		       SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0)
		return -EIO;
	if (_srom_read(devhdl, &srom->sector_size,
		       SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0)
		return -EIO;
	if (_srom_read(devhdl, &srom->page_size,
		       SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0)
		return -EIO;

	dev = device_create(srom_class, &platform_bus,
			    MKDEV(srom_major, index), srom, "%d", index);
	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}

/** srom_init() - Initialize the driver's module. */
static int srom_init(void)
{
	int result, i;
	dev_t dev = MKDEV(srom_major, 0);

	/*
	 * Start with a plausible number of partitions; the krealloc() call
	 * below will yield about log(srom_devs) additional allocations.
	 */
	srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);

	/* Discover the number of srom partitions. */
	for (i = 0; ; i++) {
		int devhdl;
		char buf[20];
		struct srom_dev *new_srom_devices =
			krealloc(srom_devices, (i+1) * sizeof(struct srom_dev),
				 GFP_KERNEL | __GFP_ZERO);
		if (!new_srom_devices) {
			result = -ENOMEM;
			goto fail_mem;
		}
		srom_devices = new_srom_devices;
		sprintf(buf, "srom/0/%d", i);
		devhdl = hv_dev_open((HV_VirtAddr)buf, 0);
		if (devhdl < 0) {
			if (devhdl != HV_ENODEV)
				pr_notice("srom/%d: hv_dev_open failed: %d.\n",
					  i, devhdl);
			break;
		}
		srom_devices[i].hv_devhdl = devhdl;
	}
	srom_devs = i;

	/* Bail out early if we have no partitions at all. */
	if (srom_devs == 0) {
		result = -ENODEV;
		goto fail_mem;
	}

	/* Register our major, and accept a dynamic number. */
	if (srom_major)
		result = register_chrdev_region(dev, srom_devs, "srom");
	else {
		result = alloc_chrdev_region(&dev, 0, srom_devs, "srom");
		srom_major = MAJOR(dev);
	}
	if (result < 0)
		goto fail_mem;

	/* Register a character device. */
	cdev_init(&srom_cdev, &srom_fops);
	srom_cdev.owner = THIS_MODULE;
	srom_cdev.ops = &srom_fops;
	result = cdev_add(&srom_cdev, dev, srom_devs);
	if (result < 0)
		goto fail_chrdev;

	/* Create a sysfs class. */
	srom_class = class_create(THIS_MODULE, "srom");
	if (IS_ERR(srom_class)) {
		result = PTR_ERR(srom_class);
		goto fail_cdev;
	}
	srom_class->dev_attrs = srom_dev_attrs;
	srom_class->devnode = srom_devnode;

	/* Do per-partition initialization */
	for (i = 0; i < srom_devs; i++) {
		result = srom_setup_minor(srom_devices + i, i);
		if (result < 0)
			goto fail_class;
	}

	return 0;

fail_class:
	for (i = 0; i < srom_devs; i++)
		device_destroy(srom_class, MKDEV(srom_major, i));
	class_destroy(srom_class);
fail_cdev:
	cdev_del(&srom_cdev);
fail_chrdev:
	unregister_chrdev_region(dev, srom_devs);
fail_mem:
	kfree(srom_devices);
	return result;
}

/** srom_cleanup() - Clean up the driver's module. */
static void srom_cleanup(void)
{
	int i;
	for (i = 0; i < srom_devs; i++)
		device_destroy(srom_class, MKDEV(srom_major, i));
	class_destroy(srom_class);
	cdev_del(&srom_cdev);
	unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs);
	kfree(srom_devices);
}

module_init(srom_init);
module_exit(srom_cleanup);
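
The read and write paths above share one convention: each pass moves at most SROM_CHUNK_SIZE bytes through a kernel bounce buffer, and a failed or short hypervisor transfer ends the loop while still returning the bytes already moved, with *f_pos already advanced past them. A minimal user-space sketch of what that contract means for a caller (the /dev/srom/0 path is an assumption, derived from the "srom/%s" devnode naming above):

/* Hypothetical user-space client of the srom character device. */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	static const char msg[] = "example-data";
	size_t done = 0;
	ssize_t n;
	int fd = open("/dev/srom/0", O_WRONLY);	/* path assumed */

	if (fd < 0)
		return 1;
	while (done < sizeof(msg)) {
		/* A short write is not an error: the driver has already
		   advanced the file position, so simply continue. */
		n = write(fd, msg + done, sizeof(msg) - done);
		if (n <= 0)
			break;
		done += n;
	}
	close(fd);
	return done == sizeof(msg) ? 0 : 1;
}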
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7fc2f108f49..3f4051a7c5a 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -80,7 +80,7 @@ enum tis_defaults {
 static LIST_HEAD(tis_chips);
 static DEFINE_SPINLOCK(tis_lock);
 
-#ifdef CONFIG_PNP
+#if defined(CONFIG_PNP) && defined(CONFIG_ACPI)
 static int is_itpm(struct pnp_dev *dev)
 {
 	struct acpi_device *acpi = pnp_acpi_device(dev);
@@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev)
 
 	return 0;
 }
+#else
+static inline int is_itpm(struct pnp_dev *dev)
+{
+	return 0;
+}
 #endif
 
 static int check_locality(struct tpm_chip *chip, int l)
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 3ee1fdb31ea..e55814bc0d0 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -57,6 +57,7 @@ void proc_fork_connector(struct task_struct *task)
 	struct proc_event *ev;
 	__u8 buffer[CN_PROC_MSG_SIZE];
 	struct timespec ts;
+	struct task_struct *parent;
 
 	if (atomic_read(&proc_event_num_listeners) < 1)
 		return;
@@ -67,8 +68,11 @@ void proc_fork_connector(struct task_struct *task)
 	ktime_get_ts(&ts); /* get high res monotonic timestamp */
 	put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
 	ev->what = PROC_EVENT_FORK;
-	ev->event_data.fork.parent_pid = task->real_parent->pid;
-	ev->event_data.fork.parent_tgid = task->real_parent->tgid;
+	rcu_read_lock();
+	parent = rcu_dereference(task->real_parent);
+	ev->event_data.fork.parent_pid = parent->pid;
+	ev->event_data.fork.parent_tgid = parent->tgid;
+	rcu_read_unlock();
 	ev->event_data.fork.child_pid = task->pid;
 	ev->event_data.fork.child_tgid = task->tgid;
 
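
The hunk above is the stock RCU pattern for task->real_parent, which reparenting can change while the event is being filled in: enter an RCU read-side critical section, snapshot the pointer once with rcu_dereference(), and read every field through that snapshot before unlocking. A generic sketch of the same discipline (the struct and variable names here are illustrative, not from cn_proc):

#include <linux/rcupdate.h>

struct example_state {		/* hypothetical shared object */
	int a;
	int b;
};

static struct example_state __rcu *shared;

static void example_reader(int *a, int *b)
{
	struct example_state *snap;

	rcu_read_lock();
	snap = rcu_dereference(shared);	/* snapshot exactly once */
	*a = snap->a;			/* all reads go through the snapshot */
	*b = snap->b;
	rcu_read_unlock();		/* snap must not be used after this */
}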
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index bf5092455a8..d4c54237288 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -25,9 +25,19 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 
 DEFINE_MUTEX(cpuidle_lock);
 LIST_HEAD(cpuidle_detected_devices);
-static void (*pm_idle_old)(void);
 
 static int enabled_devices;
+static int off __read_mostly;
+static int initialized __read_mostly;
+
+int cpuidle_disabled(void)
+{
+	return off;
+}
+void disable_cpuidle(void)
+{
+	off = 1;
+}
 
 #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT)
 static void cpuidle_kick_cpus(void)
@@ -46,25 +56,23 @@ static int __cpuidle_register_device(struct cpuidle_device *dev);
  * cpuidle_idle_call - the main idle loop
  *
  * NOTE: no locks or semaphores should be used here
+ * return non-zero on failure
  */
-static void cpuidle_idle_call(void)
+int cpuidle_idle_call(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
 	struct cpuidle_state *target_state;
 	int next_state;
 
+	if (off)
+		return -ENODEV;
+
+	if (!initialized)
+		return -ENODEV;
+
 	/* check if the device is ready */
-	if (!dev || !dev->enabled) {
-		if (pm_idle_old)
-			pm_idle_old();
-		else
-#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE)
-			default_idle();
-#else
-			local_irq_enable();
-#endif
-		return;
-	}
+	if (!dev || !dev->enabled)
+		return -EBUSY;
 
 #if 0
 	/* shows regressions, re-enable for 2.6.29 */
@@ -89,7 +97,7 @@ static void cpuidle_idle_call(void)
 	next_state = cpuidle_curr_governor->select(dev);
 	if (need_resched()) {
 		local_irq_enable();
-		return;
+		return 0;
 	}
 
 	target_state = &dev->states[next_state];
@@ -114,6 +122,8 @@ static void cpuidle_idle_call(void)
 	/* give the governor an opportunity to reflect on the outcome */
 	if (cpuidle_curr_governor->reflect)
 		cpuidle_curr_governor->reflect(dev);
+
+	return 0;
 }
 
 /**
@@ -121,10 +131,10 @@ static void cpuidle_idle_call(void)
  */
 void cpuidle_install_idle_handler(void)
 {
-	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+	if (enabled_devices) {
 		/* Make sure all changes finished before we switch to new idle */
 		smp_wmb();
-		pm_idle = cpuidle_idle_call;
+		initialized = 1;
 	}
 }
 
@@ -133,8 +143,8 @@ void cpuidle_install_idle_handler(void)
  */
 void cpuidle_uninstall_idle_handler(void)
 {
-	if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) {
-		pm_idle = pm_idle_old;
+	if (enabled_devices) {
+		initialized = 0;
 		cpuidle_kick_cpus();
 	}
 }
@@ -427,7 +437,8 @@ static int __init cpuidle_init(void)
 {
 	int ret;
 
-	pm_idle_old = pm_idle;
+	if (cpuidle_disabled())
+		return -ENODEV;
 
 	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
 	if (ret)
@@ -438,4 +449,5 @@ static int __init cpuidle_init(void)
 	return 0;
 }
 
+module_param(off, int, 0444);
 core_initcall(cpuidle_init);
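
With pm_idle no longer hijacked, the contract moves to the caller: an architecture's idle loop is now expected to try cpuidle_idle_call() and fall back to its own default idle when the call returns non-zero. A hedged sketch of such a caller (arch_default_idle() is a stand-in name, not a real symbol):

/* Sketch of the new calling convention; arch_default_idle() is hypothetical. */
static void arch_idle_loop_body(void)
{
	/*
	 * cpuidle_idle_call() returns 0 when it entered an idle state on
	 * our behalf, and -ENODEV/-EBUSY when cpuidle is off, not yet
	 * initialized, or this CPU has no enabled cpuidle device.
	 */
	if (cpuidle_idle_call())
		arch_default_idle();	/* arch fallback, e.g. wfi/hlt */
}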
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index 33e50d556f1..38c3fd8b9d7 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors;
 extern struct list_head cpuidle_detected_devices;
 extern struct mutex cpuidle_lock;
 extern spinlock_t cpuidle_driver_lock;
+extern int cpuidle_disabled(void);
 
 /* idle loop */
 extern void cpuidle_install_idle_handler(void);
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index fd1601e3d12..3f7e3cedd13 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -26,6 +26,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
 	if (!drv)
 		return -EINVAL;
 
+	if (cpuidle_disabled())
+		return -ENODEV;
+
 	spin_lock(&cpuidle_driver_lock);
 	if (cpuidle_curr_driver) {
 		spin_unlock(&cpuidle_driver_lock);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 724c164d31c..ea2f8e7aa24 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 	if (!gov || !gov->select)
 		return -EINVAL;
 
+	if (cpuidle_disabled())
+		return -ENODEV;
+
 	mutex_lock(&cpuidle_lock);
 	if (__cpuidle_find_governor(gov->name) == NULL) {
 		ret = 0;
diff --git a/drivers/dma/TODO b/drivers/dma/TODO
index a4af8589330..734ed0206cd 100644
--- a/drivers/dma/TODO
+++ b/drivers/dma/TODO
@@ -9,6 +9,5 @@ TODO for slave dma
   - mxs-dma.c
   - dw_dmac
   - intel_mid_dma
-  - ste_dma40
 4. Check other subsystems for dma drivers and merge/move to dmaengine
 5. Remove dma_slave_config's dma direction.
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index e6d7228b147..196a7378d33 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -156,14 +156,10 @@ struct pl08x_driver_data {
 #define PL08X_BOUNDARY_SHIFT		(10)	/* 1KB 0x400 */
 #define PL08X_BOUNDARY_SIZE		(1 << PL08X_BOUNDARY_SHIFT)
 
-/* Minimum period between work queue runs */
-#define PL08X_WQ_PERIODMIN	20
-
 /* Size (bytes) of each LLI buffer allocated for one transfer */
 # define PL08X_LLI_TSFR_SIZE	0x2000
 
 /* Maximum times we call dma_pool_alloc on this pool without freeing */
-#define PL08X_MAX_ALLOCS	0x40
 #define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli))
 #define PL08X_ALIGN		8
 
@@ -495,10 +491,10 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
 
 struct pl08x_lli_build_data {
 	struct pl08x_txd *txd;
-	struct pl08x_driver_data *pl08x;
 	struct pl08x_bus_data srcbus;
 	struct pl08x_bus_data dstbus;
 	size_t remainder;
+	u32 lli_bus;
 };
 
 /*
@@ -551,8 +547,7 @@ static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd,
 	llis_va[num_llis].src = bd->srcbus.addr;
 	llis_va[num_llis].dst = bd->dstbus.addr;
 	llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli);
-	if (bd->pl08x->lli_buses & PL08X_AHB2)
-		llis_va[num_llis].lli |= PL080_LLI_LM_AHB2;
+	llis_va[num_llis].lli |= bd->lli_bus;
 
 	if (cctl & PL080_CONTROL_SRC_INCR)
 		bd->srcbus.addr += len;
@@ -605,9 +600,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	cctl = txd->cctl;
 
 	bd.txd = txd;
-	bd.pl08x = pl08x;
 	bd.srcbus.addr = txd->src_addr;
 	bd.dstbus.addr = txd->dst_addr;
+	bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0;
 
 	/* Find maximum width of the source bus */
 	bd.srcbus.maxwidth =
@@ -622,25 +617,15 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	/* Set up the bus widths to the maximum */
 	bd.srcbus.buswidth = bd.srcbus.maxwidth;
 	bd.dstbus.buswidth = bd.dstbus.maxwidth;
-	dev_vdbg(&pl08x->adev->dev,
-		 "%s source bus is %d bytes wide, dest bus is %d bytes wide\n",
-		 __func__, bd.srcbus.buswidth, bd.dstbus.buswidth);
-
 
 	/*
 	 * Bytes transferred == tsize * MIN(buswidths), not max(buswidths)
 	 */
 	max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) *
 		PL080_CONTROL_TRANSFER_SIZE_MASK;
-	dev_vdbg(&pl08x->adev->dev,
-		 "%s max bytes per lli = %zu\n",
-		 __func__, max_bytes_per_lli);
 
 	/* We need to count this down to zero */
 	bd.remainder = txd->len;
-	dev_vdbg(&pl08x->adev->dev,
-		 "%s remainder = %zu\n",
-		 __func__, bd.remainder);
 
 	/*
 	 * Choose bus to align to
@@ -649,6 +634,16 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	 */
 	pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
 
+	dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n",
+		 bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
+		 bd.srcbus.buswidth,
+		 bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "",
+		 bd.dstbus.buswidth,
+		 bd.remainder, max_bytes_per_lli);
+	dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
+		 mbus == &bd.srcbus ? "src" : "dst",
+		 sbus == &bd.srcbus ? "src" : "dst");
+
 	if (txd->len < mbus->buswidth) {
 		/* Less than a bus width available - send as single bytes */
 		while (bd.remainder) {
@@ -840,15 +835,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	{
 		int i;
 
+		dev_vdbg(&pl08x->adev->dev,
+			 "%-3s %-9s %-10s %-10s %-10s %s\n",
+			 "lli", "", "csrc", "cdst", "clli", "cctl");
 		for (i = 0; i < num_llis; i++) {
 			dev_vdbg(&pl08x->adev->dev,
-				 "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n",
-				 i,
-				 &llis_va[i],
-				 llis_va[i].src,
-				 llis_va[i].dst,
-				 llis_va[i].cctl,
-				 llis_va[i].lli
+				 "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				 i, &llis_va[i], llis_va[i].src,
+				 llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl
 				);
 		}
 	}
@@ -1054,64 +1048,105 @@ pl08x_dma_tx_status(struct dma_chan *chan,
 
 /* PrimeCell DMA extension */
 struct burst_table {
-	int burstwords;
+	u32 burstwords;
 	u32 reg;
 };
 
 static const struct burst_table burst_sizes[] = {
 	{
 		.burstwords = 256,
-		.reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_256,
 	},
 	{
 		.burstwords = 128,
-		.reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_128,
 	},
 	{
 		.burstwords = 64,
-		.reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_64,
 	},
 	{
 		.burstwords = 32,
-		.reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_32,
 	},
 	{
 		.burstwords = 16,
-		.reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_16,
 	},
 	{
 		.burstwords = 8,
-		.reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_8,
 	},
 	{
 		.burstwords = 4,
-		.reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_4,
 	},
 	{
-		.burstwords = 1,
-		.reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.burstwords = 0,
+		.reg = PL080_BSIZE_1,
 	},
 };
 
+/*
+ * Given the source and destination available bus masks, select which
+ * will be routed to each port. We try to have source and destination
+ * on separate ports, but always respect the allowable settings.
+ */
+static u32 pl08x_select_bus(u8 src, u8 dst)
+{
+	u32 cctl = 0;
+
+	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
+		cctl |= PL080_CONTROL_DST_AHB2;
+	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
+		cctl |= PL080_CONTROL_SRC_AHB2;
+
+	return cctl;
+}
+
+static u32 pl08x_cctl(u32 cctl)
+{
+	cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
+		  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
+		  PL080_CONTROL_PROT_MASK);
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	return cctl | PL080_CONTROL_PROT_SYS;
+}
+
+static u32 pl08x_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return PL080_WIDTH_8BIT;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return PL080_WIDTH_16BIT;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return PL080_WIDTH_32BIT;
+	default:
+		return ~0;
+	}
+}
+
+static u32 pl08x_burst(u32 maxburst)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
+		if (burst_sizes[i].burstwords <= maxburst)
+			break;
+
+	return burst_sizes[i].reg;
+}
+
 static int dma_set_runtime_config(struct dma_chan *chan,
 				  struct dma_slave_config *config)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
-	struct pl08x_channel_data *cd = plchan->cd;
 	enum dma_slave_buswidth addr_width;
-	dma_addr_t addr;
-	u32 maxburst;
+	u32 width, burst, maxburst;
 	u32 cctl = 0;
-	int i;
 
 	if (!plchan->slave)
 		return -EINVAL;
@@ -1119,11 +1154,9 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	/* Transfer direction */
 	plchan->runtime_direction = config->direction;
 	if (config->direction == DMA_TO_DEVICE) {
-		addr = config->dst_addr;
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
 	} else if (config->direction == DMA_FROM_DEVICE) {
-		addr = config->src_addr;
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
 	} else {
@@ -1132,46 +1165,40 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 		return -EINVAL;
 	}
 
-	switch (addr_width) {
-	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) |
-			(PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT);
-		break;
-	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) |
-			(PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT);
-		break;
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) |
-			(PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT);
-		break;
-	default:
+	width = pl08x_width(addr_width);
+	if (width == ~0) {
 		dev_err(&pl08x->adev->dev,
 			"bad runtime_config: alien address width\n");
 		return -EINVAL;
 	}
 
+	cctl |= width << PL080_CONTROL_SWIDTH_SHIFT;
+	cctl |= width << PL080_CONTROL_DWIDTH_SHIFT;
+
 	/*
-	 * Now decide on a maxburst:
 	 * If this channel will only request single transfers, set this
 	 * down to ONE element. Also select one element if no maxburst
 	 * is specified.
 	 */
-	if (plchan->cd->single || maxburst == 0) {
-		cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT);
+	if (plchan->cd->single)
+		maxburst = 1;
+
+	burst = pl08x_burst(maxburst);
+	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
+	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
+
+	if (plchan->runtime_direction == DMA_FROM_DEVICE) {
+		plchan->src_addr = config->src_addr;
+		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
+			pl08x_select_bus(plchan->cd->periph_buses,
+					 pl08x->mem_buses);
 	} else {
-		for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
-			if (burst_sizes[i].burstwords <= maxburst)
-				break;
-		cctl |= burst_sizes[i].reg;
+		plchan->dst_addr = config->dst_addr;
+		plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR |
+			pl08x_select_bus(pl08x->mem_buses,
+					 plchan->cd->periph_buses);
 	}
 
-	plchan->runtime_addr = addr;
-
-	/* Modify the default channel data to fit PrimeCell request */
-	cd->cctl = cctl;
-
 	dev_dbg(&pl08x->adev->dev,
 		"configured channel %s (%s) for %s, data width %d, "
 		"maxburst %d words, LE, CCTL=0x%08x\n",
@@ -1270,23 +1297,6 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
 	return 0;
 }
 
-/*
- * Given the source and destination available bus masks, select which
- * will be routed to each port. We try to have source and destination
- * on separate ports, but always respect the allowable settings.
- */
-static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst)
-{
-	u32 cctl = 0;
-
-	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
-		cctl |= PL080_CONTROL_DST_AHB2;
-	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
-		cctl |= PL080_CONTROL_SRC_AHB2;
-
-	return cctl;
-}
-
 static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan,
 	unsigned long flags)
 {
@@ -1338,8 +1348,8 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
 
 	if (pl08x->vd->dualmaster)
-		txd->cctl |= pl08x_select_bus(pl08x,
-					      pl08x->mem_buses, pl08x->mem_buses);
+		txd->cctl |= pl08x_select_bus(pl08x->mem_buses,
+					      pl08x->mem_buses);
 
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
@@ -1356,7 +1366,6 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
-	u8 src_buses, dst_buses;
 	int ret;
 
 	/*
@@ -1390,42 +1399,22 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	txd->direction = direction;
 	txd->len = sgl->length;
 
-	txd->cctl = plchan->cd->cctl &
-			~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
-			  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
-			  PL080_CONTROL_PROT_MASK);
-
-	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
-	txd->cctl |= PL080_CONTROL_PROT_SYS;
-
 	if (direction == DMA_TO_DEVICE) {
 		txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
-		txd->cctl |= PL080_CONTROL_SRC_INCR;
+		txd->cctl = plchan->dst_cctl;
 		txd->src_addr = sgl->dma_address;
-		if (plchan->runtime_addr)
-			txd->dst_addr = plchan->runtime_addr;
-		else
-			txd->dst_addr = plchan->cd->addr;
-		src_buses = pl08x->mem_buses;
-		dst_buses = plchan->cd->periph_buses;
+		txd->dst_addr = plchan->dst_addr;
 	} else if (direction == DMA_FROM_DEVICE) {
 		txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
-		txd->cctl |= PL080_CONTROL_DST_INCR;
-		if (plchan->runtime_addr)
-			txd->src_addr = plchan->runtime_addr;
-		else
-			txd->src_addr = plchan->cd->addr;
+		txd->cctl = plchan->src_cctl;
+		txd->src_addr = plchan->src_addr;
 		txd->dst_addr = sgl->dma_address;
-		src_buses = plchan->cd->periph_buses;
-		dst_buses = pl08x->mem_buses;
 	} else {
 		dev_err(&pl08x->adev->dev,
 			"%s direction unsupported\n", __func__);
 		return NULL;
 	}
 
-	txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses);
-
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
 		return NULL;
@@ -1676,6 +1665,20 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 	return mask ? IRQ_HANDLED : IRQ_NONE;
 }
 
+static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan)
+{
+	u32 cctl = pl08x_cctl(chan->cd->cctl);
+
+	chan->slave = true;
+	chan->name = chan->cd->bus_id;
+	chan->src_addr = chan->cd->addr;
+	chan->dst_addr = chan->cd->addr;
+	chan->src_cctl = cctl | PL080_CONTROL_DST_INCR |
+		pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses);
+	chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR |
+		pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses);
+}
+
 /*
  * Initialise the DMAC memcpy/slave channels.
  * Make a local wrapper to hold required data
@@ -1707,9 +1710,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 		chan->state = PL08X_CHAN_IDLE;
 
 		if (slave) {
-			chan->slave = true;
-			chan->name = pl08x->pd->slave_channels[i].bus_id;
 			chan->cd = &pl08x->pd->slave_channels[i];
+			pl08x_dma_slave_init(chan);
 		} else {
 			chan->cd = &pl08x->pd->memcpy_channel;
 			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
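
Two details of the helpers added earlier in this diff are worth making explicit. pl08x_burst() scans burst_sizes[] from the largest burst down and returns the first entry whose burstwords does not exceed the requested maxburst, i.e. it rounds down; and the last table entry was changed from .burstwords = 1 to 0 precisely so that maxburst == 0 still resolves to PL080_BSIZE_1 rather than running off the table. A standalone illustration of that lookup rule (plain C, values abbreviated; nothing here is driver code):

/* Hypothetical standalone illustration of the pl08x_burst() rounding rule. */
#include <stdio.h>

static const unsigned int words[] = { 256, 128, 64, 32, 16, 8, 4, 0 };

static unsigned int pick(unsigned int maxburst)
{
	unsigned int i;

	/* First entry not larger than the request wins: round down. */
	for (i = 0; i < sizeof(words) / sizeof(words[0]); i++)
		if (words[i] <= maxburst)
			break;
	return words[i];	/* words[7] == 0 catches maxburst == 0 */
}

int main(void)
{
	printf("%u %u %u\n", pick(200), pick(16), pick(0));	/* 128 16 0 */
	return 0;
}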
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 36144f88d71..6a483eac7b3 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1216,7 +1216,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	atdma->dma_common.cap_mask = pdata->cap_mask;
 	atdma->all_chan_mask = (1 << pdata->nr_channels) - 1;
 
-	size = io->end - io->start + 1;
+	size = resource_size(io);
 	if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
 		err = -EBUSY;
 		goto err_kfree;
@@ -1362,7 +1362,7 @@ static int __exit at_dma_remove(struct platform_device *pdev)
 	atdma->regs = NULL;
 
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(io->start, io->end - io->start + 1);
+	release_mem_region(io->start, resource_size(io));
 
 	kfree(atdma);
 
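
Both at_hdmac hunks replace the open-coded io->end - io->start + 1 with resource_size(), which computes exactly the same byte count; the helper exists so the recurring + 1 cannot be forgotten. For reference, its definition in include/linux/ioport.h is simply:

static inline resource_size_t resource_size(const struct resource *res)
{
	return res->end - res->start + 1;
}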
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index a92d95eac86..4234f416ef1 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -41,6 +41,8 @@ struct coh901318_desc {
 	struct coh901318_lli *lli;
 	enum dma_data_direction dir;
 	unsigned long flags;
+	u32 head_config;
+	u32 head_ctrl;
 };
 
 struct coh901318_base {
@@ -661,6 +663,9 @@ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc)
 
 	coh901318_desc_submit(cohc, cohd);
 
+	/* Program the transaction head */
+	coh901318_set_conf(cohc, cohd->head_config);
+	coh901318_set_ctrl(cohc, cohd->head_ctrl);
 	coh901318_prep_linked_list(cohc, cohd->lli);
 
 	/* start dma job on this channel */
@@ -1091,8 +1096,6 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	} else
 		goto err_direction;
 
-	coh901318_set_conf(cohc, config);
-
 	/* The dma only supports transmitting packages up to
 	 * MAX_DMA_PACKET_SIZE. Calculate to total number of
 	 * dma elemts required to send the entire sg list
@@ -1129,16 +1132,18 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (ret)
 		goto err_lli_fill;
 
-	/*
-	 * Set the default ctrl for the channel to the one from the lli,
-	 * things may have changed due to odd buffer alignment etc.
-	 */
-	coh901318_set_ctrl(cohc, lli->control);
 
 	COH_DBG(coh901318_list_print(cohc, lli));
 
 	/* Pick a descriptor to handle this transfer */
 	cohd = coh901318_desc_get(cohc);
+	cohd->head_config = config;
+	/*
+	 * Set the default head ctrl for the channel to the one from the
+	 * lli, things may have changed due to odd buffer alignment
+	 * etc.
+	 */
+	cohd->head_ctrl = lli->control;
 	cohd->dir = direction;
 	cohd->flags = flags;
 	cohd->desc.tx_submit = coh901318_tx_submit;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 48694c34d96..b48967b499d 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -62,9 +62,9 @@
 #include <linux/slab.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
+static DEFINE_IDR(dma_idr);
 static LIST_HEAD(dma_device_list);
 static long dmaengine_ref_count;
-static struct idr dma_idr;
 
 /* --- sysfs implementation --- */
 
@@ -510,8 +510,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 				 dma_chan_name(chan));
 			list_del_rcu(&device->global_node);
 		} else if (err)
-			pr_err("dmaengine: failed to get %s: (%d)\n",
-			       dma_chan_name(chan), err);
+			pr_debug("dmaengine: failed to get %s: (%d)\n",
+				 dma_chan_name(chan), err);
 		else
 			break;
 		if (--device->privatecnt == 0)
@@ -1050,8 +1050,6 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies);
 
 static int __init dma_bus_init(void)
 {
-	idr_init(&dma_idr);
-	mutex_init(&dma_list_mutex);
 	return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);
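
The dmaengine hunks trade runtime initialization for compile-time initializers: DEFINE_IDR() produces an idr that is valid before any initcall runs, and dma_list_mutex was already statically initialized by DEFINE_MUTEX(), so the mutex_init() in dma_bus_init() was redundant. A sketch of the two equivalent forms (the example_* names are hypothetical):

/* Sketch: static vs. runtime initialization of the same objects. */
#include <linux/idr.h>
#include <linux/mutex.h>

/* Ready at link time -- usable from any initcall level: */
static DEFINE_IDR(example_idr);
static DEFINE_MUTEX(example_lock);

/* The equivalent runtime form, only safe to use after this has run: */
static struct idr example_idr2;
static struct mutex example_lock2;

static int __init example_init(void)
{
	idr_init(&example_idr2);
	mutex_init(&example_lock2);
	return 0;
}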
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 0766c1e53b1..5d7a49bd7c2 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -902,7 +902,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
  *
  * Returns a valid DMA descriptor or %NULL in case of failure.
  */
-struct dma_async_tx_descriptor *
+static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
 			   dma_addr_t src, size_t len, unsigned long flags)
 {
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 1eb60ded2f0..7bd7e98548c 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1305,8 +1305,10 @@ static int __init sdma_probe(struct platform_device *pdev)
 		goto err_request_irq;
 
 	sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL);
-	if (!sdma->script_addrs)
+	if (!sdma->script_addrs) {
+		ret = -ENOMEM;
 		goto err_alloc;
+	}
 
 	if (of_id)
 		pdev->id_entry = of_id->data;
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index f653517ef74..8a3fdd87db9 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -1351,7 +1351,6 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state)
 		return -EAGAIN;
 	}
 	device->state = SUSPENDED;
-	pci_set_drvdata(pci, device);
 	pci_save_state(pci);
 	pci_disable_device(pci);
 	pci_set_power_state(pci, PCI_D3hot);
@@ -1380,7 +1379,6 @@ int dma_resume(struct pci_dev *pci)
 	}
 	device->state = RUNNING;
 	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-	pci_set_drvdata(pci, device);
 	return 0;
 }
 
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d845dc4b710..f519c93a61e 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -73,10 +73,10 @@
 /* provide a lookup table for setting the source address in the base or
  * extended descriptor of an xor or pq descriptor
  */
-static const u8 xor_idx_to_desc __read_mostly = 0xd0;
-static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
-static const u8 pq_idx_to_desc __read_mostly = 0xf8;
-static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
 
 static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
 {
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index fab37d1cf48..5e3a40f7994 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -72,6 +72,17 @@ static struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
 
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
+
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index fd7d2b308cf..6815905a772 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1706,16 +1706,14 @@ static int __init ipu_probe(struct platform_device *pdev)
 		ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base);
 
 	/* Remap IPU common registers */
-	ipu_data.reg_ipu = ioremap(mem_ipu->start,
-				   mem_ipu->end - mem_ipu->start + 1);
+	ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu));
 	if (!ipu_data.reg_ipu) {
 		ret = -ENOMEM;
 		goto err_ioremap_ipu;
 	}
 
 	/* Remap Image Converter and Image DMA Controller registers */
-	ipu_data.reg_ic = ioremap(mem_ic->start,
-				  mem_ic->end - mem_ic->start + 1);
+	ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic));
 	if (!ipu_data.reg_ic) {
 		ret = -ENOMEM;
 		goto err_ioremap_ic;
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 06f9f27dbe7..9a353c2216d 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1304,7 +1304,8 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 	if (!res)
 		return -ENODEV;
 
-	msp->xor_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+				     resource_size(res));
 	if (!msp->xor_base)
 		return -EBUSY;
 
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 88aad4f5400..be641cbd36f 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -327,10 +327,12 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 
 	memset(mxs_chan->ccw, 0, PAGE_SIZE);
 
-	ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
-			  0, "mxs-dma", mxs_dma);
-	if (ret)
-		goto err_irq;
+	if (mxs_chan->chan_irq != NO_IRQ) {
+		ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
+				  0, "mxs-dma", mxs_dma);
+		if (ret)
+			goto err_irq;
+	}
 
 	ret = clk_enable(mxs_dma->clk);
 	if (ret)
@@ -535,6 +537,7 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
 		mxs_dma_disable_chan(mxs_chan);
+		mxs_dma_reset_chan(mxs_chan);
 		break;
 	case DMA_PAUSE:
 		mxs_dma_pause_chan(mxs_chan);
@@ -707,6 +710,8 @@ static struct platform_device_id mxs_dma_type[] = {
 	}, {
 		.name = "mxs-dma-apbx",
 		.driver_data = MXS_DMA_APBX,
+	}, {
+		/* end of list */
 	}
 };
 
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index ff5b38f9d45..1ac8d4b580b 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -45,7 +45,8 @@
 #define DMA_STATUS_MASK_BITS		0x3
 #define DMA_STATUS_SHIFT_BITS		16
 #define DMA_STATUS_IRQ(x)		(0x1 << (x))
-#define DMA_STATUS_ERR(x)		(0x1 << ((x) + 8))
+#define DMA_STATUS0_ERR(x)		(0x1 << ((x) + 8))
+#define DMA_STATUS2_ERR(x)		(0x1 << (x))
 
 #define DMA_DESC_WIDTH_SHIFT_BITS	12
 #define DMA_DESC_WIDTH_1_BYTE		(0x3 << DMA_DESC_WIDTH_SHIFT_BITS)
@@ -61,6 +62,9 @@
 
 #define MAX_CHAN_NR			8
 
+#define DMA_MASK_CTL0_MODE	0x33333333
+#define DMA_MASK_CTL2_MODE	0x00003333
+
 static unsigned int init_nr_desc_per_channel = 64;
 module_param(init_nr_desc_per_channel, uint, 0644);
 MODULE_PARM_DESC(init_nr_desc_per_channel,
@@ -133,6 +137,7 @@ struct pch_dma {
 #define PCH_DMA_CTL3	0x0C
 #define PCH_DMA_STS0	0x10
 #define PCH_DMA_STS1	0x14
+#define PCH_DMA_STS2	0x18
 
 #define dma_readl(pd, name) \
 	readl((pd)->membase + PCH_DMA_##name)
@@ -183,13 +188,19 @@ static void pdc_enable_irq(struct dma_chan *chan, int enable)
 {
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
+	int pos;
+
+	if (chan->chan_id < 8)
+		pos = chan->chan_id;
+	else
+		pos = chan->chan_id + 8;
 
 	val = dma_readl(pd, CTL2);
 
 	if (enable)
-		val |= 0x1 << chan->chan_id;
+		val |= 0x1 << pos;
 	else
-		val &= ~(0x1 << chan->chan_id);
+		val &= ~(0x1 << pos);
 
 	dma_writel(pd, CTL2, val);
 
@@ -202,10 +213,17 @@ static void pdc_set_dir(struct dma_chan *chan)
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
+	u32 mask_mode;
+	u32 mask_ctl;
 
 	if (chan->chan_id < 8) {
 		val = dma_readl(pd, CTL0);
 
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+					(DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val &= mask_mode;
 		if (pd_chan->dir == DMA_TO_DEVICE)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 				       DMA_CTL0_DIR_SHIFT_BITS);
@@ -213,18 +231,24 @@ static void pdc_set_dir(struct dma_chan *chan)
 			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 					 DMA_CTL0_DIR_SHIFT_BITS));
 
+		val |= mask_ctl;
 		dma_writel(pd, CTL0, val);
 	} else {
 		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
 		val = dma_readl(pd, CTL3);
 
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+						(DMA_CTL0_BITS_PER_CH * ch);
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		val &= mask_mode;
 		if (pd_chan->dir == DMA_TO_DEVICE)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
 			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 					 DMA_CTL0_DIR_SHIFT_BITS));
-
+		val |= mask_ctl;
 		dma_writel(pd, CTL3, val);
 	}
 
@@ -236,33 +260,37 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode)
 {
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
+	u32 mask_ctl;
+	u32 mask_dir;
 
 	if (chan->chan_id < 8) {
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+			   (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
 		val = dma_readl(pd, CTL0);
-
-		val &= ~(DMA_CTL0_MODE_MASK_BITS <<
-			(DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val &= mask_dir;
 		val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
-
+		val |= mask_ctl;
 		dma_writel(pd, CTL0, val);
 	} else {
 		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
-
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
 		val = dma_readl(pd, CTL3);
-
-		val &= ~(DMA_CTL0_MODE_MASK_BITS <<
-			(DMA_CTL0_BITS_PER_CH * ch));
+		val &= mask_dir;
 		val |= mode << (DMA_CTL0_BITS_PER_CH * ch);
-
+		val |= mask_ctl;
 		dma_writel(pd, CTL3, val);
-
 	}
 
 	dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n",
 		chan->chan_id, val);
 }
 
-static u32 pdc_get_status(struct pch_dma_chan *pd_chan)
+static u32 pdc_get_status0(struct pch_dma_chan *pd_chan)
 {
 	struct pch_dma *pd = to_pd(pd_chan->chan.device);
 	u32 val;
272 DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id)); 300 DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id));
273} 301}
274 302
303static u32 pdc_get_status2(struct pch_dma_chan *pd_chan)
304{
305 struct pch_dma *pd = to_pd(pd_chan->chan.device);
306 u32 val;
307
308 val = dma_readl(pd, STS2);
309 return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
310 DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8)));
311}
312
275static bool pdc_is_idle(struct pch_dma_chan *pd_chan) 313static bool pdc_is_idle(struct pch_dma_chan *pd_chan)
276{ 314{
277 if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE) 315 u32 sts;
316
317 if (pd_chan->chan.chan_id < 8)
318 sts = pdc_get_status0(pd_chan);
319 else
320 sts = pdc_get_status2(pd_chan);
321
322
323 if (sts == DMA_STATUS_IDLE)
278 return true; 324 return true;
279 else 325 else
280 return false; 326 return false;
@@ -495,11 +541,11 @@ static int pd_alloc_chan_resources(struct dma_chan *chan)
 		list_add_tail(&desc->desc_node, &tmp_list);
 	}
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 	list_splice(&tmp_list, &pd_chan->free_list);
 	pd_chan->descs_allocated = i;
 	pd_chan->completed_cookie = chan->cookie = 1;
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 	pdc_enable_irq(chan, 1);
 
@@ -517,10 +563,10 @@ static void pd_free_chan_resources(struct dma_chan *chan)
 	BUG_ON(!list_empty(&pd_chan->active_list));
 	BUG_ON(!list_empty(&pd_chan->queue));
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 	list_splice_init(&pd_chan->free_list, &tmp_list);
 	pd_chan->descs_allocated = 0;
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 	list_for_each_entry_safe(desc, _d, &tmp_list, desc_node)
 		pci_pool_free(pd->pool, desc, desc->txd.phys);
@@ -536,10 +582,10 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	dma_cookie_t last_completed;
 	int ret;
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 	last_completed = pd_chan->completed_cookie;
 	last_used = chan->cookie;
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 	ret = dma_async_is_complete(cookie, last_completed, last_used);
 
@@ -654,7 +700,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	if (cmd != DMA_TERMINATE_ALL)
 		return -ENXIO;
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 
 	pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE);
 
@@ -664,7 +710,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	list_for_each_entry_safe(desc, _d, &list, desc_node)
 		pdc_chain_complete(pd_chan, desc);
 
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 	return 0;
 }
@@ -693,30 +739,45 @@ static irqreturn_t pd_irq(int irq, void *devid)
693 struct pch_dma *pd = (struct pch_dma *)devid; 739 struct pch_dma *pd = (struct pch_dma *)devid;
694 struct pch_dma_chan *pd_chan; 740 struct pch_dma_chan *pd_chan;
695 u32 sts0; 741 u32 sts0;
742 u32 sts2;
696 int i; 743 int i;
697 int ret = IRQ_NONE; 744 int ret0 = IRQ_NONE;
745 int ret2 = IRQ_NONE;
698 746
699 sts0 = dma_readl(pd, STS0); 747 sts0 = dma_readl(pd, STS0);
748 sts2 = dma_readl(pd, STS2);
700 749
701 dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0); 750 dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0);
702 751
703 for (i = 0; i < pd->dma.chancnt; i++) { 752 for (i = 0; i < pd->dma.chancnt; i++) {
704 pd_chan = &pd->channels[i]; 753 pd_chan = &pd->channels[i];
705 754
706 if (sts0 & DMA_STATUS_IRQ(i)) { 755 if (i < 8) {
707 if (sts0 & DMA_STATUS_ERR(i)) 756 if (sts0 & DMA_STATUS_IRQ(i)) {
708 set_bit(0, &pd_chan->err_status); 757 if (sts0 & DMA_STATUS0_ERR(i))
758 set_bit(0, &pd_chan->err_status);
709 759
710 tasklet_schedule(&pd_chan->tasklet); 760 tasklet_schedule(&pd_chan->tasklet);
711 ret = IRQ_HANDLED; 761 ret0 = IRQ_HANDLED;
712 } 762 }
763 } else {
764 if (sts2 & DMA_STATUS_IRQ(i - 8)) {
765 if (sts2 & DMA_STATUS2_ERR(i))
766 set_bit(0, &pd_chan->err_status);
713 767
768 tasklet_schedule(&pd_chan->tasklet);
769 ret2 = IRQ_HANDLED;
770 }
771 }
714 } 772 }
715 773
716 /* clear interrupt bits in status register */ 774 /* clear interrupt bits in status register */
717 dma_writel(pd, STS0, sts0); 775 if (ret0)
776 dma_writel(pd, STS0, sts0);
777 if (ret2)
778 dma_writel(pd, STS2, sts2);
718 779
719 return ret; 780 return ret0 | ret2;
720} 781}
721 782
722#ifdef CONFIG_PM 783#ifdef CONFIG_PM
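
The pch_dma hunks above make two independent fixes: the channel lock moves from spin_lock_bh() to spin_lock_irq(), taking the lock with interrupts disabled rather than only softirqs, and interrupt dispatch is split across two status registers so parts with more than eight channels work (channels 0-7 report through STS0, channels 8 and up through STS2). A minimal sketch of the two-register dispatch follows; the demo_* names, the write-to-clear acknowledge, and the 8-channel split are assumptions drawn from the hunk, not the driver's real API.

#include <linux/interrupt.h>
#include <linux/bitops.h>

struct demo_chan { struct tasklet_struct tasklet; };
struct demo_dma  { int nr_chan; struct demo_chan *channels; };

/* Hypothetical register accessors, standing in for dma_readl/dma_writel. */
static u32 demo_read_sts0(struct demo_dma *pd);
static u32 demo_read_sts2(struct demo_dma *pd);
static void demo_write_sts0(struct demo_dma *pd, u32 v);
static void demo_write_sts2(struct demo_dma *pd, u32 v);

static irqreturn_t demo_irq(struct demo_dma *pd)
{
	u32 sts0 = demo_read_sts0(pd);
	u32 sts2 = demo_read_sts2(pd);
	irqreturn_t ret0 = IRQ_NONE, ret2 = IRQ_NONE;
	int i;

	for (i = 0; i < pd->nr_chan; i++) {
		u32 sts = i < 8 ? sts0 : sts2;	/* channels 8+ live in STS2 */
		int bit = i < 8 ? i : i - 8;

		if (sts & BIT(bit)) {
			tasklet_schedule(&pd->channels[i].tasklet);
			if (i < 8)
				ret0 = IRQ_HANDLED;
			else
				ret2 = IRQ_HANDLED;
		}
	}

	/* Acknowledge only the register(s) that actually raised work. */
	if (ret0 == IRQ_HANDLED)
		demo_write_sts0(pd, sts0);
	if (ret2 == IRQ_HANDLED)
		demo_write_sts2(pd, sts2);

	return ret0 | ret2;
}
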
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 6abe1ec1f2c..00eee59e8b3 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -82,7 +82,7 @@ struct dma_pl330_dmac {
82 spinlock_t pool_lock; 82 spinlock_t pool_lock;
83 83
84 /* Peripheral channels connected to this DMAC */ 84 /* Peripheral channels connected to this DMAC */
85 struct dma_pl330_chan peripherals[0]; /* keep at end */ 85 struct dma_pl330_chan *peripherals; /* keep at end */
86}; 86};
87 87
88struct dma_pl330_desc { 88struct dma_pl330_desc {
@@ -451,8 +451,13 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
451 desc->txd.cookie = 0; 451 desc->txd.cookie = 0;
452 async_tx_ack(&desc->txd); 452 async_tx_ack(&desc->txd);
453 453
454 desc->req.rqtype = peri->rqtype; 454 if (peri) {
455 desc->req.peri = peri->peri_id; 455 desc->req.rqtype = peri->rqtype;
456 desc->req.peri = peri->peri_id;
457 } else {
458 desc->req.rqtype = MEMTOMEM;
459 desc->req.peri = 0;
460 }
456 461
457 dma_async_tx_descriptor_init(&desc->txd, &pch->chan); 462 dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
458 463
@@ -529,10 +534,10 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
529 struct pl330_info *pi; 534 struct pl330_info *pi;
530 int burst; 535 int burst;
531 536
532 if (unlikely(!pch || !len || !peri)) 537 if (unlikely(!pch || !len))
533 return NULL; 538 return NULL;
534 539
535 if (peri->rqtype != MEMTOMEM) 540 if (peri && peri->rqtype != MEMTOMEM)
536 return NULL; 541 return NULL;
537 542
538 pi = &pch->dmac->pif; 543 pi = &pch->dmac->pif;
@@ -577,7 +582,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
577 int i, burst_size; 582 int i, burst_size;
578 dma_addr_t addr; 583 dma_addr_t addr;
579 584
580 if (unlikely(!pch || !sgl || !sg_len)) 585 if (unlikely(!pch || !sgl || !sg_len || !peri))
581 return NULL; 586 return NULL;
582 587
583 /* Make sure the direction is consistent */ 588 /* Make sure the direction is consistent */
@@ -666,17 +671,12 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
666 struct dma_device *pd; 671 struct dma_device *pd;
667 struct resource *res; 672 struct resource *res;
668 int i, ret, irq; 673 int i, ret, irq;
674 int num_chan;
669 675
670 pdat = adev->dev.platform_data; 676 pdat = adev->dev.platform_data;
671 677
672 if (!pdat || !pdat->nr_valid_peri) {
673 dev_err(&adev->dev, "platform data missing\n");
674 return -ENODEV;
675 }
676
677 /* Allocate a new DMAC and its Channels */ 678 /* Allocate a new DMAC and its Channels */
678 pdmac = kzalloc(pdat->nr_valid_peri * sizeof(*pch) 679 pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL);
679 + sizeof(*pdmac), GFP_KERNEL);
680 if (!pdmac) { 680 if (!pdmac) {
681 dev_err(&adev->dev, "unable to allocate mem\n"); 681 dev_err(&adev->dev, "unable to allocate mem\n");
682 return -ENOMEM; 682 return -ENOMEM;
@@ -685,7 +685,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
685 pi = &pdmac->pif; 685 pi = &pdmac->pif;
686 pi->dev = &adev->dev; 686 pi->dev = &adev->dev;
687 pi->pl330_data = NULL; 687 pi->pl330_data = NULL;
688 pi->mcbufsz = pdat->mcbuf_sz; 688 pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
689 689
690 res = &adev->res; 690 res = &adev->res;
691 request_mem_region(res->start, resource_size(res), "dma-pl330"); 691 request_mem_region(res->start, resource_size(res), "dma-pl330");
@@ -717,27 +717,35 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
717 INIT_LIST_HEAD(&pd->channels); 717 INIT_LIST_HEAD(&pd->channels);
718 718
719 /* Initialize channel parameters */ 719 /* Initialize channel parameters */
720 for (i = 0; i < pdat->nr_valid_peri; i++) { 720 num_chan = max(pdat ? pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan);
721 struct dma_pl330_peri *peri = &pdat->peri[i]; 721 pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL);
722 pch = &pdmac->peripherals[i];
723 722
724 switch (peri->rqtype) { 723 for (i = 0; i < num_chan; i++) {
725 case MEMTOMEM: 724 pch = &pdmac->peripherals[i];
725 if (pdat) {
726 struct dma_pl330_peri *peri = &pdat->peri[i];
727
728 switch (peri->rqtype) {
729 case MEMTOMEM:
730 dma_cap_set(DMA_MEMCPY, pd->cap_mask);
731 break;
732 case MEMTODEV:
733 case DEVTOMEM:
734 dma_cap_set(DMA_SLAVE, pd->cap_mask);
735 break;
736 default:
737 dev_err(&adev->dev, "DEVTODEV Not Supported\n");
738 continue;
739 }
740 pch->chan.private = peri;
741 } else {
726 dma_cap_set(DMA_MEMCPY, pd->cap_mask); 742 dma_cap_set(DMA_MEMCPY, pd->cap_mask);
727 break; 743 pch->chan.private = NULL;
728 case MEMTODEV:
729 case DEVTOMEM:
730 dma_cap_set(DMA_SLAVE, pd->cap_mask);
731 break;
732 default:
733 dev_err(&adev->dev, "DEVTODEV Not Supported\n");
734 continue;
735 } 744 }
736 745
737 INIT_LIST_HEAD(&pch->work_list); 746 INIT_LIST_HEAD(&pch->work_list);
738 spin_lock_init(&pch->lock); 747 spin_lock_init(&pch->lock);
739 pch->pl330_chid = NULL; 748 pch->pl330_chid = NULL;
740 pch->chan.private = peri;
741 pch->chan.device = pd; 749 pch->chan.device = pd;
742 pch->chan.chan_id = i; 750 pch->chan.chan_id = i;
743 pch->dmac = pdmac; 751 pch->dmac = pdmac;
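
With the pl330 changes above, platform data becomes optional: the DMAC is allocated separately from its channel array, the channel count falls back to the hardware-reported pi->pcfg.num_chan, and channels without a peripheral descriptor default to memcpy-only with chan.private = NULL. Note that the new kzalloc() of pdmac->peripherals in the hunk is not checked for failure. A sketch of the same setup with the check added; all demo_* names are illustrative stand-ins, not the driver's types.

#include <linux/slab.h>
#include <linux/kernel.h>

struct demo_peri { int rqtype; };
struct demo_platdata { u8 nr_valid_peri; struct demo_peri *peri; };
struct demo_chan { void *private; };
struct demo_dmac { struct demo_chan *peripherals; };

static int demo_init_channels(struct demo_dmac *dmac,
			      struct demo_platdata *pdat,
			      unsigned int hw_chans)
{
	unsigned int n = max_t(unsigned int,
			       pdat ? pdat->nr_valid_peri : 0, hw_chans);
	unsigned int i;

	dmac->peripherals = kcalloc(n, sizeof(*dmac->peripherals),
				    GFP_KERNEL);
	if (!dmac->peripherals)
		return -ENOMEM;	/* the hunk above skips this check */

	for (i = 0; i < n; i++)
		/* Channels beyond the platform data are memcpy-only. */
		dmac->peripherals[i].private =
			(pdat && i < pdat->nr_valid_peri) ?
			&pdat->peri[i] : NULL;

	return 0;
}
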
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 29d1addbe0c..cd3a7c726bf 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -14,6 +14,7 @@
14#include <linux/clk.h> 14#include <linux/clk.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/err.h> 16#include <linux/err.h>
17#include <linux/amba/bus.h>
17 18
18#include <plat/ste_dma40.h> 19#include <plat/ste_dma40.h>
19 20
@@ -45,9 +46,6 @@
45#define D40_ALLOC_PHY (1 << 30) 46#define D40_ALLOC_PHY (1 << 30)
46#define D40_ALLOC_LOG_FREE 0 47#define D40_ALLOC_LOG_FREE 0
47 48
48/* Hardware designer of the block */
49#define D40_HW_DESIGNER 0x8
50
51/** 49/**
52 * enum 40_command - The different commands and/or statuses. 50 * enum 40_command - The different commands and/or statuses.
53 * 51 *
@@ -186,6 +184,8 @@ struct d40_base;
186 * @log_def: Default logical channel settings. 184 * @log_def: Default logical channel settings.
187 * @lcla: Space for one dst src pair for logical channel transfers. 185 * @lcla: Space for one dst src pair for logical channel transfers.
188 * @lcpa: Pointer to dst and src lcpa settings. 186 * @lcpa: Pointer to dst and src lcpa settings.
187 * @runtime_addr: runtime configured address.
188 * @runtime_direction: runtime configured direction.
189 * 189 *
190 * This struct can either "be" a logical or a physical channel. 190 * This struct can either "be" a logical or a physical channel.
191 */ 191 */
@@ -200,6 +200,7 @@ struct d40_chan {
200 struct dma_chan chan; 200 struct dma_chan chan;
201 struct tasklet_struct tasklet; 201 struct tasklet_struct tasklet;
202 struct list_head client; 202 struct list_head client;
203 struct list_head pending_queue;
203 struct list_head active; 204 struct list_head active;
204 struct list_head queue; 205 struct list_head queue;
205 struct stedma40_chan_cfg dma_cfg; 206 struct stedma40_chan_cfg dma_cfg;
@@ -645,7 +646,20 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
645 646
646static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc) 647static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
647{ 648{
648 list_add_tail(&desc->node, &d40c->queue); 649 list_add_tail(&desc->node, &d40c->pending_queue);
650}
651
652static struct d40_desc *d40_first_pending(struct d40_chan *d40c)
653{
654 struct d40_desc *d;
655
656 if (list_empty(&d40c->pending_queue))
657 return NULL;
658
659 d = list_first_entry(&d40c->pending_queue,
660 struct d40_desc,
661 node);
662 return d;
649} 663}
650 664
651static struct d40_desc *d40_first_queued(struct d40_chan *d40c) 665static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
@@ -802,6 +816,11 @@ static void d40_term_all(struct d40_chan *d40c)
802 d40_desc_free(d40c, d40d); 816 d40_desc_free(d40c, d40d);
803 } 817 }
804 818
819 /* Release pending descriptors */
820 while ((d40d = d40_first_pending(d40c))) {
821 d40_desc_remove(d40d);
822 d40_desc_free(d40c, d40d);
823 }
805 824
806 d40c->pending_tx = 0; 825 d40c->pending_tx = 0;
807 d40c->busy = false; 826 d40c->busy = false;
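
The new pending_queue gives ste_dma40 the standard dmaengine submit/issue split: d40_desc_queue() now parks a submitted descriptor on pending_queue, d40_issue_pending() (shown in full in the @@ -2152 hunk below) splices the pending list onto the hardware queue and kicks an idle channel, and d40_term_all() above drains the pending list too, so DMA_TERMINATE_ALL cannot leak descriptors. The client-visible contract, sketched (the descriptor is assumed to come from a prior prep call):

#include <linux/dmaengine.h>

static int demo_kick(struct dma_chan *chan,
		     struct dma_async_tx_descriptor *desc)
{
	/* Submission only parks the descriptor on the pending list. */
	dma_cookie_t cookie = dmaengine_submit(desc);

	if (dma_submit_error(cookie))
		return -EIO;

	/* Splice pending -> queue and start the hardware if idle. */
	dma_async_issue_pending(chan);
	return 0;
}
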
@@ -2092,7 +2111,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
2092 struct scatterlist *sg; 2111 struct scatterlist *sg;
2093 int i; 2112 int i;
2094 2113
2095 sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL); 2114 sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
2096 for (i = 0; i < periods; i++) { 2115 for (i = 0; i < periods; i++) {
2097 sg_dma_address(&sg[i]) = dma_addr; 2116 sg_dma_address(&sg[i]) = dma_addr;
2098 sg_dma_len(&sg[i]) = period_len; 2117 sg_dma_len(&sg[i]) = period_len;
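
The GFP_KERNEL to GFP_NOWAIT change in dma40_prep_dma_cyclic matters because dmaengine prep callbacks may be invoked from atomic context (for instance under a client's lock or from a completion callback), where a sleeping allocation would be a bug. Note also that the lines shown dereference sg without checking the allocation; a defensive sketch of that step:

/* Sketch: atomic-safe allocation inside a prep callback, with the
 * NULL check the shown lines omit; prep functions report failure by
 * returning NULL. */
sg = kcalloc(periods + 1, sizeof(*sg), GFP_NOWAIT);
if (!sg)
	return NULL;
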
@@ -2152,24 +2171,87 @@ static void d40_issue_pending(struct dma_chan *chan)
2152 2171
2153 spin_lock_irqsave(&d40c->lock, flags); 2172 spin_lock_irqsave(&d40c->lock, flags);
2154 2173
2155 /* Busy means that pending jobs are already being processed */ 2174 list_splice_tail_init(&d40c->pending_queue, &d40c->queue);
2175
2176 /* Busy means that queued jobs are already being processed */
2156 if (!d40c->busy) 2177 if (!d40c->busy)
2157 (void) d40_queue_start(d40c); 2178 (void) d40_queue_start(d40c);
2158 2179
2159 spin_unlock_irqrestore(&d40c->lock, flags); 2180 spin_unlock_irqrestore(&d40c->lock, flags);
2160} 2181}
2161 2182
2183static int
2184dma40_config_to_halfchannel(struct d40_chan *d40c,
2185 struct stedma40_half_channel_info *info,
2186 enum dma_slave_buswidth width,
2187 u32 maxburst)
2188{
2189 enum stedma40_periph_data_width addr_width;
2190 int psize;
2191
2192 switch (width) {
2193 case DMA_SLAVE_BUSWIDTH_1_BYTE:
2194 addr_width = STEDMA40_BYTE_WIDTH;
2195 break;
2196 case DMA_SLAVE_BUSWIDTH_2_BYTES:
2197 addr_width = STEDMA40_HALFWORD_WIDTH;
2198 break;
2199 case DMA_SLAVE_BUSWIDTH_4_BYTES:
2200 addr_width = STEDMA40_WORD_WIDTH;
2201 break;
2202 case DMA_SLAVE_BUSWIDTH_8_BYTES:
2203 addr_width = STEDMA40_DOUBLEWORD_WIDTH;
2204 break;
2205 default:
2206 dev_err(d40c->base->dev,
2207 "illegal peripheral address width "
2208 "requested (%d)\n",
2209 width);
2210 return -EINVAL;
2211 }
2212
2213 if (chan_is_logical(d40c)) {
2214 if (maxburst >= 16)
2215 psize = STEDMA40_PSIZE_LOG_16;
2216 else if (maxburst >= 8)
2217 psize = STEDMA40_PSIZE_LOG_8;
2218 else if (maxburst >= 4)
2219 psize = STEDMA40_PSIZE_LOG_4;
2220 else
2221 psize = STEDMA40_PSIZE_LOG_1;
2222 } else {
2223 if (maxburst >= 16)
2224 psize = STEDMA40_PSIZE_PHY_16;
2225 else if (maxburst >= 8)
2226 psize = STEDMA40_PSIZE_PHY_8;
2227 else if (maxburst >= 4)
2228 psize = STEDMA40_PSIZE_PHY_4;
2229 else
2230 psize = STEDMA40_PSIZE_PHY_1;
2231 }
2232
2233 info->data_width = addr_width;
2234 info->psize = psize;
2235 info->flow_ctrl = STEDMA40_NO_FLOW_CTRL;
2236
2237 return 0;
2238}
2239
2162/* Runtime reconfiguration extension */ 2240/* Runtime reconfiguration extension */
2163static void d40_set_runtime_config(struct dma_chan *chan, 2241static int d40_set_runtime_config(struct dma_chan *chan,
2164 struct dma_slave_config *config) 2242 struct dma_slave_config *config)
2165{ 2243{
2166 struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); 2244 struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2167 struct stedma40_chan_cfg *cfg = &d40c->dma_cfg; 2245 struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
2168 enum dma_slave_buswidth config_addr_width; 2246 enum dma_slave_buswidth src_addr_width, dst_addr_width;
2169 dma_addr_t config_addr; 2247 dma_addr_t config_addr;
2170 u32 config_maxburst; 2248 u32 src_maxburst, dst_maxburst;
2171 enum stedma40_periph_data_width addr_width; 2249 int ret;
2172 int psize; 2250
2251 src_addr_width = config->src_addr_width;
2252 src_maxburst = config->src_maxburst;
2253 dst_addr_width = config->dst_addr_width;
2254 dst_maxburst = config->dst_maxburst;
2173 2255
2174 if (config->direction == DMA_FROM_DEVICE) { 2256 if (config->direction == DMA_FROM_DEVICE) {
2175 dma_addr_t dev_addr_rx = 2257 dma_addr_t dev_addr_rx =
@@ -2188,8 +2270,11 @@ static void d40_set_runtime_config(struct dma_chan *chan,
2188 cfg->dir); 2270 cfg->dir);
2189 cfg->dir = STEDMA40_PERIPH_TO_MEM; 2271 cfg->dir = STEDMA40_PERIPH_TO_MEM;
2190 2272
2191 config_addr_width = config->src_addr_width; 2273 /* Configure the memory side */
2192 config_maxburst = config->src_maxburst; 2274 if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
2275 dst_addr_width = src_addr_width;
2276 if (dst_maxburst == 0)
2277 dst_maxburst = src_maxburst;
2193 2278
2194 } else if (config->direction == DMA_TO_DEVICE) { 2279 } else if (config->direction == DMA_TO_DEVICE) {
2195 dma_addr_t dev_addr_tx = 2280 dma_addr_t dev_addr_tx =
@@ -2208,68 +2293,39 @@ static void d40_set_runtime_config(struct dma_chan *chan,
2208 cfg->dir); 2293 cfg->dir);
2209 cfg->dir = STEDMA40_MEM_TO_PERIPH; 2294 cfg->dir = STEDMA40_MEM_TO_PERIPH;
2210 2295
2211 config_addr_width = config->dst_addr_width; 2296 /* Configure the memory side */
2212 config_maxburst = config->dst_maxburst; 2297 if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
2213 2298 src_addr_width = dst_addr_width;
2299 if (src_maxburst == 0)
2300 src_maxburst = dst_maxburst;
2214 } else { 2301 } else {
2215 dev_err(d40c->base->dev, 2302 dev_err(d40c->base->dev,
2216 "unrecognized channel direction %d\n", 2303 "unrecognized channel direction %d\n",
2217 config->direction); 2304 config->direction);
2218 return; 2305 return -EINVAL;
2219 } 2306 }
2220 2307
2221 switch (config_addr_width) { 2308 if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) {
2222 case DMA_SLAVE_BUSWIDTH_1_BYTE:
2223 addr_width = STEDMA40_BYTE_WIDTH;
2224 break;
2225 case DMA_SLAVE_BUSWIDTH_2_BYTES:
2226 addr_width = STEDMA40_HALFWORD_WIDTH;
2227 break;
2228 case DMA_SLAVE_BUSWIDTH_4_BYTES:
2229 addr_width = STEDMA40_WORD_WIDTH;
2230 break;
2231 case DMA_SLAVE_BUSWIDTH_8_BYTES:
2232 addr_width = STEDMA40_DOUBLEWORD_WIDTH;
2233 break;
2234 default:
2235 dev_err(d40c->base->dev, 2309 dev_err(d40c->base->dev,
2236 "illegal peripheral address width " 2310 "src/dst width/maxburst mismatch: %d*%d != %d*%d\n",
2237 "requested (%d)\n", 2311 src_maxburst,
2238 config->src_addr_width); 2312 src_addr_width,
2239 return; 2313 dst_maxburst,
2314 dst_addr_width);
2315 return -EINVAL;
2240 } 2316 }
2241 2317
2242 if (chan_is_logical(d40c)) { 2318 ret = dma40_config_to_halfchannel(d40c, &cfg->src_info,
2243 if (config_maxburst >= 16) 2319 src_addr_width,
2244 psize = STEDMA40_PSIZE_LOG_16; 2320 src_maxburst);
2245 else if (config_maxburst >= 8) 2321 if (ret)
2246 psize = STEDMA40_PSIZE_LOG_8; 2322 return ret;
2247 else if (config_maxburst >= 4)
2248 psize = STEDMA40_PSIZE_LOG_4;
2249 else
2250 psize = STEDMA40_PSIZE_LOG_1;
2251 } else {
2252 if (config_maxburst >= 16)
2253 psize = STEDMA40_PSIZE_PHY_16;
2254 else if (config_maxburst >= 8)
2255 psize = STEDMA40_PSIZE_PHY_8;
2256 else if (config_maxburst >= 4)
2257 psize = STEDMA40_PSIZE_PHY_4;
2258 else if (config_maxburst >= 2)
2259 psize = STEDMA40_PSIZE_PHY_2;
2260 else
2261 psize = STEDMA40_PSIZE_PHY_1;
2262 }
2263 2323
2264 /* Set up all the endpoint configs */ 2324 ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info,
2265 cfg->src_info.data_width = addr_width; 2325 dst_addr_width,
2266 cfg->src_info.psize = psize; 2326 dst_maxburst);
2267 cfg->src_info.big_endian = false; 2327 if (ret)
2268 cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; 2328 return ret;
2269 cfg->dst_info.data_width = addr_width;
2270 cfg->dst_info.psize = psize;
2271 cfg->dst_info.big_endian = false;
2272 cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
2273 2329
2274 /* Fill in register values */ 2330 /* Fill in register values */
2275 if (chan_is_logical(d40c)) 2331 if (chan_is_logical(d40c))
@@ -2282,12 +2338,14 @@ static void d40_set_runtime_config(struct dma_chan *chan,
2282 d40c->runtime_addr = config_addr; 2338 d40c->runtime_addr = config_addr;
2283 d40c->runtime_direction = config->direction; 2339 d40c->runtime_direction = config->direction;
2284 dev_dbg(d40c->base->dev, 2340 dev_dbg(d40c->base->dev,
2285 "configured channel %s for %s, data width %d, " 2341 "configured channel %s for %s, data width %d/%d, "
2286 "maxburst %d bytes, LE, no flow control\n", 2342 "maxburst %d/%d elements, LE, no flow control\n",
2287 dma_chan_name(chan), 2343 dma_chan_name(chan),
2288 (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX", 2344 (config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
2289 config_addr_width, 2345 src_addr_width, dst_addr_width,
2290 config_maxburst); 2346 src_maxburst, dst_maxburst);
2347
2348 return 0;
2291} 2349}
2292 2350
2293static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, 2351static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -2308,9 +2366,8 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
2308 case DMA_RESUME: 2366 case DMA_RESUME:
2309 return d40_resume(d40c); 2367 return d40_resume(d40c);
2310 case DMA_SLAVE_CONFIG: 2368 case DMA_SLAVE_CONFIG:
2311 d40_set_runtime_config(chan, 2369 return d40_set_runtime_config(chan,
2312 (struct dma_slave_config *) arg); 2370 (struct dma_slave_config *) arg);
2313 return 0;
2314 default: 2371 default:
2315 break; 2372 break;
2316 } 2373 }
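
Turning d40_set_runtime_config() into an int-returning function lets DMA_SLAVE_CONFIG propagate failures, so a client now sees -EINVAL for an unknown direction, an illegal bus width, or a src/dst bandwidth mismatch instead of a silently ignored configuration. A client-side sketch; DEMO_TX_FIFO and the burst value are illustrative, not from this driver:

#include <linux/dmaengine.h>

struct dma_slave_config cfg = {
	.direction      = DMA_TO_DEVICE,
	.dst_addr       = DEMO_TX_FIFO,	/* hypothetical device FIFO */
	.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
	.dst_maxburst   = 8,
};
int ret = dmaengine_slave_config(chan, &cfg);

if (ret)
	dev_err(dev, "channel config rejected: %d\n", ret);
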
@@ -2341,6 +2398,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
2341 2398
2342 INIT_LIST_HEAD(&d40c->active); 2399 INIT_LIST_HEAD(&d40c->active);
2343 INIT_LIST_HEAD(&d40c->queue); 2400 INIT_LIST_HEAD(&d40c->queue);
2401 INIT_LIST_HEAD(&d40c->pending_queue);
2344 INIT_LIST_HEAD(&d40c->client); 2402 INIT_LIST_HEAD(&d40c->client);
2345 2403
2346 tasklet_init(&d40c->tasklet, dma_tasklet, 2404 tasklet_init(&d40c->tasklet, dma_tasklet,
@@ -2502,25 +2560,6 @@ static int __init d40_phy_res_init(struct d40_base *base)
2502 2560
2503static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) 2561static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
2504{ 2562{
2505 static const struct d40_reg_val dma_id_regs[] = {
2506 /* Peripheral Id */
2507 { .reg = D40_DREG_PERIPHID0, .val = 0x0040},
2508 { .reg = D40_DREG_PERIPHID1, .val = 0x0000},
2509 /*
2510 * D40_DREG_PERIPHID2 Depends on HW revision:
2511 * DB8500ed has 0x0008,
2512 * ? has 0x0018,
2513 * DB8500v1 has 0x0028
2514 * DB8500v2 has 0x0038
2515 */
2516 { .reg = D40_DREG_PERIPHID3, .val = 0x0000},
2517
2518 /* PCell Id */
2519 { .reg = D40_DREG_CELLID0, .val = 0x000d},
2520 { .reg = D40_DREG_CELLID1, .val = 0x00f0},
2521 { .reg = D40_DREG_CELLID2, .val = 0x0005},
2522 { .reg = D40_DREG_CELLID3, .val = 0x00b1}
2523 };
2524 struct stedma40_platform_data *plat_data; 2563 struct stedma40_platform_data *plat_data;
2525 struct clk *clk = NULL; 2564 struct clk *clk = NULL;
2526 void __iomem *virtbase = NULL; 2565 void __iomem *virtbase = NULL;
@@ -2529,8 +2568,9 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
2529 int num_log_chans = 0; 2568 int num_log_chans = 0;
2530 int num_phy_chans; 2569 int num_phy_chans;
2531 int i; 2570 int i;
2532 u32 val; 2571 u32 pid;
2533 u32 rev; 2572 u32 cid;
2573 u8 rev;
2534 2574
2535 clk = clk_get(&pdev->dev, NULL); 2575 clk = clk_get(&pdev->dev, NULL);
2536 2576
@@ -2554,32 +2594,32 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
2554 if (!virtbase) 2594 if (!virtbase)
2555 goto failure; 2595 goto failure;
2556 2596
2557 /* HW version check */ 2597 /* This is just a regular AMBA PrimeCell ID actually */
2558 for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { 2598 for (pid = 0, i = 0; i < 4; i++)
2559 if (dma_id_regs[i].val != 2599 pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i)
2560 readl(virtbase + dma_id_regs[i].reg)) { 2600 & 255) << (i * 8);
2561 d40_err(&pdev->dev, 2601 for (cid = 0, i = 0; i < 4; i++)
2562 "Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", 2602 cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i)
2563 dma_id_regs[i].val, 2603 & 255) << (i * 8);
2564 dma_id_regs[i].reg,
2565 readl(virtbase + dma_id_regs[i].reg));
2566 goto failure;
2567 }
2568 }
2569 2604
2570 /* Get silicon revision and designer */ 2605 if (cid != AMBA_CID) {
2571 val = readl(virtbase + D40_DREG_PERIPHID2); 2606 d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n");
2572 2607 goto failure;
2573 if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != 2608 }
2574 D40_HW_DESIGNER) { 2609 if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) {
2575 d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", 2610 d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n",
2576 val & D40_DREG_PERIPHID2_DESIGNER_MASK, 2611 AMBA_MANF_BITS(pid),
2577 D40_HW_DESIGNER); 2612 AMBA_VENDOR_ST);
2578 goto failure; 2613 goto failure;
2579 } 2614 }
2580 2615 /*
2581 rev = (val & D40_DREG_PERIPHID2_REV_MASK) >> 2616 * HW revision:
2582 D40_DREG_PERIPHID2_REV_POS; 2617 * DB8500ed has revision 0
2618 * ? has revision 1
2619 * DB8500v1 has revision 2
2620 * DB8500v2 has revision 3
2621 */
2622 rev = AMBA_REV_BITS(pid);
2583 2623
2584 /* The number of physical channels on this HW */ 2624 /* The number of physical channels on this HW */
2585 num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4; 2625 num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
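
Instead of matching a hardcoded register/value table, d40_hw_detect_init now assembles the standard AMBA PrimeCell identification words: four PeriphID bytes at offset size - 0x20 and four CellID bytes at size - 0x10, one byte per 32-bit register. The CellID must equal AMBA_CID, the manufacturer bits must decode to AMBA_VENDOR_ST, and the revision then comes from AMBA_REV_BITS(pid) rather than a per-chip constant. A generic sketch of the assembly (demo_* naming is illustrative):

#include <linux/io.h>
#include <linux/ioport.h>

/* Assemble a 32-bit PrimeCell ID from four byte-wide registers at
 * the top of the peripheral's address window. */
static u32 demo_read_primecell_id(void __iomem *base,
				  resource_size_t size, unsigned int off)
{
	u32 id = 0;
	int i;

	for (i = 0; i < 4; i++)
		id |= (readl(base + size - off + 4 * i) & 0xff) << (i * 8);
	return id;
}

/* pid = demo_read_primecell_id(virtbase, resource_size(res), 0x20);
 * cid = demo_read_primecell_id(virtbase, resource_size(res), 0x10); */
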
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
index 195ee65ee7f..b44c455158d 100644
--- a/drivers/dma/ste_dma40_ll.h
+++ b/drivers/dma/ste_dma40_ll.h
@@ -184,9 +184,6 @@
184#define D40_DREG_PERIPHID0 0xFE0 184#define D40_DREG_PERIPHID0 0xFE0
185#define D40_DREG_PERIPHID1 0xFE4 185#define D40_DREG_PERIPHID1 0xFE4
186#define D40_DREG_PERIPHID2 0xFE8 186#define D40_DREG_PERIPHID2 0xFE8
187#define D40_DREG_PERIPHID2_REV_POS 4
188#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS)
189#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf
190#define D40_DREG_PERIPHID3 0xFEC 187#define D40_DREG_PERIPHID3 0xFEC
191#define D40_DREG_CELLID0 0xFF0 188#define D40_DREG_CELLID0 0xFF0
192#define D40_DREG_CELLID1 0xFF4 189#define D40_DREG_CELLID1 0xFF4
diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c
index 30da70d06a6..cdae207028a 100644
--- a/drivers/eisa/pci_eisa.c
+++ b/drivers/eisa/pci_eisa.c
@@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev,
45 return 0; 45 return 0;
46} 46}
47 47
48static struct pci_device_id __initdata pci_eisa_pci_tbl[] = { 48static struct pci_device_id pci_eisa_pci_tbl[] = {
49 { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 49 { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
50 PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, 50 PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 },
51 { 0, } 51 { 0, }
52}; 52};
53 53
54static struct pci_driver __initdata pci_eisa_driver = { 54static struct pci_driver __refdata pci_eisa_driver = {
55 .name = "pci_eisa", 55 .name = "pci_eisa",
56 .id_table = pci_eisa_pci_tbl, 56 .id_table = pci_eisa_pci_tbl,
57 .probe = pci_eisa_init, 57 .probe = pci_eisa_init,
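
The annotation change above fixes a use-after-free class of bug: pci_register_driver() keeps the pci_driver (and its id_table) linked into the PCI core for the life of the system, so neither may live in __initdata, which is discarded after boot. __refdata on the driver structure tells modpost that its reference to the __init probe routine is intentional, silencing the section-mismatch warning. The pattern, sketched with demo_* placeholders:

#include <linux/pci.h>
#include <linux/init.h>

/* Persistent driver object deliberately referencing __init code;
 * __refdata records that intent for modpost. */
static int __init demo_probe(struct pci_dev *pdev,
			     const struct pci_device_id *id);

static struct pci_device_id demo_ids[] = {	/* must persist too */
	{ PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
	{ 0, }
};

static struct pci_driver __refdata demo_driver = {
	.name     = "demo",
	.id_table = demo_ids,
	.probe    = demo_probe,
};
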
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 5f29aafd446..eb80b549ed8 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -78,6 +78,7 @@
78#include <linux/kobject.h> 78#include <linux/kobject.h>
79#include <linux/device.h> 79#include <linux/device.h>
80#include <linux/slab.h> 80#include <linux/slab.h>
81#include <linux/pstore.h>
81 82
82#include <asm/uaccess.h> 83#include <asm/uaccess.h>
83 84
@@ -89,6 +90,8 @@ MODULE_DESCRIPTION("sysfs interface to EFI Variables");
89MODULE_LICENSE("GPL"); 90MODULE_LICENSE("GPL");
90MODULE_VERSION(EFIVARS_VERSION); 91MODULE_VERSION(EFIVARS_VERSION);
91 92
93#define DUMP_NAME_LEN 52
94
92/* 95/*
93 * The maximum size of VariableName + Data = 1024 96 * The maximum size of VariableName + Data = 1024
94 * Therefore, it's reasonable to save that much 97 * Therefore, it's reasonable to save that much
@@ -119,6 +122,10 @@ struct efivar_attribute {
119 ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count); 122 ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
120}; 123};
121 124
125#define PSTORE_EFI_ATTRIBUTES \
126 (EFI_VARIABLE_NON_VOLATILE | \
127 EFI_VARIABLE_BOOTSERVICE_ACCESS | \
128 EFI_VARIABLE_RUNTIME_ACCESS)
122 129
123#define EFIVAR_ATTR(_name, _mode, _show, _store) \ 130#define EFIVAR_ATTR(_name, _mode, _show, _store) \
124struct efivar_attribute efivar_attr_##_name = { \ 131struct efivar_attribute efivar_attr_##_name = { \
@@ -141,38 +148,72 @@ efivar_create_sysfs_entry(struct efivars *efivars,
141 148
142/* Return the number of unicode characters in data */ 149/* Return the number of unicode characters in data */
143static unsigned long 150static unsigned long
144utf8_strlen(efi_char16_t *data, unsigned long maxlength) 151utf16_strnlen(efi_char16_t *s, size_t maxlength)
145{ 152{
146 unsigned long length = 0; 153 unsigned long length = 0;
147 154
148 while (*data++ != 0 && length < maxlength) 155 while (*s++ != 0 && length < maxlength)
149 length++; 156 length++;
150 return length; 157 return length;
151} 158}
152 159
160static inline unsigned long
161utf16_strlen(efi_char16_t *s)
162{
163 return utf16_strnlen(s, ~0UL);
164}
165
153/* 166/*
154 * Return the number of bytes in this string 167 * Return the number of bytes in this string
155 * Note: this is NOT the same as the number of unicode characters 168 * Note: this is NOT the same as the number of unicode characters
156 */ 169 */
157static inline unsigned long 170static inline unsigned long
158utf8_strsize(efi_char16_t *data, unsigned long maxlength) 171utf16_strsize(efi_char16_t *data, unsigned long maxlength)
159{ 172{
160 return utf8_strlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t); 173 return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t);
174}
175
176static inline int
177utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len)
178{
179 while (1) {
180 if (len == 0)
181 return 0;
182 if (*a < *b)
183 return -1;
184 if (*a > *b)
185 return 1;
186 if (*a == 0) /* implies *b == 0 */
187 return 0;
188 a++;
189 b++;
190 len--;
191 }
161} 192}
162 193
163static efi_status_t 194static efi_status_t
164get_var_data(struct efivars *efivars, struct efi_variable *var) 195get_var_data_locked(struct efivars *efivars, struct efi_variable *var)
165{ 196{
166 efi_status_t status; 197 efi_status_t status;
167 198
168 spin_lock(&efivars->lock);
169 var->DataSize = 1024; 199 var->DataSize = 1024;
170 status = efivars->ops->get_variable(var->VariableName, 200 status = efivars->ops->get_variable(var->VariableName,
171 &var->VendorGuid, 201 &var->VendorGuid,
172 &var->Attributes, 202 &var->Attributes,
173 &var->DataSize, 203 &var->DataSize,
174 var->Data); 204 var->Data);
205 return status;
206}
207
208static efi_status_t
209get_var_data(struct efivars *efivars, struct efi_variable *var)
210{
211 efi_status_t status;
212
213 spin_lock(&efivars->lock);
214 status = get_var_data_locked(efivars, var);
175 spin_unlock(&efivars->lock); 215 spin_unlock(&efivars->lock);
216
176 if (status != EFI_SUCCESS) { 217 if (status != EFI_SUCCESS) {
177 printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n", 218 printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n",
178 status); 219 status);
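
The utf8_* to utf16_* rename above fixes the naming only: EFI variable names are strings of 16-bit characters (UCS-2), not UTF-8, and utf16_strsize() scales by sizeof(efi_char16_t) accordingly. The new utf16_strncmp() exists so the pstore code below can match the "dump-..." name prefix. A small usage sketch; the array contents are illustrative:

static efi_char16_t prefix[] = { 'd', 'u', 'm', 'p', '-', 0 };
static efi_char16_t name[]   = { 'd', 'u', 'm', 'p', '-', '1', 0 };

/* Prefix test on 16-bit EFI variable names using the new helpers. */
if (!utf16_strncmp(name, prefix, utf16_strlen(prefix)))
	pr_debug("name begins with the dump- prefix\n");
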
@@ -387,12 +428,180 @@ static struct kobj_type efivar_ktype = {
387 .default_attrs = def_attrs, 428 .default_attrs = def_attrs,
388}; 429};
389 430
431static struct pstore_info efi_pstore_info;
432
390static inline void 433static inline void
391efivar_unregister(struct efivar_entry *var) 434efivar_unregister(struct efivar_entry *var)
392{ 435{
393 kobject_put(&var->kobj); 436 kobject_put(&var->kobj);
394} 437}
395 438
439#ifdef CONFIG_PSTORE
440
441static int efi_pstore_open(struct pstore_info *psi)
442{
443 struct efivars *efivars = psi->data;
444
445 spin_lock(&efivars->lock);
446 efivars->walk_entry = list_first_entry(&efivars->list,
447 struct efivar_entry, list);
448 return 0;
449}
450
451static int efi_pstore_close(struct pstore_info *psi)
452{
453 struct efivars *efivars = psi->data;
454
455 spin_unlock(&efivars->lock);
456 return 0;
457}
458
459static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
460 struct timespec *timespec, struct pstore_info *psi)
461{
462 efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
463 struct efivars *efivars = psi->data;
464 char name[DUMP_NAME_LEN];
465 int i;
466 unsigned int part, size;
467 unsigned long time;
468
469 while (&efivars->walk_entry->list != &efivars->list) {
470 if (!efi_guidcmp(efivars->walk_entry->var.VendorGuid,
471 vendor)) {
472 for (i = 0; i < DUMP_NAME_LEN; i++) {
473 name[i] = efivars->walk_entry->var.VariableName[i];
474 }
475 if (sscanf(name, "dump-type%u-%u-%lu", type, &part, &time) == 3) {
476 *id = part;
477 timespec->tv_sec = time;
478 timespec->tv_nsec = 0;
479 get_var_data_locked(efivars, &efivars->walk_entry->var);
480 size = efivars->walk_entry->var.DataSize;
481 memcpy(psi->buf, efivars->walk_entry->var.Data, size);
482 efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
483 struct efivar_entry, list);
484 return size;
485 }
486 }
487 efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
488 struct efivar_entry, list);
489 }
490 return 0;
491}
492
493static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
494 size_t size, struct pstore_info *psi)
495{
496 char name[DUMP_NAME_LEN];
497 char stub_name[DUMP_NAME_LEN];
498 efi_char16_t efi_name[DUMP_NAME_LEN];
499 efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
500 struct efivars *efivars = psi->data;
501 struct efivar_entry *entry, *found = NULL;
502 int i;
503
504 sprintf(stub_name, "dump-type%u-%u-", type, part);
505 sprintf(name, "%s%lu", stub_name, get_seconds());
506
507 spin_lock(&efivars->lock);
508
509 for (i = 0; i < DUMP_NAME_LEN; i++)
510 efi_name[i] = stub_name[i];
511
512 /*
513 * Clean up any entries with the same name
514 */
515
516 list_for_each_entry(entry, &efivars->list, list) {
517 get_var_data_locked(efivars, &entry->var);
518
519 if (efi_guidcmp(entry->var.VendorGuid, vendor))
520 continue;
521 if (utf16_strncmp(entry->var.VariableName, efi_name,
522 utf16_strlen(efi_name)))
523 continue;
524 /* Needs to be a prefix */
525 if (entry->var.VariableName[utf16_strlen(efi_name)] == 0)
526 continue;
527
528 /* found */
529 found = entry;
530 efivars->ops->set_variable(entry->var.VariableName,
531 &entry->var.VendorGuid,
532 PSTORE_EFI_ATTRIBUTES,
533 0, NULL);
534 }
535
536 if (found)
537 list_del(&found->list);
538
539 for (i = 0; i < DUMP_NAME_LEN; i++)
540 efi_name[i] = name[i];
541
542 efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES,
543 size, psi->buf);
544
545 spin_unlock(&efivars->lock);
546
547 if (found)
548 efivar_unregister(found);
549
550 if (size)
551 efivar_create_sysfs_entry(efivars,
552 utf16_strsize(efi_name,
553 DUMP_NAME_LEN * 2),
554 efi_name, &vendor);
555
556 return part;
557};
558
559static int efi_pstore_erase(enum pstore_type_id type, u64 id,
560 struct pstore_info *psi)
561{
562 efi_pstore_write(type, id, 0, psi);
563
564 return 0;
565}
566#else
567static int efi_pstore_open(struct pstore_info *psi)
568{
569 return 0;
570}
571
572static int efi_pstore_close(struct pstore_info *psi)
573{
574 return 0;
575}
576
577static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
578 struct timespec *time, struct pstore_info *psi)
579{
580 return -1;
581}
582
583static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
584 size_t size, struct pstore_info *psi)
585{
586 return 0;
587}
588
589static int efi_pstore_erase(enum pstore_type_id type, u64 id,
590 struct pstore_info *psi)
591{
592 return 0;
593}
594#endif
595
596static struct pstore_info efi_pstore_info = {
597 .owner = THIS_MODULE,
598 .name = "efi",
599 .open = efi_pstore_open,
600 .close = efi_pstore_close,
601 .read = efi_pstore_read,
602 .write = efi_pstore_write,
603 .erase = efi_pstore_erase,
604};
396 605
397static ssize_t efivar_create(struct file *filp, struct kobject *kobj, 606static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
398 struct bin_attribute *bin_attr, 607 struct bin_attribute *bin_attr,
@@ -414,8 +623,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
414 * Does this variable already exist? 623 * Does this variable already exist?
415 */ 624 */
416 list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { 625 list_for_each_entry_safe(search_efivar, n, &efivars->list, list) {
417 strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); 626 strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024);
418 strsize2 = utf8_strsize(new_var->VariableName, 1024); 627 strsize2 = utf16_strsize(new_var->VariableName, 1024);
419 if (strsize1 == strsize2 && 628 if (strsize1 == strsize2 &&
420 !memcmp(&(search_efivar->var.VariableName), 629 !memcmp(&(search_efivar->var.VariableName),
421 new_var->VariableName, strsize1) && 630 new_var->VariableName, strsize1) &&
@@ -447,8 +656,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
447 656
448 /* Create the entry in sysfs. Locking is not required here */ 657 /* Create the entry in sysfs. Locking is not required here */
449 status = efivar_create_sysfs_entry(efivars, 658 status = efivar_create_sysfs_entry(efivars,
450 utf8_strsize(new_var->VariableName, 659 utf16_strsize(new_var->VariableName,
451 1024), 660 1024),
452 new_var->VariableName, 661 new_var->VariableName,
453 &new_var->VendorGuid); 662 &new_var->VendorGuid);
454 if (status) { 663 if (status) {
@@ -477,8 +686,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
477 * Does this variable already exist? 686 * Does this variable already exist?
478 */ 687 */
479 list_for_each_entry_safe(search_efivar, n, &efivars->list, list) { 688 list_for_each_entry_safe(search_efivar, n, &efivars->list, list) {
480 strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024); 689 strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024);
481 strsize2 = utf8_strsize(del_var->VariableName, 1024); 690 strsize2 = utf16_strsize(del_var->VariableName, 1024);
482 if (strsize1 == strsize2 && 691 if (strsize1 == strsize2 &&
483 !memcmp(&(search_efivar->var.VariableName), 692 !memcmp(&(search_efivar->var.VariableName),
484 del_var->VariableName, strsize1) && 693 del_var->VariableName, strsize1) &&
@@ -763,6 +972,16 @@ int register_efivars(struct efivars *efivars,
763 if (error) 972 if (error)
764 unregister_efivars(efivars); 973 unregister_efivars(efivars);
765 974
975 efivars->efi_pstore_info = efi_pstore_info;
976
977 efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL);
978 if (efivars->efi_pstore_info.buf) {
979 efivars->efi_pstore_info.bufsize = 1024;
980 efivars->efi_pstore_info.data = efivars;
981 mutex_init(&efivars->efi_pstore_info.buf_mutex);
982 pstore_register(&efivars->efi_pstore_info);
983 }
984
766out: 985out:
767 kfree(variable_name); 986 kfree(variable_name);
768 987
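
The register_efivars() hunk above wires EFI variables up as a pstore backend: allocate a transfer buffer, fill in bufsize and the private data pointer, initialize buf_mutex, and call pstore_register(). (The hunk allocates 4096 bytes but advertises bufsize as 1024, the VariableName+Data budget noted earlier in this file.) The registration shape, sketched with hypothetical demo_* callbacks and struct pstore_info fields as of this kernel:

#include <linux/pstore.h>
#include <linux/slab.h>

static struct pstore_info demo_pstore = {
	.owner = THIS_MODULE,
	.name  = "demo",
	.open  = demo_pstore_open,	/* callback bodies elided */
	.close = demo_pstore_close,
	.read  = demo_pstore_read,
	.write = demo_pstore_write,
	.erase = demo_pstore_erase,
};

static int __init demo_pstore_init(void)
{
	demo_pstore.buf = kmalloc(1024, GFP_KERNEL);
	if (!demo_pstore.buf)
		return -ENOMEM;
	demo_pstore.bufsize = 1024;
	mutex_init(&demo_pstore.buf_mutex);
	pstore_register(&demo_pstore);
	return 0;
}
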
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 9d8c892d07c..9d2668a5087 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -90,7 +90,6 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
90 struct drm_device *dev = minor->dev; 90 struct drm_device *dev = minor->dev;
91 struct dentry *ent; 91 struct dentry *ent;
92 struct drm_info_node *tmp; 92 struct drm_info_node *tmp;
93 char name[64];
94 int i, ret; 93 int i, ret;
95 94
96 for (i = 0; i < count; i++) { 95 for (i = 0; i < count; i++) {
@@ -108,6 +107,9 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
108 ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO, 107 ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
109 root, tmp, &drm_debugfs_fops); 108 root, tmp, &drm_debugfs_fops);
110 if (!ent) { 109 if (!ent) {
110 char name[64];
111 strncpy(name, root->d_name.name,
112 min(root->d_name.len, 64U));
111 DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n", 113 DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n",
112 name, files[i].name); 114 name, files[i].name);
113 kfree(tmp); 115 kfree(tmp);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 756af4d7ec7..7425e5c9bd7 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -127,6 +127,23 @@ static const u8 edid_header[] = {
127 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 127 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00
128}; 128};
129 129
130 /*
131 * Sanity check the header of the base EDID block. Return 8 if the header
132 * is perfect, down to 0 if it's totally wrong.
133 */
134int drm_edid_header_is_valid(const u8 *raw_edid)
135{
136 int i, score = 0;
137
138 for (i = 0; i < sizeof(edid_header); i++)
139 if (raw_edid[i] == edid_header[i])
140 score++;
141
142 return score;
143}
144EXPORT_SYMBOL(drm_edid_header_is_valid);
145
146
130/* 147/*
131 * Sanity check the EDID block (base or extension). Return 0 if the block 148 * Sanity check the EDID block (base or extension). Return 0 if the block
132 * doesn't check out, or 1 if it's valid. 149 * doesn't check out, or 1 if it's valid.
@@ -139,12 +156,7 @@ drm_edid_block_valid(u8 *raw_edid)
139 struct edid *edid = (struct edid *)raw_edid; 156 struct edid *edid = (struct edid *)raw_edid;
140 157
141 if (raw_edid[0] == 0x00) { 158 if (raw_edid[0] == 0x00) {
142 int score = 0; 159 int score = drm_edid_header_is_valid(raw_edid);
143
144 for (i = 0; i < sizeof(edid_header); i++)
145 if (raw_edid[i] == edid_header[i])
146 score++;
147
148 if (score == 8) ; 160 if (score == 8) ;
149 else if (score >= 6) { 161 else if (score >= 6) {
150 DRM_DEBUG("Fixing EDID header, your hardware may be failing\n"); 162 DRM_DEBUG("Fixing EDID header, your hardware may be failing\n");
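
Factoring the header check into drm_edid_header_is_valid() and exporting it lets other code score a candidate EDID without running the full block validation; drm_edid_block_valid() keeps its tolerance policy of accepting and repairing a header that matches at least 6 of the 8 signature bytes. The fixed signature is 00 FF FF FF FF FF FF 00, so an external caller could repair a near-miss as sketched below (edid_header itself stays static to drm_edid.c, so the caller supplies its own copy):

static const u8 demo_edid_sig[] = {
	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00
};

/* Score-and-repair outside drm_edid.c. */
int score = drm_edid_header_is_valid(raw_edid);

if (score >= 6 && score < 8)
	memcpy(raw_edid, demo_edid_sig, sizeof(demo_edid_sig));
else if (score < 6)
	; /* header too damaged to trust */
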
@@ -1439,6 +1451,8 @@ EXPORT_SYMBOL(drm_detect_monitor_audio);
1439static void drm_add_display_info(struct edid *edid, 1451static void drm_add_display_info(struct edid *edid,
1440 struct drm_display_info *info) 1452 struct drm_display_info *info)
1441{ 1453{
1454 u8 *edid_ext;
1455
1442 info->width_mm = edid->width_cm * 10; 1456 info->width_mm = edid->width_cm * 10;
1443 info->height_mm = edid->height_cm * 10; 1457 info->height_mm = edid->height_cm * 10;
1444 1458
@@ -1483,6 +1497,13 @@ static void drm_add_display_info(struct edid *edid,
1483 info->color_formats = DRM_COLOR_FORMAT_YCRCB444; 1497 info->color_formats = DRM_COLOR_FORMAT_YCRCB444;
1484 if (info->color_formats & DRM_EDID_FEATURE_RGB_YCRCB422) 1498 if (info->color_formats & DRM_EDID_FEATURE_RGB_YCRCB422)
1485 info->color_formats = DRM_COLOR_FORMAT_YCRCB422; 1499 info->color_formats = DRM_COLOR_FORMAT_YCRCB422;
1500
1501 /* Get data from CEA blocks if present */
1502 edid_ext = drm_find_cea_extension(edid);
1503 if (!edid_ext)
1504 return;
1505
1506 info->cea_rev = edid_ext[1];
1486} 1507}
1487 1508
1488/** 1509/**
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 2022a5c966b..3830e9e478c 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -291,11 +291,14 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state)
291 if (!dev->irq_enabled) 291 if (!dev->irq_enabled)
292 return; 292 return;
293 293
294 if (state) 294 if (state) {
295 dev->driver->irq_uninstall(dev); 295 if (dev->driver->irq_uninstall)
296 else { 296 dev->driver->irq_uninstall(dev);
297 dev->driver->irq_preinstall(dev); 297 } else {
298 dev->driver->irq_postinstall(dev); 298 if (dev->driver->irq_preinstall)
299 dev->driver->irq_preinstall(dev);
300 if (dev->driver->irq_postinstall)
301 dev->driver->irq_postinstall(dev);
299 } 302 }
300} 303}
301 304
@@ -338,7 +341,8 @@ int drm_irq_install(struct drm_device *dev)
338 DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev)); 341 DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
339 342
340 /* Before installing handler */ 343 /* Before installing handler */
341 dev->driver->irq_preinstall(dev); 344 if (dev->driver->irq_preinstall)
345 dev->driver->irq_preinstall(dev);
342 346
343 /* Install handler */ 347 /* Install handler */
344 if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED)) 348 if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
@@ -363,11 +367,16 @@ int drm_irq_install(struct drm_device *dev)
363 vga_client_register(dev->pdev, (void *)dev, drm_irq_vgaarb_nokms, NULL); 367 vga_client_register(dev->pdev, (void *)dev, drm_irq_vgaarb_nokms, NULL);
364 368
365 /* After installing handler */ 369 /* After installing handler */
366 ret = dev->driver->irq_postinstall(dev); 370 if (dev->driver->irq_postinstall)
371 ret = dev->driver->irq_postinstall(dev);
372
367 if (ret < 0) { 373 if (ret < 0) {
368 mutex_lock(&dev->struct_mutex); 374 mutex_lock(&dev->struct_mutex);
369 dev->irq_enabled = 0; 375 dev->irq_enabled = 0;
370 mutex_unlock(&dev->struct_mutex); 376 mutex_unlock(&dev->struct_mutex);
377 if (!drm_core_check_feature(dev, DRIVER_MODESET))
378 vga_client_register(dev->pdev, NULL, NULL, NULL);
379 free_irq(drm_dev_to_irq(dev), dev);
371 } 380 }
372 381
373 return ret; 382 return ret;
@@ -413,7 +422,8 @@ int drm_irq_uninstall(struct drm_device *dev)
413 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 422 if (!drm_core_check_feature(dev, DRIVER_MODESET))
414 vga_client_register(dev->pdev, NULL, NULL, NULL); 423 vga_client_register(dev->pdev, NULL, NULL, NULL);
415 424
416 dev->driver->irq_uninstall(dev); 425 if (dev->driver->irq_uninstall)
426 dev->driver->irq_uninstall(dev);
417 427
418 free_irq(drm_dev_to_irq(dev), dev); 428 free_irq(drm_dev_to_irq(dev), dev);
419 429
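
The drm_irq.c hunks make irq_preinstall, irq_postinstall and irq_uninstall optional driver hooks by guarding every call site, and they fix drm_irq_install()'s error path: a failing irq_postinstall now unregisters the vgaarb client and frees the IRQ rather than leaking both. When the hook is absent, ret presumably keeps the zero left by the earlier successful request_irq(), so the function still returns success. The guarded call plus unwind, condensed from the hunks above:

/* Optional hook with explicit unwind on failure. */
if (dev->driver->irq_postinstall)
	ret = dev->driver->irq_postinstall(dev);

if (ret < 0) {
	mutex_lock(&dev->struct_mutex);
	dev->irq_enabled = 0;
	mutex_unlock(&dev->struct_mutex);
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		vga_client_register(dev->pdev, NULL, NULL, NULL);
	free_irq(drm_dev_to_irq(dev), dev);
}
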
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e2662497d50..a8ab6263e0d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1338,6 +1338,155 @@ static const struct file_operations i915_wedged_fops = {
1338 .llseek = default_llseek, 1338 .llseek = default_llseek,
1339}; 1339};
1340 1340
1341static int
1342i915_max_freq_open(struct inode *inode,
1343 struct file *filp)
1344{
1345 filp->private_data = inode->i_private;
1346 return 0;
1347}
1348
1349static ssize_t
1350i915_max_freq_read(struct file *filp,
1351 char __user *ubuf,
1352 size_t max,
1353 loff_t *ppos)
1354{
1355 struct drm_device *dev = filp->private_data;
1356 drm_i915_private_t *dev_priv = dev->dev_private;
1357 char buf[80];
1358 int len;
1359
1360 len = snprintf(buf, sizeof (buf),
1361 "max freq: %d\n", dev_priv->max_delay * 50);
1362
1363 if (len > sizeof (buf))
1364 len = sizeof (buf);
1365
1366 return simple_read_from_buffer(ubuf, max, ppos, buf, len);
1367}
1368
1369static ssize_t
1370i915_max_freq_write(struct file *filp,
1371 const char __user *ubuf,
1372 size_t cnt,
1373 loff_t *ppos)
1374{
1375 struct drm_device *dev = filp->private_data;
1376 struct drm_i915_private *dev_priv = dev->dev_private;
1377 char buf[20];
1378 int val = 1;
1379
1380 if (cnt > 0) {
1381 if (cnt > sizeof (buf) - 1)
1382 return -EINVAL;
1383
1384 if (copy_from_user(buf, ubuf, cnt))
1385 return -EFAULT;
1386 buf[cnt] = 0;
1387
1388 val = simple_strtoul(buf, NULL, 0);
1389 }
1390
1391 DRM_DEBUG_DRIVER("Manually setting max freq to %d\n", val);
1392
1393 /*
1394 * Turbo will still be enabled, but won't go above the set value.
1395 */
1396 dev_priv->max_delay = val / 50;
1397
1398 gen6_set_rps(dev, val / 50);
1399
1400 return cnt;
1401}
1402
1403static const struct file_operations i915_max_freq_fops = {
1404 .owner = THIS_MODULE,
1405 .open = i915_max_freq_open,
1406 .read = i915_max_freq_read,
1407 .write = i915_max_freq_write,
1408 .llseek = default_llseek,
1409};
1410
1411static int
1412i915_cache_sharing_open(struct inode *inode,
1413 struct file *filp)
1414{
1415 filp->private_data = inode->i_private;
1416 return 0;
1417}
1418
1419static ssize_t
1420i915_cache_sharing_read(struct file *filp,
1421 char __user *ubuf,
1422 size_t max,
1423 loff_t *ppos)
1424{
1425 struct drm_device *dev = filp->private_data;
1426 drm_i915_private_t *dev_priv = dev->dev_private;
1427 char buf[80];
1428 u32 snpcr;
1429 int len;
1430
1431 mutex_lock(&dev_priv->dev->struct_mutex);
1432 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
1433 mutex_unlock(&dev_priv->dev->struct_mutex);
1434
1435 len = snprintf(buf, sizeof (buf),
1436 "%d\n", (snpcr & GEN6_MBC_SNPCR_MASK) >>
1437 GEN6_MBC_SNPCR_SHIFT);
1438
1439 if (len > sizeof (buf))
1440 len = sizeof (buf);
1441
1442 return simple_read_from_buffer(ubuf, max, ppos, buf, len);
1443}
1444
1445static ssize_t
1446i915_cache_sharing_write(struct file *filp,
1447 const char __user *ubuf,
1448 size_t cnt,
1449 loff_t *ppos)
1450{
1451 struct drm_device *dev = filp->private_data;
1452 struct drm_i915_private *dev_priv = dev->dev_private;
1453 char buf[20];
1454 u32 snpcr;
1455 int val = 1;
1456
1457 if (cnt > 0) {
1458 if (cnt > sizeof (buf) - 1)
1459 return -EINVAL;
1460
1461 if (copy_from_user(buf, ubuf, cnt))
1462 return -EFAULT;
1463 buf[cnt] = 0;
1464
1465 val = simple_strtoul(buf, NULL, 0);
1466 }
1467
1468 if (val < 0 || val > 3)
1469 return -EINVAL;
1470
1471 DRM_DEBUG_DRIVER("Manually setting uncore sharing to %d\n", val);
1472
1473 /* Update the cache sharing policy here as well */
1474 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
1475 snpcr &= ~GEN6_MBC_SNPCR_MASK;
1476 snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
1477 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
1478
1479 return cnt;
1480}
1481
1482static const struct file_operations i915_cache_sharing_fops = {
1483 .owner = THIS_MODULE,
1484 .open = i915_cache_sharing_open,
1485 .read = i915_cache_sharing_read,
1486 .write = i915_cache_sharing_write,
1487 .llseek = default_llseek,
1488};
1489
1341/* As the drm_debugfs_init() routines are called before dev->dev_private is 1490/* As the drm_debugfs_init() routines are called before dev->dev_private is
1342 * allocated we need to hook into the minor for release. */ 1491 * allocated we need to hook into the minor for release. */
1343static int 1492static int
@@ -1437,6 +1586,36 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
1437 return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops); 1586 return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops);
1438} 1587}
1439 1588
1589static int i915_max_freq_create(struct dentry *root, struct drm_minor *minor)
1590{
1591 struct drm_device *dev = minor->dev;
1592 struct dentry *ent;
1593
1594 ent = debugfs_create_file("i915_max_freq",
1595 S_IRUGO | S_IWUSR,
1596 root, dev,
1597 &i915_max_freq_fops);
1598 if (IS_ERR(ent))
1599 return PTR_ERR(ent);
1600
1601 return drm_add_fake_info_node(minor, ent, &i915_max_freq_fops);
1602}
1603
1604static int i915_cache_sharing_create(struct dentry *root, struct drm_minor *minor)
1605{
1606 struct drm_device *dev = minor->dev;
1607 struct dentry *ent;
1608
1609 ent = debugfs_create_file("i915_cache_sharing",
1610 S_IRUGO | S_IWUSR,
1611 root, dev,
1612 &i915_cache_sharing_fops);
1613 if (IS_ERR(ent))
1614 return PTR_ERR(ent);
1615
1616 return drm_add_fake_info_node(minor, ent, &i915_cache_sharing_fops);
1617}
1618
1440static struct drm_info_list i915_debugfs_list[] = { 1619static struct drm_info_list i915_debugfs_list[] = {
1441 {"i915_capabilities", i915_capabilities, 0}, 1620 {"i915_capabilities", i915_capabilities, 0},
1442 {"i915_gem_objects", i915_gem_object_info, 0}, 1621 {"i915_gem_objects", i915_gem_object_info, 0},
@@ -1490,6 +1669,12 @@ int i915_debugfs_init(struct drm_minor *minor)
1490 ret = i915_forcewake_create(minor->debugfs_root, minor); 1669 ret = i915_forcewake_create(minor->debugfs_root, minor);
1491 if (ret) 1670 if (ret)
1492 return ret; 1671 return ret;
1672 ret = i915_max_freq_create(minor->debugfs_root, minor);
1673 if (ret)
1674 return ret;
1675 ret = i915_cache_sharing_create(minor->debugfs_root, minor);
1676 if (ret)
1677 return ret;
1493 1678
1494 return drm_debugfs_create_files(i915_debugfs_list, 1679 return drm_debugfs_create_files(i915_debugfs_list,
1495 I915_DEBUGFS_ENTRIES, 1680 I915_DEBUGFS_ENTRIES,
@@ -1504,6 +1689,10 @@ void i915_debugfs_cleanup(struct drm_minor *minor)
1504 1, minor); 1689 1, minor);
1505 drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops, 1690 drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
1506 1, minor); 1691 1, minor);
1692 drm_debugfs_remove_files((struct drm_info_list *) &i915_max_freq_fops,
1693 1, minor);
1694 drm_debugfs_remove_files((struct drm_info_list *) &i915_cache_sharing_fops,
1695 1, minor);
1507} 1696}
1508 1697
1509#endif /* CONFIG_DEBUG_FS */ 1698#endif /* CONFIG_DEBUG_FS */
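
The two new i915 debugfs files follow the standard pattern for a writable integer knob: open stashes inode->i_private, the read side formats into a stack buffer and hands off to simple_read_from_buffer(), and the write side copies in at most sizeof(buf) - 1 bytes, NUL-terminates, and parses with simple_strtoul(). A condensed sketch; demo_value stands in for the driver state, and scnprintf() performs in one step the snprintf()-then-clamp dance the hunks above do by hand:

#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>

static int demo_value;

static ssize_t demo_read(struct file *filp, char __user *ubuf,
			 size_t max, loff_t *ppos)
{
	char buf[32];
	/* scnprintf() never returns more than sizeof(buf) - 1. */
	int len = scnprintf(buf, sizeof(buf), "%d\n", demo_value);

	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
}

static ssize_t demo_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	char buf[20];

	if (cnt >= sizeof(buf))
		return -EINVAL;
	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = 0;

	demo_value = simple_strtoul(buf, NULL, 0);
	return cnt;
}
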
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 12712824a6d..8a3942c4f09 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -61,7 +61,6 @@ static void i915_write_hws_pga(struct drm_device *dev)
61static int i915_init_phys_hws(struct drm_device *dev) 61static int i915_init_phys_hws(struct drm_device *dev)
62{ 62{
63 drm_i915_private_t *dev_priv = dev->dev_private; 63 drm_i915_private_t *dev_priv = dev->dev_private;
64 struct intel_ring_buffer *ring = LP_RING(dev_priv);
65 64
66 /* Program Hardware Status Page */ 65 /* Program Hardware Status Page */
67 dev_priv->status_page_dmah = 66 dev_priv->status_page_dmah =
@@ -71,10 +70,9 @@ static int i915_init_phys_hws(struct drm_device *dev)
71 DRM_ERROR("Can not allocate hardware status page\n"); 70 DRM_ERROR("Can not allocate hardware status page\n");
72 return -ENOMEM; 71 return -ENOMEM;
73 } 72 }
74 ring->status_page.page_addr =
75 (void __force __iomem *)dev_priv->status_page_dmah->vaddr;
76 73
77 memset_io(ring->status_page.page_addr, 0, PAGE_SIZE); 74 memset_io((void __force __iomem *)dev_priv->status_page_dmah->vaddr,
75 0, PAGE_SIZE);
78 76
79 i915_write_hws_pga(dev); 77 i915_write_hws_pga(dev);
80 78
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6867e193d85..feb4f164fd1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -544,6 +544,7 @@ typedef struct drm_i915_private {
544 u32 savePIPEB_LINK_M1; 544 u32 savePIPEB_LINK_M1;
545 u32 savePIPEB_LINK_N1; 545 u32 savePIPEB_LINK_N1;
546 u32 saveMCHBAR_RENDER_STANDBY; 546 u32 saveMCHBAR_RENDER_STANDBY;
547 u32 savePCH_PORT_HOTPLUG;
547 548
548 struct { 549 struct {
549 /** Bridge to intel-gtt-ko */ 550 /** Bridge to intel-gtt-ko */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d1cd8b89f47..a546a71fb06 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3112,7 +3112,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3112 3112
3113 if (pipelined != obj->ring) { 3113 if (pipelined != obj->ring) {
3114 ret = i915_gem_object_wait_rendering(obj); 3114 ret = i915_gem_object_wait_rendering(obj);
3115 if (ret) 3115 if (ret == -ERESTARTSYS)
3116 return ret; 3116 return ret;
3117 } 3117 }
3118 3118
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 23d1ae67d27..02f96fd0d52 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -306,12 +306,15 @@ static void i915_hotplug_work_func(struct work_struct *work)
306 struct drm_mode_config *mode_config = &dev->mode_config; 306 struct drm_mode_config *mode_config = &dev->mode_config;
307 struct intel_encoder *encoder; 307 struct intel_encoder *encoder;
308 308
309 mutex_lock(&mode_config->mutex);
309 DRM_DEBUG_KMS("running encoder hotplug functions\n"); 310 DRM_DEBUG_KMS("running encoder hotplug functions\n");
310 311
311 list_for_each_entry(encoder, &mode_config->encoder_list, base.head) 312 list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
312 if (encoder->hot_plug) 313 if (encoder->hot_plug)
313 encoder->hot_plug(encoder); 314 encoder->hot_plug(encoder);
314 315
316 mutex_unlock(&mode_config->mutex);
317
315 /* Just fire off a uevent and let userspace tell us what to do */ 318 /* Just fire off a uevent and let userspace tell us what to do */
316 drm_helper_hpd_irq_event(dev); 319 drm_helper_hpd_irq_event(dev);
317} 320}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 02db299f621..d1331f771e2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -78,6 +78,14 @@
78#define GRDOM_RENDER (1<<2) 78#define GRDOM_RENDER (1<<2)
79#define GRDOM_MEDIA (3<<2) 79#define GRDOM_MEDIA (3<<2)
80 80
81#define GEN6_MBCUNIT_SNPCR 0x900c /* for LLC config */
82#define GEN6_MBC_SNPCR_SHIFT 21
83#define GEN6_MBC_SNPCR_MASK (3<<21)
84#define GEN6_MBC_SNPCR_MAX (0<<21)
85#define GEN6_MBC_SNPCR_MED (1<<21)
86#define GEN6_MBC_SNPCR_LOW (2<<21)
87#define GEN6_MBC_SNPCR_MIN (3<<21) /* only 1/16th of the cache is shared */
88
81#define GEN6_GDRST 0x941c 89#define GEN6_GDRST 0x941c
82#define GEN6_GRDOM_FULL (1 << 0) 90#define GEN6_GRDOM_FULL (1 << 0)
83#define GEN6_GRDOM_RENDER (1 << 1) 91#define GEN6_GRDOM_RENDER (1 << 1)
@@ -1506,6 +1514,7 @@
1506#define VIDEO_DIP_SELECT_AVI (0 << 19) 1514#define VIDEO_DIP_SELECT_AVI (0 << 19)
1507#define VIDEO_DIP_SELECT_VENDOR (1 << 19) 1515#define VIDEO_DIP_SELECT_VENDOR (1 << 19)
1508#define VIDEO_DIP_SELECT_SPD (3 << 19) 1516#define VIDEO_DIP_SELECT_SPD (3 << 19)
1517#define VIDEO_DIP_SELECT_MASK (3 << 19)
1509#define VIDEO_DIP_FREQ_ONCE (0 << 16) 1518#define VIDEO_DIP_FREQ_ONCE (0 << 16)
1510#define VIDEO_DIP_FREQ_VSYNC (1 << 16) 1519#define VIDEO_DIP_FREQ_VSYNC (1 << 16)
1511#define VIDEO_DIP_FREQ_2VSYNC (2 << 16) 1520#define VIDEO_DIP_FREQ_2VSYNC (2 << 16)
@@ -2084,9 +2093,6 @@
2084#define DP_PIPEB_SELECT (1 << 30) 2093#define DP_PIPEB_SELECT (1 << 30)
2085#define DP_PIPE_MASK (1 << 30) 2094#define DP_PIPE_MASK (1 << 30)
2086 2095
2087#define DP_PIPE_ENABLED(V, P) \
2088 (((V) & (DP_PIPE_MASK | DP_PORT_EN)) == ((P) << 30 | DP_PORT_EN))
2089
2090/* Link training mode - select a suitable mode for each stage */ 2096/* Link training mode - select a suitable mode for each stage */
2091#define DP_LINK_TRAIN_PAT_1 (0 << 28) 2097#define DP_LINK_TRAIN_PAT_1 (0 << 28)
2092#define DP_LINK_TRAIN_PAT_2 (1 << 28) 2098#define DP_LINK_TRAIN_PAT_2 (1 << 28)
@@ -3024,6 +3030,20 @@
3024#define _TRANSA_DP_LINK_M2 0xe0048 3030#define _TRANSA_DP_LINK_M2 0xe0048
3025#define _TRANSA_DP_LINK_N2 0xe004c 3031#define _TRANSA_DP_LINK_N2 0xe004c
3026 3032
3033/* Per-transcoder DIP controls */
3034
3035#define _VIDEO_DIP_CTL_A 0xe0200
3036#define _VIDEO_DIP_DATA_A 0xe0208
3037#define _VIDEO_DIP_GCP_A 0xe0210
3038
3039#define _VIDEO_DIP_CTL_B 0xe1200
3040#define _VIDEO_DIP_DATA_B 0xe1208
3041#define _VIDEO_DIP_GCP_B 0xe1210
3042
3043#define TVIDEO_DIP_CTL(pipe) _PIPE(pipe, _VIDEO_DIP_CTL_A, _VIDEO_DIP_CTL_B)
3044#define TVIDEO_DIP_DATA(pipe) _PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B)
3045#define TVIDEO_DIP_GCP(pipe) _PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B)
3046
3027#define _TRANS_HTOTAL_B 0xe1000 3047#define _TRANS_HTOTAL_B 0xe1000
3028#define _TRANS_HBLANK_B 0xe1004 3048#define _TRANS_HBLANK_B 0xe1004
3029#define _TRANS_HSYNC_B 0xe1008 3049#define _TRANS_HSYNC_B 0xe1008
@@ -3076,6 +3096,16 @@
3076#define TRANS_6BPC (2<<5) 3096#define TRANS_6BPC (2<<5)
3077#define TRANS_12BPC (3<<5) 3097#define TRANS_12BPC (3<<5)
3078 3098
3099#define _TRANSA_CHICKEN2 0xf0064
3100#define _TRANSB_CHICKEN2 0xf1064
3101#define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
3102#define TRANS_AUTOTRAIN_GEN_STALL_DIS (1<<31)
3103
3104#define SOUTH_CHICKEN1 0xc2000
3105#define FDIA_PHASE_SYNC_SHIFT_OVR 19
3106#define FDIA_PHASE_SYNC_SHIFT_EN 18
3107#define FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2)))
3108#define FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2)))
3079#define SOUTH_CHICKEN2 0xc2004 3109#define SOUTH_CHICKEN2 0xc2004
3080#define DPLS_EDP_PPS_FIX_DIS (1<<0) 3110#define DPLS_EDP_PPS_FIX_DIS (1<<0)
3081 3111
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 285758603ac..87677d60d0d 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -812,6 +812,7 @@ int i915_save_state(struct drm_device *dev)
812 dev_priv->saveFDI_RXB_IMR = I915_READ(_FDI_RXB_IMR); 812 dev_priv->saveFDI_RXB_IMR = I915_READ(_FDI_RXB_IMR);
813 dev_priv->saveMCHBAR_RENDER_STANDBY = 813 dev_priv->saveMCHBAR_RENDER_STANDBY =
814 I915_READ(RSTDBYCTL); 814 I915_READ(RSTDBYCTL);
815 dev_priv->savePCH_PORT_HOTPLUG = I915_READ(PCH_PORT_HOTPLUG);
815 } else { 816 } else {
816 dev_priv->saveIER = I915_READ(IER); 817 dev_priv->saveIER = I915_READ(IER);
817 dev_priv->saveIMR = I915_READ(IMR); 818 dev_priv->saveIMR = I915_READ(IMR);
@@ -863,6 +864,7 @@ int i915_restore_state(struct drm_device *dev)
863 I915_WRITE(GTIMR, dev_priv->saveGTIMR); 864 I915_WRITE(GTIMR, dev_priv->saveGTIMR);
864 I915_WRITE(_FDI_RXA_IMR, dev_priv->saveFDI_RXA_IMR); 865 I915_WRITE(_FDI_RXA_IMR, dev_priv->saveFDI_RXA_IMR);
865 I915_WRITE(_FDI_RXB_IMR, dev_priv->saveFDI_RXB_IMR); 866 I915_WRITE(_FDI_RXB_IMR, dev_priv->saveFDI_RXB_IMR);
867 I915_WRITE(PCH_PORT_HOTPLUG, dev_priv->savePCH_PORT_HOTPLUG);
866 } else { 868 } else {
867 I915_WRITE(IER, dev_priv->saveIER); 869 I915_WRITE(IER, dev_priv->saveIER);
868 I915_WRITE(IMR, dev_priv->saveIMR); 870 I915_WRITE(IMR, dev_priv->saveIMR);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 393a39922e5..35364e68a09 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -980,11 +980,29 @@ static void assert_transcoder_disabled(struct drm_i915_private *dev_priv,
980 pipe_name(pipe)); 980 pipe_name(pipe));
981} 981}
982 982
983static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe,
984 int reg, u32 port_sel, u32 val)
985{
986 if ((val & DP_PORT_EN) == 0)
987 return false;
988
989 if (HAS_PCH_CPT(dev_priv->dev)) {
990 u32 trans_dp_ctl_reg = TRANS_DP_CTL(pipe);
991 u32 trans_dp_ctl = I915_READ(trans_dp_ctl_reg);
992 if ((trans_dp_ctl & TRANS_DP_PORT_SEL_MASK) != port_sel)
993 return false;
994 } else {
995 if ((val & DP_PIPE_MASK) != (pipe << 30))
996 return false;
997 }
998 return true;
999}
1000
983static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, 1001static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv,
984 enum pipe pipe, int reg) 1002 enum pipe pipe, int reg, u32 port_sel)
985{ 1003{
986 u32 val = I915_READ(reg); 1004 u32 val = I915_READ(reg);
987 WARN(DP_PIPE_ENABLED(val, pipe), 1005 WARN(dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val),
988 "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", 1006 "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
989 reg, pipe_name(pipe)); 1007 reg, pipe_name(pipe));
990} 1008}
@@ -1004,9 +1022,9 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv,
1004 int reg; 1022 int reg;
1005 u32 val; 1023 u32 val;
1006 1024
1007 assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_B); 1025 assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_B, TRANS_DP_PORT_SEL_B);
1008 assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_C); 1026 assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_C, TRANS_DP_PORT_SEL_C);
1009 assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_D); 1027 assert_pch_dp_disabled(dev_priv, pipe, PCH_DP_D, TRANS_DP_PORT_SEL_D);
1010 1028
1011 reg = PCH_ADPA; 1029 reg = PCH_ADPA;
1012 val = I915_READ(reg); 1030 val = I915_READ(reg);
@@ -1276,6 +1294,17 @@ static void intel_disable_pipe(struct drm_i915_private *dev_priv,
1276 intel_wait_for_pipe_off(dev_priv->dev, pipe); 1294 intel_wait_for_pipe_off(dev_priv->dev, pipe);
1277} 1295}
1278 1296
1297/*
1298 * Plane regs are double buffered, going from enabled->disabled needs a
1299 * trigger in order to latch. The display address reg provides this.
1300 */
1301static void intel_flush_display_plane(struct drm_i915_private *dev_priv,
1302 enum plane plane)
1303{
1304 I915_WRITE(DSPADDR(plane), I915_READ(DSPADDR(plane)));
1305 I915_WRITE(DSPSURF(plane), I915_READ(DSPSURF(plane)));
1306}
1307
1279/** 1308/**
1280 * intel_enable_plane - enable a display plane on a given pipe 1309 * intel_enable_plane - enable a display plane on a given pipe
1281 * @dev_priv: i915 private structure 1310 * @dev_priv: i915 private structure
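intel_flush_display_plane() is the standard latch trigger for double-buffered display registers: writing the address register, even with its current value, arms the hardware to latch the pending plane state at the next vblank. The idiom in isolation (a sketch; reg is a stand-in, not a specific register):

/* Sketch of the "write back the current value" latch trigger used by
 * intel_flush_display_plane(); reg stands in for any double-buffered
 * display register with this behavior. */
static void flush_double_buffered(struct drm_i915_private *dev_priv, u32 reg)
{
	I915_WRITE(reg, I915_READ(reg));
}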
@@ -1299,20 +1328,10 @@ static void intel_enable_plane(struct drm_i915_private *dev_priv,
1299 return; 1328 return;
1300 1329
1301 I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); 1330 I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
1331 intel_flush_display_plane(dev_priv, plane);
1302 intel_wait_for_vblank(dev_priv->dev, pipe); 1332 intel_wait_for_vblank(dev_priv->dev, pipe);
1303} 1333}
1304 1334
1305/*
1306 * Plane regs are double buffered, going from enabled->disabled needs a
1307 * trigger in order to latch. The display address reg provides this.
1308 */
1309static void intel_flush_display_plane(struct drm_i915_private *dev_priv,
1310 enum plane plane)
1311{
1312 u32 reg = DSPADDR(plane);
1313 I915_WRITE(reg, I915_READ(reg));
1314}
1315
1316/** 1335/**
1317 * intel_disable_plane - disable a display plane 1336 * intel_disable_plane - disable a display plane
1318 * @dev_priv: i915 private structure 1337 * @dev_priv: i915 private structure
@@ -1338,19 +1357,24 @@ static void intel_disable_plane(struct drm_i915_private *dev_priv,
1338} 1357}
1339 1358
1340static void disable_pch_dp(struct drm_i915_private *dev_priv, 1359static void disable_pch_dp(struct drm_i915_private *dev_priv,
1341 enum pipe pipe, int reg) 1360 enum pipe pipe, int reg, u32 port_sel)
1342{ 1361{
1343 u32 val = I915_READ(reg); 1362 u32 val = I915_READ(reg);
1344 if (DP_PIPE_ENABLED(val, pipe)) 1363 if (dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val)) {
1364 DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe);
1345 I915_WRITE(reg, val & ~DP_PORT_EN); 1365 I915_WRITE(reg, val & ~DP_PORT_EN);
1366 }
1346} 1367}
1347 1368
1348static void disable_pch_hdmi(struct drm_i915_private *dev_priv, 1369static void disable_pch_hdmi(struct drm_i915_private *dev_priv,
1349 enum pipe pipe, int reg) 1370 enum pipe pipe, int reg)
1350{ 1371{
1351 u32 val = I915_READ(reg); 1372 u32 val = I915_READ(reg);
1352 if (HDMI_PIPE_ENABLED(val, pipe)) 1373 if (HDMI_PIPE_ENABLED(val, pipe)) {
1374 DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n",
1375 reg, pipe);
1353 I915_WRITE(reg, val & ~PORT_ENABLE); 1376 I915_WRITE(reg, val & ~PORT_ENABLE);
1377 }
1354} 1378}
1355 1379
1356/* Disable any ports connected to this transcoder */ 1380/* Disable any ports connected to this transcoder */
@@ -1362,9 +1386,9 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv,
1362 val = I915_READ(PCH_PP_CONTROL); 1386 val = I915_READ(PCH_PP_CONTROL);
1363 I915_WRITE(PCH_PP_CONTROL, val | PANEL_UNLOCK_REGS); 1387 I915_WRITE(PCH_PP_CONTROL, val | PANEL_UNLOCK_REGS);
1364 1388
1365 disable_pch_dp(dev_priv, pipe, PCH_DP_B); 1389 disable_pch_dp(dev_priv, pipe, PCH_DP_B, TRANS_DP_PORT_SEL_B);
1366 disable_pch_dp(dev_priv, pipe, PCH_DP_C); 1390 disable_pch_dp(dev_priv, pipe, PCH_DP_C, TRANS_DP_PORT_SEL_C);
1367 disable_pch_dp(dev_priv, pipe, PCH_DP_D); 1391 disable_pch_dp(dev_priv, pipe, PCH_DP_D, TRANS_DP_PORT_SEL_D);
1368 1392
1369 reg = PCH_ADPA; 1393 reg = PCH_ADPA;
1370 val = I915_READ(reg); 1394 val = I915_READ(reg);
@@ -2096,7 +2120,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
2096 2120
2097 /* no fb bound */ 2121 /* no fb bound */
2098 if (!crtc->fb) { 2122 if (!crtc->fb) {
2099 DRM_DEBUG_KMS("No FB bound\n"); 2123 DRM_ERROR("No FB bound\n");
2100 return 0; 2124 return 0;
2101 } 2125 }
2102 2126
@@ -2105,6 +2129,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
2105 case 1: 2129 case 1:
2106 break; 2130 break;
2107 default: 2131 default:
2132 DRM_ERROR("no plane for crtc\n");
2108 return -EINVAL; 2133 return -EINVAL;
2109 } 2134 }
2110 2135
@@ -2114,6 +2139,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
2114 NULL); 2139 NULL);
2115 if (ret != 0) { 2140 if (ret != 0) {
2116 mutex_unlock(&dev->struct_mutex); 2141 mutex_unlock(&dev->struct_mutex);
2142 DRM_ERROR("pin & fence failed\n");
2117 return ret; 2143 return ret;
2118 } 2144 }
2119 2145
@@ -2142,6 +2168,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
2142 if (ret) { 2168 if (ret) {
2143 i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj); 2169 i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
2144 mutex_unlock(&dev->struct_mutex); 2170 mutex_unlock(&dev->struct_mutex);
2171 DRM_ERROR("failed to update base address\n");
2145 return ret; 2172 return ret;
2146 } 2173 }
2147 2174
@@ -2248,6 +2275,18 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc)
2248 FDI_FE_ERRC_ENABLE); 2275 FDI_FE_ERRC_ENABLE);
2249} 2276}
2250 2277
2278static void cpt_phase_pointer_enable(struct drm_device *dev, int pipe)
2279{
2280 struct drm_i915_private *dev_priv = dev->dev_private;
2281 u32 flags = I915_READ(SOUTH_CHICKEN1);
2282
2283 flags |= FDI_PHASE_SYNC_OVR(pipe);
2284 I915_WRITE(SOUTH_CHICKEN1, flags); /* once to unlock... */
2285 flags |= FDI_PHASE_SYNC_EN(pipe);
2286 I915_WRITE(SOUTH_CHICKEN1, flags); /* then again to enable */
2287 POSTING_READ(SOUTH_CHICKEN1);
2288}
2289
2251/* The FDI link training functions for ILK/Ibexpeak. */ 2290/* The FDI link training functions for ILK/Ibexpeak. */
2252static void ironlake_fdi_link_train(struct drm_crtc *crtc) 2291static void ironlake_fdi_link_train(struct drm_crtc *crtc)
2253{ 2292{
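cpt_phase_pointer_enable() needs two writes because the override bit acts as a lock for the enable bit: the first write sets FDI_PHASE_SYNC_OVR to unlock the field, and only the second write can turn on FDI_PHASE_SYNC_EN. The per-pipe bit positions fall straight out of the SOUTH_CHICKEN1 macros added above; a standalone check of what they expand to:

#include <assert.h>

/* Copied from the SOUTH_CHICKEN1 definitions in this patch. */
#define FDI_PHASE_SYNC_OVR(pipe)	(1 << (19 - ((pipe) * 2)))
#define FDI_PHASE_SYNC_EN(pipe)		(1 << (18 - ((pipe) * 2)))

int main(void)
{
	/* pipe 0 uses bits 19/18, pipe 1 uses bits 17/16 */
	assert(FDI_PHASE_SYNC_OVR(0) == (1 << 19));
	assert(FDI_PHASE_SYNC_EN(0) == (1 << 18));
	assert(FDI_PHASE_SYNC_OVR(1) == (1 << 17));
	assert(FDI_PHASE_SYNC_EN(1) == (1 << 16));
	return 0;
}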
@@ -2398,6 +2437,9 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc)
2398 POSTING_READ(reg); 2437 POSTING_READ(reg);
2399 udelay(150); 2438 udelay(150);
2400 2439
2440 if (HAS_PCH_CPT(dev))
2441 cpt_phase_pointer_enable(dev, pipe);
2442
2401 for (i = 0; i < 4; i++ ) { 2443 for (i = 0; i < 4; i++ ) {
2402 reg = FDI_TX_CTL(pipe); 2444 reg = FDI_TX_CTL(pipe);
2403 temp = I915_READ(reg); 2445 temp = I915_READ(reg);
@@ -2514,6 +2556,9 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc)
2514 POSTING_READ(reg); 2556 POSTING_READ(reg);
2515 udelay(150); 2557 udelay(150);
2516 2558
2559 if (HAS_PCH_CPT(dev))
2560 cpt_phase_pointer_enable(dev, pipe);
2561
2517 for (i = 0; i < 4; i++ ) { 2562 for (i = 0; i < 4; i++ ) {
2518 reg = FDI_TX_CTL(pipe); 2563 reg = FDI_TX_CTL(pipe);
2519 temp = I915_READ(reg); 2564 temp = I915_READ(reg);
@@ -2623,6 +2668,17 @@ static void ironlake_fdi_pll_enable(struct drm_crtc *crtc)
2623 } 2668 }
2624} 2669}
2625 2670
2671static void cpt_phase_pointer_disable(struct drm_device *dev, int pipe)
2672{
2673 struct drm_i915_private *dev_priv = dev->dev_private;
2674 u32 flags = I915_READ(SOUTH_CHICKEN1);
2675
2676 flags &= ~(FDI_PHASE_SYNC_EN(pipe));
2677 I915_WRITE(SOUTH_CHICKEN1, flags); /* once to disable... */
2678 flags &= ~(FDI_PHASE_SYNC_OVR(pipe));
2679 I915_WRITE(SOUTH_CHICKEN1, flags); /* then again to lock */
2680 POSTING_READ(SOUTH_CHICKEN1);
2681}
2626static void ironlake_fdi_disable(struct drm_crtc *crtc) 2682static void ironlake_fdi_disable(struct drm_crtc *crtc)
2627{ 2683{
2628 struct drm_device *dev = crtc->dev; 2684 struct drm_device *dev = crtc->dev;
@@ -2652,6 +2708,8 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc)
2652 I915_WRITE(FDI_RX_CHICKEN(pipe), 2708 I915_WRITE(FDI_RX_CHICKEN(pipe),
2653 I915_READ(FDI_RX_CHICKEN(pipe) & 2709 I915_READ(FDI_RX_CHICKEN(pipe) &
2654 ~FDI_RX_PHASE_SYNC_POINTER_EN)); 2710 ~FDI_RX_PHASE_SYNC_POINTER_EN));
2711 } else if (HAS_PCH_CPT(dev)) {
2712 cpt_phase_pointer_disable(dev, pipe);
2655 } 2713 }
2656 2714
2657 /* still set train pattern 1 */ 2715 /* still set train pattern 1 */
@@ -2862,14 +2920,18 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2862 I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size); 2920 I915_WRITE(PF_WIN_SZ(pipe), dev_priv->pch_pf_size);
2863 } 2921 }
2864 2922
2923 /*
 2924 * On ILK+ the LUT must be loaded before the pipe is running, but
 2925 * with clocks enabled
2926 */
2927 intel_crtc_load_lut(crtc);
2928
2865 intel_enable_pipe(dev_priv, pipe, is_pch_port); 2929 intel_enable_pipe(dev_priv, pipe, is_pch_port);
2866 intel_enable_plane(dev_priv, plane, pipe); 2930 intel_enable_plane(dev_priv, plane, pipe);
2867 2931
2868 if (is_pch_port) 2932 if (is_pch_port)
2869 ironlake_pch_enable(crtc); 2933 ironlake_pch_enable(crtc);
2870 2934
2871 intel_crtc_load_lut(crtc);
2872
2873 mutex_lock(&dev->struct_mutex); 2935 mutex_lock(&dev->struct_mutex);
2874 intel_update_fbc(dev); 2936 intel_update_fbc(dev);
2875 mutex_unlock(&dev->struct_mutex); 2937 mutex_unlock(&dev->struct_mutex);
@@ -4538,7 +4600,9 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc,
4538 if (connector->encoder != encoder) 4600 if (connector->encoder != encoder)
4539 continue; 4601 continue;
4540 4602
4541 if (connector->display_info.bpc < display_bpc) { 4603 /* Don't use an invalid EDID bpc value */
4604 if (connector->display_info.bpc &&
4605 connector->display_info.bpc < display_bpc) {
4542 DRM_DEBUG_DRIVER("clamping display bpc (was %d) to EDID reported max of %d\n", display_bpc, connector->display_info.bpc); 4606 DRM_DEBUG_DRIVER("clamping display bpc (was %d) to EDID reported max of %d\n", display_bpc, connector->display_info.bpc);
4543 display_bpc = connector->display_info.bpc; 4607 display_bpc = connector->display_info.bpc;
4544 } 4608 }
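The hunk above guards against EDIDs that report a bpc of 0, which means "not specified" and previously clamped the whole pipe down to 0 bpc. The corrected clamp as a standalone sketch (the helper name is hypothetical):

/* Hypothetical helper mirroring the fixed logic: a zero EDID bpc is
 * "unknown" and must not be used as an upper bound. */
static unsigned int clamp_display_bpc(unsigned int display_bpc,
				      unsigned int edid_bpc)
{
	if (edid_bpc && edid_bpc < display_bpc)
		return edid_bpc;
	return display_bpc;
}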
@@ -5153,7 +5217,8 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
5153 temp |= PIPE_12BPC; 5217 temp |= PIPE_12BPC;
5154 break; 5218 break;
5155 default: 5219 default:
5156 WARN(1, "intel_choose_pipe_bpp returned invalid value\n"); 5220 WARN(1, "intel_choose_pipe_bpp returned invalid value %d\n",
5221 pipe_bpp);
5157 temp |= PIPE_8BPC; 5222 temp |= PIPE_8BPC;
5158 pipe_bpp = 24; 5223 pipe_bpp = 24;
5159 break; 5224 break;
@@ -5238,7 +5303,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
5238 } else if (is_sdvo && is_tv) 5303 } else if (is_sdvo && is_tv)
5239 factor = 20; 5304 factor = 20;
5240 5305
5241 if (clock.m1 < factor * clock.n) 5306 if (clock.m < factor * clock.n)
5242 fp |= FP_CB_TUNE; 5307 fp |= FP_CB_TUNE;
5243 5308
5244 dpll = 0; 5309 dpll = 0;
@@ -5516,6 +5581,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
5516 5581
5517 drm_vblank_post_modeset(dev, pipe); 5582 drm_vblank_post_modeset(dev, pipe);
5518 5583
5584 intel_crtc->dpms_mode = DRM_MODE_DPMS_ON;
5585
5519 return ret; 5586 return ret;
5520} 5587}
5521 5588
@@ -7714,10 +7781,12 @@ static void gen6_init_clock_gating(struct drm_device *dev)
7714 ILK_DPARB_CLK_GATE | 7781 ILK_DPARB_CLK_GATE |
7715 ILK_DPFD_CLK_GATE); 7782 ILK_DPFD_CLK_GATE);
7716 7783
7717 for_each_pipe(pipe) 7784 for_each_pipe(pipe) {
7718 I915_WRITE(DSPCNTR(pipe), 7785 I915_WRITE(DSPCNTR(pipe),
7719 I915_READ(DSPCNTR(pipe)) | 7786 I915_READ(DSPCNTR(pipe)) |
7720 DISPPLANE_TRICKLE_FEED_DISABLE); 7787 DISPPLANE_TRICKLE_FEED_DISABLE);
7788 intel_flush_display_plane(dev_priv, pipe);
7789 }
7721} 7790}
7722 7791
7723static void ivybridge_init_clock_gating(struct drm_device *dev) 7792static void ivybridge_init_clock_gating(struct drm_device *dev)
@@ -7734,10 +7803,12 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
7734 7803
7735 I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); 7804 I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
7736 7805
7737 for_each_pipe(pipe) 7806 for_each_pipe(pipe) {
7738 I915_WRITE(DSPCNTR(pipe), 7807 I915_WRITE(DSPCNTR(pipe),
7739 I915_READ(DSPCNTR(pipe)) | 7808 I915_READ(DSPCNTR(pipe)) |
7740 DISPPLANE_TRICKLE_FEED_DISABLE); 7809 DISPPLANE_TRICKLE_FEED_DISABLE);
7810 intel_flush_display_plane(dev_priv, pipe);
7811 }
7741} 7812}
7742 7813
7743static void g4x_init_clock_gating(struct drm_device *dev) 7814static void g4x_init_clock_gating(struct drm_device *dev)
@@ -7820,6 +7891,7 @@ static void ibx_init_clock_gating(struct drm_device *dev)
7820static void cpt_init_clock_gating(struct drm_device *dev) 7891static void cpt_init_clock_gating(struct drm_device *dev)
7821{ 7892{
7822 struct drm_i915_private *dev_priv = dev->dev_private; 7893 struct drm_i915_private *dev_priv = dev->dev_private;
7894 int pipe;
7823 7895
7824 /* 7896 /*
7825 * On Ibex Peak and Cougar Point, we need to disable clock 7897 * On Ibex Peak and Cougar Point, we need to disable clock
@@ -7829,6 +7901,9 @@ static void cpt_init_clock_gating(struct drm_device *dev)
7829 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 7901 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
7830 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | 7902 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
7831 DPLS_EDP_PPS_FIX_DIS); 7903 DPLS_EDP_PPS_FIX_DIS);
7904 /* Without this, mode sets may fail silently on FDI */
7905 for_each_pipe(pipe)
7906 I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_AUTOTRAIN_GEN_STALL_DIS);
7832} 7907}
7833 7908
7834static void ironlake_teardown_rc6(struct drm_device *dev) 7909static void ironlake_teardown_rc6(struct drm_device *dev)
@@ -8178,6 +8253,9 @@ struct intel_quirk intel_quirks[] = {
8178 8253
8179 /* Lenovo U160 cannot use SSC on LVDS */ 8254 /* Lenovo U160 cannot use SSC on LVDS */
8180 { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, 8255 { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable },
8256
8257 /* Sony Vaio Y cannot use SSC on LVDS */
8258 { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable },
8181}; 8259};
8182 8260
8183static void intel_init_quirks(struct drm_device *dev) 8261static void intel_init_quirks(struct drm_device *dev)
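The quirk entries match on PCI device ID plus subsystem vendor and device IDs. A sketch of how such a table is typically walked at init time; the field names are inferred from the initializer order above ({ device, subsystem_vendor, subsystem_device, hook }) rather than quoted from intel_display.c:

/* Sketch only: struct layout inferred from the initializers above. */
struct intel_quirk {
	int device;
	int subsystem_vendor;
	int subsystem_device;
	void (*hook)(struct drm_device *dev);
};

static void apply_quirks(struct drm_device *dev, int device,
			 int sub_vendor, int sub_device,
			 const struct intel_quirk *q, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (q[i].device == device &&
		    q[i].subsystem_vendor == sub_vendor &&
		    q[i].subsystem_device == sub_device)
			q[i].hook(dev);
}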
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index f797fb58ba9..0feae908bb3 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -50,9 +50,10 @@ struct intel_dp {
50 bool has_audio; 50 bool has_audio;
51 int force_audio; 51 int force_audio;
52 uint32_t color_range; 52 uint32_t color_range;
53 int dpms_mode;
53 uint8_t link_bw; 54 uint8_t link_bw;
54 uint8_t lane_count; 55 uint8_t lane_count;
55 uint8_t dpcd[4]; 56 uint8_t dpcd[8];
56 struct i2c_adapter adapter; 57 struct i2c_adapter adapter;
57 struct i2c_algo_dp_aux_data algo; 58 struct i2c_algo_dp_aux_data algo;
58 bool is_pch_edp; 59 bool is_pch_edp;
@@ -316,9 +317,17 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
316 else 317 else
317 precharge = 5; 318 precharge = 5;
318 319
319 if (I915_READ(ch_ctl) & DP_AUX_CH_CTL_SEND_BUSY) { 320 /* Try to wait for any previous AUX channel activity */
320 DRM_ERROR("dp_aux_ch not started status 0x%08x\n", 321 for (try = 0; try < 3; try++) {
321 I915_READ(ch_ctl)); 322 status = I915_READ(ch_ctl);
323 if ((status & DP_AUX_CH_CTL_SEND_BUSY) == 0)
324 break;
325 msleep(1);
326 }
327
328 if (try == 3) {
329 WARN(1, "dp_aux_ch not started status 0x%08x\n",
330 I915_READ(ch_ctl));
322 return -EBUSY; 331 return -EBUSY;
323 } 332 }
324 333
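The AUX fix replaces a hard -EBUSY on the first busy read with a short bounded poll: up to three reads of the channel-control register, sleeping 1 ms between attempts, before giving up. The same pattern extracted as a sketch (the names are hypothetical; reg/bit stand in for ch_ctl / DP_AUX_CH_CTL_SEND_BUSY):

/* Sketch of the bounded-poll pattern used above. Returns true once
 * the bit clears, false after `tries` attempts. */
static bool wait_for_bit_clear(struct drm_i915_private *dev_priv,
			       u32 reg, u32 bit, int tries)
{
	int try;

	for (try = 0; try < tries; try++) {
		if ((I915_READ(reg) & bit) == 0)
			return true;
		msleep(1);
	}
	return false;
}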
@@ -770,6 +779,7 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
770 memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE); 779 memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE);
771 intel_dp->link_configuration[0] = intel_dp->link_bw; 780 intel_dp->link_configuration[0] = intel_dp->link_bw;
772 intel_dp->link_configuration[1] = intel_dp->lane_count; 781 intel_dp->link_configuration[1] = intel_dp->lane_count;
782 intel_dp->link_configuration[8] = DP_SET_ANSI_8B10B;
773 783
774 /* 784 /*
775 * Check for DPCD version > 1.1 and enhanced framing support 785 * Check for DPCD version > 1.1 and enhanced framing support
@@ -1011,6 +1021,8 @@ static void intel_dp_commit(struct drm_encoder *encoder)
1011 1021
1012 if (is_edp(intel_dp)) 1022 if (is_edp(intel_dp))
1013 ironlake_edp_backlight_on(dev); 1023 ironlake_edp_backlight_on(dev);
1024
1025 intel_dp->dpms_mode = DRM_MODE_DPMS_ON;
1014} 1026}
1015 1027
1016static void 1028static void
@@ -1045,6 +1057,7 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode)
1045 if (is_edp(intel_dp)) 1057 if (is_edp(intel_dp))
1046 ironlake_edp_backlight_on(dev); 1058 ironlake_edp_backlight_on(dev);
1047 } 1059 }
1060 intel_dp->dpms_mode = mode;
1048} 1061}
1049 1062
1050/* 1063/*
@@ -1334,10 +1347,16 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
1334 u32 reg; 1347 u32 reg;
1335 uint32_t DP = intel_dp->DP; 1348 uint32_t DP = intel_dp->DP;
1336 1349
1337 /* Enable output, wait for it to become active */ 1350 /*
1338 I915_WRITE(intel_dp->output_reg, intel_dp->DP); 1351 * On CPT we have to enable the port in training pattern 1, which
1339 POSTING_READ(intel_dp->output_reg); 1352 * will happen below in intel_dp_set_link_train. Otherwise, enable
1340 intel_wait_for_vblank(dev, intel_crtc->pipe); 1353 * the port and wait for it to become active.
1354 */
1355 if (!HAS_PCH_CPT(dev)) {
1356 I915_WRITE(intel_dp->output_reg, intel_dp->DP);
1357 POSTING_READ(intel_dp->output_reg);
1358 intel_wait_for_vblank(dev, intel_crtc->pipe);
1359 }
1341 1360
1342 /* Write the link configuration data */ 1361 /* Write the link configuration data */
1343 intel_dp_aux_native_write(intel_dp, DP_LINK_BW_SET, 1362 intel_dp_aux_native_write(intel_dp, DP_LINK_BW_SET,
@@ -1370,7 +1389,8 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
1370 reg = DP | DP_LINK_TRAIN_PAT_1; 1389 reg = DP | DP_LINK_TRAIN_PAT_1;
1371 1390
1372 if (!intel_dp_set_link_train(intel_dp, reg, 1391 if (!intel_dp_set_link_train(intel_dp, reg,
1373 DP_TRAINING_PATTERN_1)) 1392 DP_TRAINING_PATTERN_1 |
1393 DP_LINK_SCRAMBLING_DISABLE))
1374 break; 1394 break;
1375 /* Set training pattern 1 */ 1395 /* Set training pattern 1 */
1376 1396
@@ -1445,7 +1465,8 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
1445 1465
1446 /* channel eq pattern */ 1466 /* channel eq pattern */
1447 if (!intel_dp_set_link_train(intel_dp, reg, 1467 if (!intel_dp_set_link_train(intel_dp, reg,
1448 DP_TRAINING_PATTERN_2)) 1468 DP_TRAINING_PATTERN_2 |
1469 DP_LINK_SCRAMBLING_DISABLE))
1449 break; 1470 break;
1450 1471
1451 udelay(400); 1472 udelay(400);
@@ -1559,6 +1580,18 @@ intel_dp_link_down(struct intel_dp *intel_dp)
1559 POSTING_READ(intel_dp->output_reg); 1580 POSTING_READ(intel_dp->output_reg);
1560} 1581}
1561 1582
1583static bool
1584intel_dp_get_dpcd(struct intel_dp *intel_dp)
1585{
1586 if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd,
1587 sizeof (intel_dp->dpcd)) &&
1588 (intel_dp->dpcd[DP_DPCD_REV] != 0)) {
1589 return true;
1590 }
1591
1592 return false;
1593}
1594
1562/* 1595/*
1563 * According to DP spec 1596 * According to DP spec
1564 * 5.1.2: 1597 * 5.1.2:
@@ -1571,36 +1604,44 @@ intel_dp_link_down(struct intel_dp *intel_dp)
1571static void 1604static void
1572intel_dp_check_link_status(struct intel_dp *intel_dp) 1605intel_dp_check_link_status(struct intel_dp *intel_dp)
1573{ 1606{
1574 int ret; 1607 if (intel_dp->dpms_mode != DRM_MODE_DPMS_ON)
1608 return;
1575 1609
1576 if (!intel_dp->base.base.crtc) 1610 if (!intel_dp->base.base.crtc)
1577 return; 1611 return;
1578 1612
1613 /* Try to read receiver status if the link appears to be up */
1579 if (!intel_dp_get_link_status(intel_dp)) { 1614 if (!intel_dp_get_link_status(intel_dp)) {
1580 intel_dp_link_down(intel_dp); 1615 intel_dp_link_down(intel_dp);
1581 return; 1616 return;
1582 } 1617 }
1583 1618
1584 /* Try to read receiver status if the link appears to be up */ 1619 /* Now read the DPCD to see if it's actually running */
1585 ret = intel_dp_aux_native_read(intel_dp, 1620 if (!intel_dp_get_dpcd(intel_dp)) {
1586 0x000, intel_dp->dpcd,
1587 sizeof (intel_dp->dpcd));
1588 if (ret != sizeof(intel_dp->dpcd)) {
1589 intel_dp_link_down(intel_dp); 1621 intel_dp_link_down(intel_dp);
1590 return; 1622 return;
1591 } 1623 }
1592 1624
1593 if (!intel_channel_eq_ok(intel_dp)) { 1625 if (!intel_channel_eq_ok(intel_dp)) {
1626 DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
1627 drm_get_encoder_name(&intel_dp->base.base));
1594 intel_dp_start_link_train(intel_dp); 1628 intel_dp_start_link_train(intel_dp);
1595 intel_dp_complete_link_train(intel_dp); 1629 intel_dp_complete_link_train(intel_dp);
1596 } 1630 }
1597} 1631}
1598 1632
1599static enum drm_connector_status 1633static enum drm_connector_status
1634intel_dp_detect_dpcd(struct intel_dp *intel_dp)
1635{
1636 if (intel_dp_get_dpcd(intel_dp))
1637 return connector_status_connected;
1638 return connector_status_disconnected;
1639}
1640
1641static enum drm_connector_status
1600ironlake_dp_detect(struct intel_dp *intel_dp) 1642ironlake_dp_detect(struct intel_dp *intel_dp)
1601{ 1643{
1602 enum drm_connector_status status; 1644 enum drm_connector_status status;
1603 bool ret;
1604 1645
1605 /* Can't disconnect eDP, but you can close the lid... */ 1646 /* Can't disconnect eDP, but you can close the lid... */
1606 if (is_edp(intel_dp)) { 1647 if (is_edp(intel_dp)) {
@@ -1610,15 +1651,7 @@ ironlake_dp_detect(struct intel_dp *intel_dp)
1610 return status; 1651 return status;
1611 } 1652 }
1612 1653
1613 status = connector_status_disconnected; 1654 return intel_dp_detect_dpcd(intel_dp);
1614 ret = intel_dp_aux_native_read_retry(intel_dp,
1615 0x000, intel_dp->dpcd,
1616 sizeof (intel_dp->dpcd));
1617 if (ret && intel_dp->dpcd[DP_DPCD_REV] != 0)
1618 status = connector_status_connected;
1619 DRM_DEBUG_KMS("DPCD: %hx%hx%hx%hx\n", intel_dp->dpcd[0],
1620 intel_dp->dpcd[1], intel_dp->dpcd[2], intel_dp->dpcd[3]);
1621 return status;
1622} 1655}
1623 1656
1624static enum drm_connector_status 1657static enum drm_connector_status
@@ -1626,7 +1659,6 @@ g4x_dp_detect(struct intel_dp *intel_dp)
1626{ 1659{
1627 struct drm_device *dev = intel_dp->base.base.dev; 1660 struct drm_device *dev = intel_dp->base.base.dev;
1628 struct drm_i915_private *dev_priv = dev->dev_private; 1661 struct drm_i915_private *dev_priv = dev->dev_private;
1629 enum drm_connector_status status;
1630 uint32_t temp, bit; 1662 uint32_t temp, bit;
1631 1663
1632 switch (intel_dp->output_reg) { 1664 switch (intel_dp->output_reg) {
@@ -1648,15 +1680,7 @@ g4x_dp_detect(struct intel_dp *intel_dp)
1648 if ((temp & bit) == 0) 1680 if ((temp & bit) == 0)
1649 return connector_status_disconnected; 1681 return connector_status_disconnected;
1650 1682
1651 status = connector_status_disconnected; 1683 return intel_dp_detect_dpcd(intel_dp);
1652 if (intel_dp_aux_native_read(intel_dp, 0x000, intel_dp->dpcd,
1653 sizeof (intel_dp->dpcd)) == sizeof (intel_dp->dpcd))
1654 {
1655 if (intel_dp->dpcd[DP_DPCD_REV] != 0)
1656 status = connector_status_connected;
1657 }
1658
1659 return status;
1660} 1684}
1661 1685
1662/** 1686/**
@@ -1679,6 +1703,12 @@ intel_dp_detect(struct drm_connector *connector, bool force)
1679 status = ironlake_dp_detect(intel_dp); 1703 status = ironlake_dp_detect(intel_dp);
1680 else 1704 else
1681 status = g4x_dp_detect(intel_dp); 1705 status = g4x_dp_detect(intel_dp);
1706
1707 DRM_DEBUG_KMS("DPCD: %02hx%02hx%02hx%02hx%02hx%02hx%02hx%02hx\n",
1708 intel_dp->dpcd[0], intel_dp->dpcd[1], intel_dp->dpcd[2],
1709 intel_dp->dpcd[3], intel_dp->dpcd[4], intel_dp->dpcd[5],
1710 intel_dp->dpcd[6], intel_dp->dpcd[7]);
1711
1682 if (status != connector_status_connected) 1712 if (status != connector_status_connected)
1683 return status; 1713 return status;
1684 1714
@@ -1924,6 +1954,7 @@ intel_dp_init(struct drm_device *dev, int output_reg)
1924 return; 1954 return;
1925 1955
1926 intel_dp->output_reg = output_reg; 1956 intel_dp->output_reg = output_reg;
1957 intel_dp->dpms_mode = -1;
1927 1958
1928 intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL); 1959 intel_connector = kzalloc(sizeof(struct intel_connector), GFP_KERNEL);
1929 if (!intel_connector) { 1960 if (!intel_connector) {
@@ -2000,7 +2031,7 @@ intel_dp_init(struct drm_device *dev, int output_reg)
2000 2031
2001 /* Cache some DPCD data in the eDP case */ 2032 /* Cache some DPCD data in the eDP case */
2002 if (is_edp(intel_dp)) { 2033 if (is_edp(intel_dp)) {
2003 int ret; 2034 bool ret;
2004 u32 pp_on, pp_div; 2035 u32 pp_on, pp_div;
2005 2036
2006 pp_on = I915_READ(PCH_PP_ON_DELAYS); 2037 pp_on = I915_READ(PCH_PP_ON_DELAYS);
@@ -2013,11 +2044,9 @@ intel_dp_init(struct drm_device *dev, int output_reg)
2013 dev_priv->panel_t12 *= 100; /* t12 in 100ms units */ 2044 dev_priv->panel_t12 *= 100; /* t12 in 100ms units */
2014 2045
2015 ironlake_edp_panel_vdd_on(intel_dp); 2046 ironlake_edp_panel_vdd_on(intel_dp);
2016 ret = intel_dp_aux_native_read(intel_dp, DP_DPCD_REV, 2047 ret = intel_dp_get_dpcd(intel_dp);
2017 intel_dp->dpcd,
2018 sizeof(intel_dp->dpcd));
2019 ironlake_edp_panel_vdd_off(intel_dp); 2048 ironlake_edp_panel_vdd_off(intel_dp);
2020 if (ret == sizeof(intel_dp->dpcd)) { 2049 if (ret) {
2021 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) 2050 if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
2022 dev_priv->no_aux_handshake = 2051 dev_priv->no_aux_handshake =
2023 intel_dp->dpcd[DP_MAX_DOWNSPREAD] & 2052 intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 6e990f9760e..7b330e76a43 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -178,10 +178,28 @@ struct intel_crtc {
178#define to_intel_encoder(x) container_of(x, struct intel_encoder, base) 178#define to_intel_encoder(x) container_of(x, struct intel_encoder, base)
179#define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base) 179#define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base)
180 180
181#define DIP_HEADER_SIZE 5
182
181#define DIP_TYPE_AVI 0x82 183#define DIP_TYPE_AVI 0x82
182#define DIP_VERSION_AVI 0x2 184#define DIP_VERSION_AVI 0x2
183#define DIP_LEN_AVI 13 185#define DIP_LEN_AVI 13
184 186
187#define DIP_TYPE_SPD 0x3
188#define DIP_VERSION_SPD 0x1
189#define DIP_LEN_SPD 25
190#define DIP_SPD_UNKNOWN 0
191#define DIP_SPD_DSTB 0x1
192#define DIP_SPD_DVDP 0x2
193#define DIP_SPD_DVHS 0x3
194#define DIP_SPD_HDDVR 0x4
195#define DIP_SPD_DVC 0x5
196#define DIP_SPD_DSC 0x6
197#define DIP_SPD_VCD 0x7
198#define DIP_SPD_GAME 0x8
199#define DIP_SPD_PC 0x9
200#define DIP_SPD_BD 0xa
201#define DIP_SPD_SCD 0xb
202
185struct dip_infoframe { 203struct dip_infoframe {
186 uint8_t type; /* HB0 */ 204 uint8_t type; /* HB0 */
187 uint8_t ver; /* HB1 */ 205 uint8_t ver; /* HB1 */
@@ -206,6 +224,11 @@ struct dip_infoframe {
206 uint16_t left_bar_end; 224 uint16_t left_bar_end;
207 uint16_t right_bar_start; 225 uint16_t right_bar_start;
208 } avi; 226 } avi;
227 struct {
228 uint8_t vn[8];
229 uint8_t pd[16];
230 uint8_t sdi;
231 } spd;
209 uint8_t payload[27]; 232 uint8_t payload[27];
210 } __attribute__ ((packed)) body; 233 } __attribute__ ((packed)) body;
211} __attribute__((packed)); 234} __attribute__((packed));
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 1ed8e690391..226ba830f38 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -45,6 +45,8 @@ struct intel_hdmi {
45 bool has_hdmi_sink; 45 bool has_hdmi_sink;
46 bool has_audio; 46 bool has_audio;
47 int force_audio; 47 int force_audio;
48 void (*write_infoframe)(struct drm_encoder *encoder,
49 struct dip_infoframe *frame);
48}; 50};
49 51
50static struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder) 52static struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder)
@@ -58,37 +60,70 @@ static struct intel_hdmi *intel_attached_hdmi(struct drm_connector *connector)
58 struct intel_hdmi, base); 60 struct intel_hdmi, base);
59} 61}
60 62
61void intel_dip_infoframe_csum(struct dip_infoframe *avi_if) 63void intel_dip_infoframe_csum(struct dip_infoframe *frame)
62{ 64{
63 uint8_t *data = (uint8_t *)avi_if; 65 uint8_t *data = (uint8_t *)frame;
64 uint8_t sum = 0; 66 uint8_t sum = 0;
65 unsigned i; 67 unsigned i;
66 68
67 avi_if->checksum = 0; 69 frame->checksum = 0;
68 avi_if->ecc = 0; 70 frame->ecc = 0;
69 71
70 for (i = 0; i < sizeof(*avi_if); i++) 72 /* Header isn't part of the checksum */
73 for (i = 5; i < frame->len; i++)
71 sum += data[i]; 74 sum += data[i];
72 75
73 avi_if->checksum = 0x100 - sum; 76 frame->checksum = 0x100 - sum;
74} 77}
75 78
76static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder) 79static u32 intel_infoframe_index(struct dip_infoframe *frame)
77{ 80{
78 struct dip_infoframe avi_if = { 81 u32 flags = 0;
79 .type = DIP_TYPE_AVI, 82
80 .ver = DIP_VERSION_AVI, 83 switch (frame->type) {
81 .len = DIP_LEN_AVI, 84 case DIP_TYPE_AVI:
82 }; 85 flags |= VIDEO_DIP_SELECT_AVI;
83 uint32_t *data = (uint32_t *)&avi_if; 86 break;
87 case DIP_TYPE_SPD:
88 flags |= VIDEO_DIP_SELECT_SPD;
89 break;
90 default:
91 DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
92 break;
93 }
94
95 return flags;
96}
97
98static u32 intel_infoframe_flags(struct dip_infoframe *frame)
99{
100 u32 flags = 0;
101
102 switch (frame->type) {
103 case DIP_TYPE_AVI:
104 flags |= VIDEO_DIP_ENABLE_AVI | VIDEO_DIP_FREQ_VSYNC;
105 break;
106 case DIP_TYPE_SPD:
107 flags |= VIDEO_DIP_ENABLE_SPD | VIDEO_DIP_FREQ_2VSYNC;
108 break;
109 default:
110 DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
111 break;
112 }
113
114 return flags;
115}
116
117static void i9xx_write_infoframe(struct drm_encoder *encoder,
118 struct dip_infoframe *frame)
119{
120 uint32_t *data = (uint32_t *)frame;
84 struct drm_device *dev = encoder->dev; 121 struct drm_device *dev = encoder->dev;
85 struct drm_i915_private *dev_priv = dev->dev_private; 122 struct drm_i915_private *dev_priv = dev->dev_private;
86 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); 123 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
87 u32 port; 124 u32 port, flags, val = I915_READ(VIDEO_DIP_CTL);
88 unsigned i; 125 unsigned i, len = DIP_HEADER_SIZE + frame->len;
89 126
90 if (!intel_hdmi->has_hdmi_sink)
91 return;
92 127
 93 /* XXX first guess at handling video port, is this correct? */ 128 /* XXX first guess at handling video port, is this correct? */
94 if (intel_hdmi->sdvox_reg == SDVOB) 129 if (intel_hdmi->sdvox_reg == SDVOB)
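With the generalized checksum, intel_dip_infoframe_csum() zeroes the checksum and ECC bytes, sums the bytes from offset 5 onward, and stores 0x100 minus that sum. Assuming, as the "header isn't part of the checksum" comment implies, that the checksum byte sits within the first five header bytes, the resulting invariant is easy to verify (a sketch, not driver code):

/* Sketch: after intel_dip_infoframe_csum(), the checksum plus the
 * summed bytes data[5..len-1] must wrap to 0 modulo 256. */
static bool dip_csum_ok(const struct dip_infoframe *frame)
{
	const uint8_t *data = (const uint8_t *)frame;
	uint8_t sum = frame->checksum;
	unsigned i;

	for (i = 5; i < frame->len; i++)
		sum += data[i];

	return sum == 0; /* 0x100 wraps to 0 in a uint8_t */
}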
@@ -98,18 +133,87 @@ static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder)
98 else 133 else
99 return; 134 return;
100 135
101 I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | port | 136 flags = intel_infoframe_index(frame);
102 VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC); 137
138 val &= ~VIDEO_DIP_SELECT_MASK;
103 139
104 intel_dip_infoframe_csum(&avi_if); 140 I915_WRITE(VIDEO_DIP_CTL, val | port | flags);
105 for (i = 0; i < sizeof(avi_if); i += 4) { 141
142 for (i = 0; i < len; i += 4) {
106 I915_WRITE(VIDEO_DIP_DATA, *data); 143 I915_WRITE(VIDEO_DIP_DATA, *data);
107 data++; 144 data++;
108 } 145 }
109 146
110 I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | port | 147 flags |= intel_infoframe_flags(frame);
111 VIDEO_DIP_SELECT_AVI | VIDEO_DIP_FREQ_VSYNC | 148
112 VIDEO_DIP_ENABLE_AVI); 149 I915_WRITE(VIDEO_DIP_CTL, VIDEO_DIP_ENABLE | val | port | flags);
150}
151
152static void ironlake_write_infoframe(struct drm_encoder *encoder,
153 struct dip_infoframe *frame)
154{
155 uint32_t *data = (uint32_t *)frame;
156 struct drm_device *dev = encoder->dev;
157 struct drm_i915_private *dev_priv = dev->dev_private;
158 struct drm_crtc *crtc = encoder->crtc;
159 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
160 int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
161 unsigned i, len = DIP_HEADER_SIZE + frame->len;
162 u32 flags, val = I915_READ(reg);
163
164 intel_wait_for_vblank(dev, intel_crtc->pipe);
165
166 flags = intel_infoframe_index(frame);
167
168 val &= ~VIDEO_DIP_SELECT_MASK;
169
170 I915_WRITE(reg, val | flags);
171
172 for (i = 0; i < len; i += 4) {
173 I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data);
174 data++;
175 }
176
177 flags |= intel_infoframe_flags(frame);
178
179 I915_WRITE(reg, VIDEO_DIP_ENABLE | val | flags);
180}
181static void intel_set_infoframe(struct drm_encoder *encoder,
182 struct dip_infoframe *frame)
183{
184 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
185
186 if (!intel_hdmi->has_hdmi_sink)
187 return;
188
189 intel_dip_infoframe_csum(frame);
190 intel_hdmi->write_infoframe(encoder, frame);
191}
192
193static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder)
194{
195 struct dip_infoframe avi_if = {
196 .type = DIP_TYPE_AVI,
197 .ver = DIP_VERSION_AVI,
198 .len = DIP_LEN_AVI,
199 };
200
201 intel_set_infoframe(encoder, &avi_if);
202}
203
204static void intel_hdmi_set_spd_infoframe(struct drm_encoder *encoder)
205{
206 struct dip_infoframe spd_if;
207
208 memset(&spd_if, 0, sizeof(spd_if));
209 spd_if.type = DIP_TYPE_SPD;
210 spd_if.ver = DIP_VERSION_SPD;
211 spd_if.len = DIP_LEN_SPD;
212 strcpy(spd_if.body.spd.vn, "Intel");
213 strcpy(spd_if.body.spd.pd, "Integrated gfx");
214 spd_if.body.spd.sdi = DIP_SPD_PC;
215
216 intel_set_infoframe(encoder, &spd_if);
113} 217}
114 218
115static void intel_hdmi_mode_set(struct drm_encoder *encoder, 219static void intel_hdmi_mode_set(struct drm_encoder *encoder,
@@ -156,6 +260,7 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder,
156 POSTING_READ(intel_hdmi->sdvox_reg); 260 POSTING_READ(intel_hdmi->sdvox_reg);
157 261
158 intel_hdmi_set_avi_infoframe(encoder); 262 intel_hdmi_set_avi_infoframe(encoder);
263 intel_hdmi_set_spd_infoframe(encoder);
159} 264}
160 265
161static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode) 266static void intel_hdmi_dpms(struct drm_encoder *encoder, int mode)
@@ -433,6 +538,11 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg)
433 538
434 intel_hdmi->sdvox_reg = sdvox_reg; 539 intel_hdmi->sdvox_reg = sdvox_reg;
435 540
541 if (!HAS_PCH_SPLIT(dev))
542 intel_hdmi->write_infoframe = i9xx_write_infoframe;
543 else
544 intel_hdmi->write_infoframe = ironlake_write_infoframe;
545
436 drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs); 546 drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs);
437 547
438 intel_hdmi_add_properties(intel_hdmi, connector); 548 intel_hdmi_add_properties(intel_hdmi, connector);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index b28f7bd9f88..2e8ddfcba40 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -690,6 +690,14 @@ static const struct dmi_system_id intel_no_lvds[] = {
690 }, 690 },
691 { 691 {
692 .callback = intel_no_lvds_dmi_callback, 692 .callback = intel_no_lvds_dmi_callback,
693 .ident = "Dell OptiPlex FX170",
694 .matches = {
695 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
696 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex FX170"),
697 },
698 },
699 {
700 .callback = intel_no_lvds_dmi_callback,
693 .ident = "AOpen Mini PC", 701 .ident = "AOpen Mini PC",
694 .matches = { 702 .matches = {
695 DMI_MATCH(DMI_SYS_VENDOR, "AOpen"), 703 DMI_MATCH(DMI_SYS_VENDOR, "AOpen"),
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index a06ff07a4d3..05f500cd9c2 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -83,11 +83,15 @@ intel_pch_panel_fitting(struct drm_device *dev,
83 u32 scaled_height = mode->hdisplay * adjusted_mode->vdisplay; 83 u32 scaled_height = mode->hdisplay * adjusted_mode->vdisplay;
84 if (scaled_width > scaled_height) { /* pillar */ 84 if (scaled_width > scaled_height) { /* pillar */
85 width = scaled_height / mode->vdisplay; 85 width = scaled_height / mode->vdisplay;
86 if (width & 1)
87 width++;
86 x = (adjusted_mode->hdisplay - width + 1) / 2; 88 x = (adjusted_mode->hdisplay - width + 1) / 2;
87 y = 0; 89 y = 0;
88 height = adjusted_mode->vdisplay; 90 height = adjusted_mode->vdisplay;
89 } else if (scaled_width < scaled_height) { /* letter */ 91 } else if (scaled_width < scaled_height) { /* letter */
90 height = scaled_width / mode->hdisplay; 92 height = scaled_width / mode->hdisplay;
93 if (height & 1)
94 height++;
91 y = (adjusted_mode->vdisplay - height + 1) / 2; 95 y = (adjusted_mode->vdisplay - height + 1) / 2;
92 x = 0; 96 x = 0;
93 width = adjusted_mode->hdisplay; 97 width = adjusted_mode->hdisplay;
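The panel-fitter change rounds an odd pillarbox width or letterbox height up to the next even value before centering, since an odd fitted size cannot be centered cleanly. A standalone worked example with illustrative numbers (a 640x350 mode letterboxed onto a 1366x768 panel; these figures are not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int mode_h = 640, mode_v = 350;	/* source mode */
	unsigned int panel_h = 1366, panel_v = 768;	/* adjusted mode */
	unsigned int scaled_width = panel_h * mode_v;	/* 478100 */
	unsigned int scaled_height = mode_h * panel_v;	/* 491520 */
	unsigned int height;

	/* scaled_width < scaled_height selects the "letter" branch */
	height = scaled_width / mode_h;			/* 747, odd */
	if (height & 1)
		height++;				/* rounded to 748 */

	printf("letterbox %ux%u at y=%u\n", panel_h, height,
	       (panel_v - height + 1) / 2);
	return 0;
}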
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e9615685a39..47b9b277703 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1321,6 +1321,9 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1321 ring->get_seqno = pc_render_get_seqno; 1321 ring->get_seqno = pc_render_get_seqno;
1322 } 1322 }
1323 1323
1324 if (!I915_NEED_GFX_HWS(dev))
1325 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1326
1324 ring->dev = dev; 1327 ring->dev = dev;
1325 INIT_LIST_HEAD(&ring->active_list); 1328 INIT_LIST_HEAD(&ring->active_list);
1326 INIT_LIST_HEAD(&ring->request_list); 1329 INIT_LIST_HEAD(&ring->request_list);
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 3896ef81110..9f363e0c4b6 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -5,6 +5,7 @@
5ccflags-y := -Iinclude/drm 5ccflags-y := -Iinclude/drm
6 6
7hostprogs-y := mkregtable 7hostprogs-y := mkregtable
8clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h
8 9
9quiet_cmd_mkregtable = MKREGTABLE $@ 10quiet_cmd_mkregtable = MKREGTABLE $@
10 cmd_mkregtable = $(obj)/mkregtable $< > $@ 11 cmd_mkregtable = $(obj)/mkregtable $< > $@
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index ebdb0fdb834..e88c64417a8 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -1245,6 +1245,9 @@ struct atom_context *atom_parse(struct card_info *card, void *bios)
1245 char name[512]; 1245 char name[512];
1246 int i; 1246 int i;
1247 1247
1248 if (!ctx)
1249 return NULL;
1250
1248 ctx->card = card; 1251 ctx->card = card;
1249 ctx->bios = bios; 1252 ctx->bios = bios;
1250 1253
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 189e86522b5..a134790903d 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -428,7 +428,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3
428 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); 428 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
429 429
430 i = (reg >> 7); 430 i = (reg >> 7);
431 if (i > last_reg) { 431 if (i >= last_reg) {
432 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 432 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
433 return -EINVAL; 433 return -EINVAL;
434 } 434 }
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index db8ef1905d5..cf83aa05a68 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -915,12 +915,11 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
915{ 915{
916 struct r600_cs_track *track = (struct r600_cs_track *)p->track; 916 struct r600_cs_track *track = (struct r600_cs_track *)p->track;
917 struct radeon_cs_reloc *reloc; 917 struct radeon_cs_reloc *reloc;
918 u32 last_reg = ARRAY_SIZE(r600_reg_safe_bm);
919 u32 m, i, tmp, *ib; 918 u32 m, i, tmp, *ib;
920 int r; 919 int r;
921 920
922 i = (reg >> 7); 921 i = (reg >> 7);
923 if (i > last_reg) { 922 if (i >= ARRAY_SIZE(r600_reg_safe_bm)) {
924 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 923 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
925 return -EINVAL; 924 return -EINVAL;
926 } 925 }
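Both register-check hunks fix the same off-by-one: ARRAY_SIZE() yields the element count, so the last valid index is count - 1 and the bounds test must also reject i == count. A minimal demonstration:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	unsigned int safe_bm[4] = { 0 };
	unsigned int i = ARRAY_SIZE(safe_bm);	/* 4: one past the end */

	/* The old test (i > last_reg) accepted i == 4 and would have
	 * read safe_bm[4]; i >= ARRAY_SIZE() correctly rejects it. */
	if (i >= ARRAY_SIZE(safe_bm))
		printf("rejected out-of-range index %u\n", i);
	return 0;
}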
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index a74217cd192..e0138b674ac 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -2557,6 +2557,7 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
2557 u16 offset, misc, misc2 = 0; 2557 u16 offset, misc, misc2 = 0;
2558 u8 rev, blocks, tmp; 2558 u8 rev, blocks, tmp;
2559 int state_index = 0; 2559 int state_index = 0;
2560 struct radeon_i2c_bus_rec i2c_bus;
2560 2561
2561 rdev->pm.default_power_state_index = -1; 2562 rdev->pm.default_power_state_index = -1;
2562 2563
@@ -2575,7 +2576,6 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
2575 offset = combios_get_table_offset(dev, COMBIOS_OVERDRIVE_INFO_TABLE); 2576 offset = combios_get_table_offset(dev, COMBIOS_OVERDRIVE_INFO_TABLE);
2576 if (offset) { 2577 if (offset) {
2577 u8 thermal_controller = 0, gpio = 0, i2c_addr = 0, clk_bit = 0, data_bit = 0; 2578 u8 thermal_controller = 0, gpio = 0, i2c_addr = 0, clk_bit = 0, data_bit = 0;
2578 struct radeon_i2c_bus_rec i2c_bus;
2579 2579
2580 rev = RBIOS8(offset); 2580 rev = RBIOS8(offset);
2581 2581
@@ -2617,6 +2617,25 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
2617 i2c_new_device(&rdev->pm.i2c_bus->adapter, &info); 2617 i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
2618 } 2618 }
2619 } 2619 }
2620 } else {
2621 /* boards with a thermal chip, but no overdrive table */
2622
2623 /* Asus 9600xt has an f75375 on the monid bus */
2624 if ((dev->pdev->device == 0x4152) &&
2625 (dev->pdev->subsystem_vendor == 0x1043) &&
2626 (dev->pdev->subsystem_device == 0xc002)) {
2627 i2c_bus = combios_setup_i2c_bus(rdev, DDC_MONID, 0, 0);
2628 rdev->pm.i2c_bus = radeon_i2c_lookup(rdev, &i2c_bus);
2629 if (rdev->pm.i2c_bus) {
2630 struct i2c_board_info info = { };
2631 const char *name = "f75375";
2632 info.addr = 0x28;
2633 strlcpy(info.type, name, sizeof(info.type));
2634 i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
2635 DRM_INFO("Possible %s thermal controller at 0x%02x\n",
2636 name, info.addr);
2637 }
2638 }
2620 } 2639 }
2621 2640
2622 if (rdev->flags & RADEON_IS_MOBILITY) { 2641 if (rdev->flags & RADEON_IS_MOBILITY) {
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 9792d4ffdc8..6d6b5f16bc0 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -430,6 +430,45 @@ int radeon_connector_set_property(struct drm_connector *connector, struct drm_pr
430 return 0; 430 return 0;
431} 431}
432 432
433/*
434 * Some integrated ATI Radeon chipset implementations (e.g. the
435 * Asus M2A-VM HDMI) may indicate the availability of DDC even
436 * when there's no monitor connected. For these connectors the
437 * following DDC probe extension is applied: also check for the
438 * availability of an EDID with at least a correct EDID header;
439 * only then is DDC assumed to be available. This prevents
440 * drm_get_edid() and drm_edid_block_valid() from periodically
441 * dumping data and kernel errors into the logs and onto the terminal.
442 */
443static bool radeon_connector_needs_extended_probe(struct radeon_device *dev,
444 uint32_t supported_device,
445 int connector_type)
446{
 447 /* The Asus M2A-VM HDMI board sends data to the i2c bus even
 448 * if the HDMI add-on card is not plugged in or HDMI is disabled
 449 * in the BIOS. Valid DDC can only be assumed if a valid EDID
 450 * header can also be retrieved via the i2c bus during the DDC probe */
451 if ((dev->pdev->device == 0x791e) &&
452 (dev->pdev->subsystem_vendor == 0x1043) &&
453 (dev->pdev->subsystem_device == 0x826d)) {
454 if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
455 (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
456 return true;
457 }
458 /* ECS A740GM-M with ATI RADEON 2100 sends data to i2c bus
459 * for a DVI connector that is not implemented */
460 if ((dev->pdev->device == 0x796e) &&
461 (dev->pdev->subsystem_vendor == 0x1019) &&
462 (dev->pdev->subsystem_device == 0x2615)) {
463 if ((connector_type == DRM_MODE_CONNECTOR_DVID) &&
464 (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
465 return true;
466 }
467
468 /* Default: no EDID header probe required for DDC probing */
469 return false;
470}
471
433static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder, 472static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder,
434 struct drm_connector *connector) 473 struct drm_connector *connector)
435{ 474{
@@ -661,7 +700,8 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
661 ret = connector_status_disconnected; 700 ret = connector_status_disconnected;
662 701
663 if (radeon_connector->ddc_bus) 702 if (radeon_connector->ddc_bus)
664 dret = radeon_ddc_probe(radeon_connector); 703 dret = radeon_ddc_probe(radeon_connector,
704 radeon_connector->requires_extended_probe);
665 if (dret) { 705 if (dret) {
666 if (radeon_connector->edid) { 706 if (radeon_connector->edid) {
667 kfree(radeon_connector->edid); 707 kfree(radeon_connector->edid);
@@ -833,7 +873,8 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
833 bool dret = false; 873 bool dret = false;
834 874
835 if (radeon_connector->ddc_bus) 875 if (radeon_connector->ddc_bus)
836 dret = radeon_ddc_probe(radeon_connector); 876 dret = radeon_ddc_probe(radeon_connector,
877 radeon_connector->requires_extended_probe);
837 if (dret) { 878 if (dret) {
838 if (radeon_connector->edid) { 879 if (radeon_connector->edid) {
839 kfree(radeon_connector->edid); 880 kfree(radeon_connector->edid);
@@ -1251,7 +1292,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
1251 if (radeon_dp_getdpcd(radeon_connector)) 1292 if (radeon_dp_getdpcd(radeon_connector))
1252 ret = connector_status_connected; 1293 ret = connector_status_connected;
1253 } else { 1294 } else {
1254 if (radeon_ddc_probe(radeon_connector)) 1295 if (radeon_ddc_probe(radeon_connector,
1296 radeon_connector->requires_extended_probe))
1255 ret = connector_status_connected; 1297 ret = connector_status_connected;
1256 } 1298 }
1257 } 1299 }
@@ -1406,6 +1448,9 @@ radeon_add_atom_connector(struct drm_device *dev,
1406 radeon_connector->shared_ddc = shared_ddc; 1448 radeon_connector->shared_ddc = shared_ddc;
1407 radeon_connector->connector_object_id = connector_object_id; 1449 radeon_connector->connector_object_id = connector_object_id;
1408 radeon_connector->hpd = *hpd; 1450 radeon_connector->hpd = *hpd;
1451 radeon_connector->requires_extended_probe =
1452 radeon_connector_needs_extended_probe(rdev, supported_device,
1453 connector_type);
1409 radeon_connector->router = *router; 1454 radeon_connector->router = *router;
1410 if (router->ddc_valid || router->cd_valid) { 1455 if (router->ddc_valid || router->cd_valid) {
1411 radeon_connector->router_bus = radeon_i2c_lookup(rdev, &router->i2c_info); 1456 radeon_connector->router_bus = radeon_i2c_lookup(rdev, &router->i2c_info);
@@ -1752,6 +1797,9 @@ radeon_add_legacy_connector(struct drm_device *dev,
1752 radeon_connector->devices = supported_device; 1797 radeon_connector->devices = supported_device;
1753 radeon_connector->connector_object_id = connector_object_id; 1798 radeon_connector->connector_object_id = connector_object_id;
1754 radeon_connector->hpd = *hpd; 1799 radeon_connector->hpd = *hpd;
1800 radeon_connector->requires_extended_probe =
1801 radeon_connector_needs_extended_probe(rdev, supported_device,
1802 connector_type);
1755 switch (connector_type) { 1803 switch (connector_type) {
1756 case DRM_MODE_CONNECTOR_VGA: 1804 case DRM_MODE_CONNECTOR_VGA:
1757 drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type); 1805 drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 7cfaa7e2f3b..440e6ecccc4 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -704,8 +704,9 @@ int radeon_device_init(struct radeon_device *rdev,
704 rdev->gpu_lockup = false; 704 rdev->gpu_lockup = false;
705 rdev->accel_working = false; 705 rdev->accel_working = false;
706 706
707 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X).\n", 707 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
708 radeon_family_name[rdev->family], pdev->vendor, pdev->device); 708 radeon_family_name[rdev->family], pdev->vendor, pdev->device,
709 pdev->subsystem_vendor, pdev->subsystem_device);
709 710
710 /* mutex initialization are all done here so we 711 /* mutex initialization are all done here so we
711 * can recall function without having locking issues */ 712 * can recall function without having locking issues */
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 28f4655905b..1a858944e4f 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -751,8 +751,17 @@ static int radeon_ddc_dump(struct drm_connector *connector)
751 if (!radeon_connector->ddc_bus) 751 if (!radeon_connector->ddc_bus)
752 return -1; 752 return -1;
753 edid = drm_get_edid(connector, &radeon_connector->ddc_bus->adapter); 753 edid = drm_get_edid(connector, &radeon_connector->ddc_bus->adapter);
 754 /* Log the EDID retrieval status here. This is particularly
 755 * relevant for connectors with the requires_extended_probe flag
 756 * set, which prevents radeon_dvi_detect() from fetching an EDID
 757 * on this connector as long as no valid EDID header is found */
754 if (edid) { 758 if (edid) {
759 DRM_INFO("Radeon display connector %s: Found valid EDID",
760 drm_get_connector_name(connector));
755 kfree(edid); 761 kfree(edid);
762 } else {
763 DRM_INFO("Radeon display connector %s: No monitor connected or invalid EDID",
764 drm_get_connector_name(connector));
756 } 765 }
757 return ret; 766 return ret;
758} 767}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 85f033f19a8..e71d2ed7fa1 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -50,8 +50,8 @@
50 * 2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs 50 * 2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs
51 * 2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK, clock crystal query 51 * 2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK, clock crystal query
52 * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query 52 * 2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query
53 * 2.10.0 - fusion 2D tiling, initial compute support for the CS checker 53 * 2.10.0 - fusion 2D tiling
54 * 2.11.0 - backend map 54 * 2.11.0 - backend map, initial compute support for the CS checker
55 */ 55 */
56#define KMS_DRIVER_MAJOR 2 56#define KMS_DRIVER_MAJOR 2
57#define KMS_DRIVER_MINOR 11 57#define KMS_DRIVER_MINOR 11
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 781196db792..6c111c1fa3f 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -32,17 +32,17 @@
32 * radeon_ddc_probe 32 * radeon_ddc_probe
33 * 33 *
34 */ 34 */
35bool radeon_ddc_probe(struct radeon_connector *radeon_connector) 35bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_extended_probe)
36{ 36{
37 u8 out_buf[] = { 0x0, 0x0}; 37 u8 out = 0x0;
38 u8 buf[2]; 38 u8 buf[8];
39 int ret; 39 int ret;
40 struct i2c_msg msgs[] = { 40 struct i2c_msg msgs[] = {
41 { 41 {
42 .addr = 0x50, 42 .addr = 0x50,
43 .flags = 0, 43 .flags = 0,
44 .len = 1, 44 .len = 1,
45 .buf = out_buf, 45 .buf = &out,
46 }, 46 },
47 { 47 {
48 .addr = 0x50, 48 .addr = 0x50,
@@ -52,15 +52,31 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector)
52 } 52 }
53 }; 53 };
54 54
55 /* Read 8 bytes from i2c for extended probe of EDID header */
56 if (requires_extended_probe)
57 msgs[1].len = 8;
58
55 /* on hw with routers, select right port */ 59 /* on hw with routers, select right port */
56 if (radeon_connector->router.ddc_valid) 60 if (radeon_connector->router.ddc_valid)
57 radeon_router_select_ddc_port(radeon_connector); 61 radeon_router_select_ddc_port(radeon_connector);
58 62
59 ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2); 63 ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2);
60 if (ret == 2) 64 if (ret != 2)
61 return true; 65 /* Couldn't find an accessible DDC on this connector */
62 66 return false;
63 return false; 67 if (requires_extended_probe) {
 68 /* Also probe for a valid EDID header.
 69 * The EDID header starts with:
70 * 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00.
71 * Only the first 6 bytes must be valid as
72 * drm_edid_block_valid() can fix the last 2 bytes */
73 if (drm_edid_header_is_valid(buf) < 6) {
74 /* Couldn't find an accessible EDID on this
75 * connector */
76 return false;
77 }
78 }
79 return true;
64} 80}
65 81
66/* bit banging i2c */ 82/* bit banging i2c */
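The extended probe reads eight bytes from the DDC address and scores them against the fixed EDID header; drm_edid_header_is_valid() returns the number of matching bytes, and the probe accepts a score of 6 or better because drm_edid_block_valid() can repair the last two header bytes. A sketch of that scoring, using only the header bytes quoted in the comment above (the real helper lives in the drm core):

static const unsigned char edid_header[8] = {
	0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00
};

/* Sketch of the scoring described above; returns how many of the
 * first 8 bytes match the canonical EDID header. */
static int edid_header_score(const unsigned char *raw)
{
	int i, score = 0;

	for (i = 0; i < 8; i++)
		if (raw[i] == edid_header[i])
			score++;
	return score;	/* the probe requires >= 6 */
}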
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 6df4e3cec0c..d09031c03e2 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -438,6 +438,9 @@ struct radeon_connector {
         struct radeon_i2c_chan *ddc_bus;
         /* some systems have an hdmi and vga port with a shared ddc line */
         bool shared_ddc;
+        /* for some Radeon chip families we apply an additional EDID header
+           check as part of the DDC probe */
+        bool requires_extended_probe;
         bool use_digital;
         /* we need to mind the EDID between detect
            and get modes due to analog/digital/tvencoder */
@@ -514,7 +517,8 @@ extern void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c,
                                 u8 val);
 extern void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector);
 extern void radeon_router_select_cd_port(struct radeon_connector *radeon_connector);
-extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
+extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector,
+                        bool requires_extended_probe);
 extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
 
 extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector);
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index 3be60da5212..67cbcfa3512 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -141,6 +141,8 @@ static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
                 pci_write_config_byte(dev, CY82_IDE_SLAVE_IOW, time_16);
                 pci_write_config_byte(dev, CY82_IDE_SLAVE_8BIT, time_8);
         }
+        if (hwif->index > 0)
+                pci_dev_put(dev);
 }
 
 static void __devinit init_iops_cy82c693(ide_hwif_t *hwif)
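
The new pci_dev_put() balances a reference the secondary hwif acquired when it looked up its sibling PCI function via a pci_get_*() helper; the primary hwif's device came from elsewhere and must not be put. A generic sketch of the pairing (the lookup helper shown is illustrative, not necessarily this driver's exact call):

#include <linux/pci.h>

/* Sketch: a device returned by a pci_get_*() helper holds a reference
 * that the caller must drop with pci_dev_put() when done. */
static void pci_ref_pairing_sketch(struct pci_dev *primary)
{
        struct pci_dev *sibling;

        sibling = pci_get_slot(primary->bus, primary->devfn + 1);
        if (!sibling)
                return;

        /* ... program the sibling function ... */

        pci_dev_put(sibling);   /* balance the reference pci_get_slot() took */
}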
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
index 542603b394e..962693b10a1 100644
--- a/drivers/ide/ide_platform.c
+++ b/drivers/ide/ide_platform.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/ata_platform.h>
 #include <linux/platform_device.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 
 static void __devinit plat_ide_setup_ports(struct ide_hw *hw,
@@ -95,7 +96,10 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
         plat_ide_setup_ports(&hw, base, alt_base, pdata, res_irq->start);
         hw.dev = &pdev->dev;
 
-        d.irq_flags = res_irq->flags;
+        d.irq_flags = res_irq->flags & IRQF_TRIGGER_MASK;
+        if (res_irq->flags & IORESOURCE_IRQ_SHAREABLE)
+                d.irq_flags |= IRQF_SHARED;
+
         if (mmio)
                 d.host_flags |= IDE_HFLAG_MMIO;
 
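
The fix stops copying raw IORESOURCE_* bits straight into irq_flags; only the trigger bits (which happen to line up numerically across the two flag spaces) pass through the mask, and sharing is translated explicitly. A minimal self-contained demo of the idiom (constants copied here so it compiles standalone; they mirror include/linux/ioport.h and include/linux/interrupt.h):

#include <stdio.h>

#define IORESOURCE_IRQ_SHAREABLE 0x00000010
#define IRQF_TRIGGER_MASK        0x0000000f
#define IRQF_SHARED              0x00000080

static unsigned long resource_to_irqflags(unsigned long res_flags)
{
        /* Keep only the four trigger bits, which are valid IRQF_* values. */
        unsigned long irq_flags = res_flags & IRQF_TRIGGER_MASK;

        if (res_flags & IORESOURCE_IRQ_SHAREABLE)
                irq_flags |= IRQF_SHARED;

        return irq_flags;
}

int main(void)
{
        /* e.g. a high-edge trigger (bit 0) on a shareable IRQ resource */
        printf("irq_flags=0x%lx\n",
               resource_to_irqflags(0x1 | IORESOURCE_IRQ_SHAREABLE));
        return 0;
}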
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index ce281d15227..67df91af842 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -483,7 +483,7 @@ static int gpio_keys_get_devtree_pdata(struct device *dev,
 
         buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL);
         if (!buttons)
-                return -ENODEV;
+                return -ENOMEM;
 
         pp = NULL;
         i = 0;
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index ab0acaf7fe8..756348a7f93 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -754,8 +754,11 @@ fail3:
         device_remove_file(&client->dev, &dev_attr_disable_kp);
 fail2:
         while (--pwm >= 0)
-                if (lm->pwm[pwm].enabled)
+                if (lm->pwm[pwm].enabled) {
+                        device_remove_file(lm->pwm[pwm].cdev.dev,
+                                           &dev_attr_time);
                         led_classdev_unregister(&lm->pwm[pwm].cdev);
+                }
 fail1:
         input_free_device(idev);
         kfree(lm);
@@ -775,8 +778,10 @@ static int __devexit lm8323_remove(struct i2c_client *client)
         device_remove_file(&lm->client->dev, &dev_attr_disable_kp);
 
         for (i = 0; i < 3; i++)
-                if (lm->pwm[i].enabled)
+                if (lm->pwm[i].enabled) {
+                        device_remove_file(lm->pwm[i].cdev.dev, &dev_attr_time);
                         led_classdev_unregister(&lm->pwm[i].cdev);
+                }
 
         kfree(lm);
 
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index da3828fc2c0..f270447ba95 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -19,6 +19,7 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/platform_device.h>
@@ -37,7 +38,7 @@
 #define KBC_ROW_SCAN_DLY        5
 
 /* KBC uses a 32KHz clock so a cycle = 1/32Khz */
-#define KBC_CYCLE_USEC  32
+#define KBC_CYCLE_MS    32
 
 /* KBC Registers */
 
@@ -647,7 +648,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev)
         debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
         scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows;
         kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt;
-        kbc->repoll_dly = ((kbc->repoll_dly * KBC_CYCLE_USEC) + 999) / 1000;
+        kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS);
 
         input_dev->name = pdev->name;
         input_dev->id.bustype = BUS_HOST;
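
DIV_ROUND_UP() is the kernel's ceiling-division helper (hence the new linux/kernel.h include above); include/linux/kernel.h defines it as (((n) + (d) - 1) / (d)). A quick demonstration of the rounding behaviour it replaces the open-coded arithmetic with:

#include <stdio.h>

/* Same definition as include/linux/kernel.h. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        printf("%u\n", DIV_ROUND_UP(33u, 32u)); /* 2: partial bucket rounds up */
        printf("%u\n", DIV_ROUND_UP(64u, 32u)); /* 2: exact multiple unchanged */
        printf("%u\n", DIV_ROUND_UP(1u, 32u));  /* 1: never rounds down to zero */
        return 0;
}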
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
index c456f63b6ba..783597a9a64 100644
--- a/drivers/input/misc/kxtj9.c
+++ b/drivers/input/misc/kxtj9.c
@@ -21,6 +21,7 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/input/kxtj9.h>
 #include <linux/input-polldev.h>
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
index 20f8f9284f0..6c76cf79299 100644
--- a/drivers/input/misc/mma8450.c
+++ b/drivers/input/misc/mma8450.c
@@ -24,6 +24,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/input-polldev.h>
+#include <linux/of_device.h>
 
 #define MMA8450_DRV_NAME        "mma8450"
 
@@ -229,10 +230,17 @@ static const struct i2c_device_id mma8450_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, mma8450_id);
 
+static const struct of_device_id mma8450_dt_ids[] = {
+        { .compatible = "fsl,mma8450", },
+        { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids);
+
 static struct i2c_driver mma8450_driver = {
         .driver = {
                 .name   = MMA8450_DRV_NAME,
                 .owner  = THIS_MODULE,
+                .of_match_table = mma8450_dt_ids,
         },
         .probe          = mma8450_probe,
         .remove         = __devexit_p(mma8450_remove),
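
With of_match_table set, the I2C core can bind this driver to a device tree node declaring compatible = "fsl,mma8450" (see the new Documentation/devicetree/bindings/input/fsl-mma8450.txt in this merge). A hedged sketch of how a probe routine could inspect the match; this is illustrative only and not part of the patch:

/* Illustrative only: retrieving the matched OF table entry in probe. */
static int mma8450_probe_sketch(struct i2c_client *client,
                                const struct i2c_device_id *id)
{
        const struct of_device_id *match;

        match = of_match_device(mma8450_dt_ids, &client->dev);
        if (match)
                dev_info(&client->dev, "matched DT compatible %s\n",
                         match->compatible);

        return 0;
}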
diff --git a/drivers/input/mouse/hgpk.c b/drivers/input/mouse/hgpk.c
index 95577c15ae5..4d17d9f3320 100644
--- a/drivers/input/mouse/hgpk.c
+++ b/drivers/input/mouse/hgpk.c
@@ -32,6 +32,7 @@
 #define DEBUG
 #include <linux/slab.h>
 #include <linux/input.h>
+#include <linux/module.h>
 #include <linux/serio.h>
 #include <linux/libps2.h>
 #include <linux/delay.h>
diff --git a/drivers/input/serio/xilinx_ps2.c b/drivers/input/serio/xilinx_ps2.c
index 80baa53da5b..d64c5a43aaa 100644
--- a/drivers/input/serio/xilinx_ps2.c
+++ b/drivers/input/serio/xilinx_ps2.c
@@ -23,7 +23,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
-
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index bc3b5187f3a..131f9d1c921 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -249,12 +249,14 @@ static void __ad7879_enable(struct ad7879 *ts)
 
 static void __ad7879_disable(struct ad7879 *ts)
 {
+        u16 reg = (ts->cmd_crtl2 & ~AD7879_PM(-1)) |
+                AD7879_PM(AD7879_PM_SHUTDOWN);
         disable_irq(ts->irq);
 
         if (del_timer_sync(&ts->timer))
                 ad7879_ts_event_release(ts);
 
-        ad7879_write(ts, AD7879_REG_CTRL2, AD7879_PM(AD7879_PM_SHUTDOWN));
+        ad7879_write(ts, AD7879_REG_CTRL2, reg);
 }
 
 
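
The fix is a classic read-modify-write: clear only the power-management field (its mask is AD7879_PM(-1), the field macro applied to all-ones) and OR in the shutdown value, so the other cached CTRL2 bits survive. The idiom in isolation, with an assumed 2-bit field at bit 0 for illustration:

#include <stdio.h>

/* Assumed field layout for illustration; mirrors the AD7879_PM() style. */
#define PM(x)        ((x) & 0x3)        /* 2-bit field at bit 0 */
#define PM_SHUTDOWN  0

static unsigned short set_pm(unsigned short cached_reg, unsigned short mode)
{
        /* PM(-1) yields the field mask; clear the field, then set it. */
        return (cached_reg & ~PM(-1)) | PM(mode);
}

int main(void)
{
        unsigned short ctrl2 = 0x8ff1;  /* arbitrary cached register value */

        printf("0x%04x -> 0x%04x\n", ctrl2, set_pm(ctrl2, PM_SHUTDOWN));
        return 0;
}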
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 8420129fc5e..f75a66e7d31 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -241,12 +241,13 @@ config DM_MIRROR
           needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-       tristate "RAID 4/5/6 target (EXPERIMENTAL)"
+       tristate "RAID 1/4/5/6 target (EXPERIMENTAL)"
        depends on BLK_DEV_DM && EXPERIMENTAL
+       select MD_RAID1
        select MD_RAID456
        select BLK_DEV_MD
        ---help---
-        A dm target that supports RAID4, RAID5 and RAID6 mappings
+        A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
 
          A RAID-5 set of N drives with a capacity of C MB per drive provides
          the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bae6c4e23d3..49da55c1528 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -30,7 +30,6 @@
 #include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
-#define MESG_STR(x) x, sizeof(x)
 
 /*
  * context holding the current state of a multi-part conversion
@@ -239,7 +238,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
                               struct dm_crypt_request *dmreq)
 {
         memset(iv, 0, cc->iv_size);
-        *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
+        *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
 
         return 0;
 }
@@ -248,7 +247,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
                                 struct dm_crypt_request *dmreq)
 {
         memset(iv, 0, cc->iv_size);
-        *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+        *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
 
         return 0;
 }
@@ -415,7 +414,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
         struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private;
 
         memset(iv, 0, cc->iv_size);
-        *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+        *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
         crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
 
         return 0;
@@ -1575,11 +1574,17 @@ bad_mem:
 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
         struct crypt_config *cc;
-        unsigned int key_size;
+        unsigned int key_size, opt_params;
         unsigned long long tmpll;
         int ret;
+        struct dm_arg_set as;
+        const char *opt_string;
+
+        static struct dm_arg _args[] = {
+                {0, 1, "Invalid number of feature args"},
+        };
 
-        if (argc != 5) {
+        if (argc < 5) {
                 ti->error = "Not enough arguments";
                 return -EINVAL;
         }
@@ -1648,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         }
         cc->start = tmpll;
 
+        argv += 5;
+        argc -= 5;
+
+        /* Optional parameters */
+        if (argc) {
+                as.argc = argc;
+                as.argv = argv;
+
+                ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
+                if (ret)
+                        goto bad;
+
+                opt_string = dm_shift_arg(&as);
+
+                if (opt_params == 1 && opt_string &&
+                    !strcasecmp(opt_string, "allow_discards"))
+                        ti->num_discard_requests = 1;
+                else if (opt_params) {
+                        ret = -EINVAL;
+                        ti->error = "Invalid feature arguments";
+                        goto bad;
+                }
+        }
+
         ret = -ENOMEM;
         cc->io_queue = alloc_workqueue("kcryptd_io",
                                WQ_NON_REENTRANT|
@@ -1682,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
         struct dm_crypt_io *io;
         struct crypt_config *cc;
 
-        if (bio->bi_rw & REQ_FLUSH) {
+        /*
+         * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues.
+         * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight
+         * - for REQ_DISCARD caller must use flush if IO ordering matters
+         */
+        if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
                 cc = ti->private;
                 bio->bi_bdev = cc->dev->bdev;
+                if (bio_sectors(bio))
+                        bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector);
                 return DM_MAPIO_REMAPPED;
         }
 
@@ -1727,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 
                 DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
                                 cc->dev->name, (unsigned long long)cc->start);
+
+                if (ti->num_discard_requests)
+                        DMEMIT(" 1 allow_discards");
+
                 break;
         }
         return 0;
@@ -1770,12 +1810,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
         if (argc < 2)
                 goto error;
 
-        if (!strnicmp(argv[0], MESG_STR("key"))) {
+        if (!strcasecmp(argv[0], "key")) {
                 if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) {
                         DMWARN("not suspended during key manipulation.");
                         return -EINVAL;
                 }
-                if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) {
+                if (argc == 3 && !strcasecmp(argv[1], "set")) {
                         ret = crypt_set_key(cc, argv[2]);
                         if (ret)
                                 return ret;
@@ -1783,7 +1823,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
                         ret = cc->iv_gen_ops->init(cc);
                         return ret;
                 }
-                if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) {
+                if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
                         if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
                                 ret = cc->iv_gen_ops->wipe(cc);
                                 if (ret)
@@ -1823,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
         .name   = "crypt",
-        .version = {1, 10, 0},
+        .version = {1, 11, 0},
         .module = THIS_MODULE,
         .ctr    = crypt_ctr,
         .dtr    = crypt_dtr,
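
With this change a crypt table line may carry a feature group after the five fixed arguments, e.g. "... <iv_offset> <device> <offset> 1 allow_discards"; dm_read_arg_group() reads the group count and bounds-checks it against a dm_arg's min/max and the remaining argument count. A self-contained mock of that parsing pattern (the dm_* names are reimplemented here purely for illustration):

#include <stdio.h>
#include <string.h>
#include <strings.h>

struct dm_arg_set { unsigned argc; char **argv; };
struct dm_arg { unsigned min, max; const char *error; };

static const char *dm_shift_arg(struct dm_arg_set *as)
{
        if (!as->argc)
                return NULL;
        as->argc--;
        return *as->argv++;
}

/* Read one bounded unsigned value; the group variant also checks the
 * declared count against the arguments actually remaining. */
static int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *as,
                             unsigned *value, const char **error)
{
        const char *str = dm_shift_arg(as);

        if (!str || sscanf(str, "%u", value) != 1 ||
            *value < arg->min || *value > arg->max || *value > as->argc) {
                *error = arg->error;
                return -1;
        }
        return 0;
}

int main(void)
{
        char *argv[] = { "1", "allow_discards" };
        struct dm_arg_set as = { 2, argv };
        struct dm_arg args = { 0, 1, "Invalid number of feature args" };
        const char *err;
        unsigned opt_params;

        if (dm_read_arg_group(&args, &as, &opt_params, &err)) {
                puts(err);
                return 1;
        }

        if (opt_params == 1 && !strcasecmp(dm_shift_arg(&as), "allow_discards"))
                puts("discards enabled");
        return 0;
}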
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index ea790623c30..89f73ca22cf 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2003 Sistina Software (UK) Limited.
- * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -15,6 +15,9 @@
 
 #define DM_MSG_PREFIX "flakey"
 
+#define all_corrupt_bio_flags_match(bio, fc)        \
+        (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags)
+
 /*
  * Flakey: Used for testing only, simulates intermittent,
  * catastrophic device failure.
@@ -25,60 +28,189 @@ struct flakey_c {
         sector_t start;
         unsigned up_interval;
         unsigned down_interval;
+        unsigned long flags;
+        unsigned corrupt_bio_byte;
+        unsigned corrupt_bio_rw;
+        unsigned corrupt_bio_value;
+        unsigned corrupt_bio_flags;
+};
+
+enum feature_flag_bits {
+        DROP_WRITES
 };
 
+static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
+                          struct dm_target *ti)
+{
+        int r;
+        unsigned argc;
+        const char *arg_name;
+
+        static struct dm_arg _args[] = {
+                {0, 6, "Invalid number of feature args"},
+                {1, UINT_MAX, "Invalid corrupt bio byte"},
+                {0, 255, "Invalid corrupt value to write into bio byte (0-255)"},
+                {0, UINT_MAX, "Invalid corrupt bio flags mask"},
+        };
+
+        /* No feature arguments supplied. */
+        if (!as->argc)
+                return 0;
+
+        r = dm_read_arg_group(_args, as, &argc, &ti->error);
+        if (r)
+                return r;
+
+        while (argc) {
+                arg_name = dm_shift_arg(as);
+                argc--;
+
+                /*
+                 * drop_writes
+                 */
+                if (!strcasecmp(arg_name, "drop_writes")) {
+                        if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
+                                ti->error = "Feature drop_writes duplicated";
+                                return -EINVAL;
+                        }
+
+                        continue;
+                }
+
+                /*
+                 * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
+                 */
+                if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
+                        if (!argc)
+                                ti->error = "Feature corrupt_bio_byte requires parameters";
+
+                        r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error);
+                        if (r)
+                                return r;
+                        argc--;
+
+                        /*
+                         * Direction r or w?
+                         */
+                        arg_name = dm_shift_arg(as);
+                        if (!strcasecmp(arg_name, "w"))
+                                fc->corrupt_bio_rw = WRITE;
+                        else if (!strcasecmp(arg_name, "r"))
+                                fc->corrupt_bio_rw = READ;
+                        else {
+                                ti->error = "Invalid corrupt bio direction (r or w)";
+                                return -EINVAL;
+                        }
+                        argc--;
+
+                        /*
+                         * Value of byte (0-255) to write in place of correct one.
+                         */
+                        r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error);
+                        if (r)
+                                return r;
+                        argc--;
+
+                        /*
+                         * Only corrupt bios with these flags set.
+                         */
+                        r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error);
+                        if (r)
+                                return r;
+                        argc--;
+
+                        continue;
+                }
+
+                ti->error = "Unrecognised flakey feature requested";
+                return -EINVAL;
+        }
+
+        if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
+                ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
 /*
- * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval>
+ * Construct a flakey mapping:
+ * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*]
+ *
+ * Feature args:
+ *   [drop_writes]
+ *   [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>]
+ *
+ *   Nth_byte starts from 1 for the first byte.
+ *   Direction is r for READ or w for WRITE.
+ *   bio_flags is ignored if 0.
  */
 static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
+        static struct dm_arg _args[] = {
+                {0, UINT_MAX, "Invalid up interval"},
+                {0, UINT_MAX, "Invalid down interval"},
+        };
+
+        int r;
         struct flakey_c *fc;
-        unsigned long long tmp;
+        unsigned long long tmpll;
+        struct dm_arg_set as;
+        const char *devname;
 
-        if (argc != 4) {
-                ti->error = "dm-flakey: Invalid argument count";
+        as.argc = argc;
+        as.argv = argv;
+
+        if (argc < 4) {
+                ti->error = "Invalid argument count";
                 return -EINVAL;
         }
 
-        fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+        fc = kzalloc(sizeof(*fc), GFP_KERNEL);
         if (!fc) {
-                ti->error = "dm-flakey: Cannot allocate linear context";
+                ti->error = "Cannot allocate linear context";
                 return -ENOMEM;
         }
         fc->start_time = jiffies;
 
-        if (sscanf(argv[1], "%llu", &tmp) != 1) {
-                ti->error = "dm-flakey: Invalid device sector";
+        devname = dm_shift_arg(&as);
+
+        if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) {
+                ti->error = "Invalid device sector";
                 goto bad;
         }
-        fc->start = tmp;
+        fc->start = tmpll;
 
-        if (sscanf(argv[2], "%u", &fc->up_interval) != 1) {
-                ti->error = "dm-flakey: Invalid up interval";
+        r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error);
+        if (r)
                 goto bad;
-        }
 
-        if (sscanf(argv[3], "%u", &fc->down_interval) != 1) {
-                ti->error = "dm-flakey: Invalid down interval";
+        r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error);
+        if (r)
                 goto bad;
-        }
 
         if (!(fc->up_interval + fc->down_interval)) {
-                ti->error = "dm-flakey: Total (up + down) interval is zero";
+                ti->error = "Total (up + down) interval is zero";
                 goto bad;
         }
 
         if (fc->up_interval + fc->down_interval < fc->up_interval) {
-                ti->error = "dm-flakey: Interval overflow";
+                ti->error = "Interval overflow";
                 goto bad;
         }
 
-        if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) {
-                ti->error = "dm-flakey: Device lookup failed";
+        r = parse_features(&as, fc, ti);
+        if (r)
+                goto bad;
+
+        if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) {
+                ti->error = "Device lookup failed";
                 goto bad;
         }
 
         ti->num_flush_requests = 1;
+        ti->num_discard_requests = 1;
         ti->private = fc;
         return 0;
 
@@ -99,7 +231,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector)
 {
         struct flakey_c *fc = ti->private;
 
-        return fc->start + (bi_sector - ti->begin);
+        return fc->start + dm_target_offset(ti, bi_sector);
 }
 
 static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
@@ -111,6 +243,25 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
                 bio->bi_sector = flakey_map_sector(ti, bio->bi_sector);
 }
 
+static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
+{
+        unsigned bio_bytes = bio_cur_bytes(bio);
+        char *data = bio_data(bio);
+
+        /*
+         * Overwrite the Nth byte of the data returned.
+         */
+        if (data && bio_bytes >= fc->corrupt_bio_byte) {
+                data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
+
+                DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+                        "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
+                        bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+                        (bio_data_dir(bio) == WRITE) ? 'w' : 'r',
+                        bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes);
+        }
+}
+
 static int flakey_map(struct dm_target *ti, struct bio *bio,
                       union map_info *map_context)
 {
@@ -119,18 +270,71 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
 
         /* Are we alive ? */
         elapsed = (jiffies - fc->start_time) / HZ;
-        if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval)
+        if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
+                /*
+                 * Flag this bio as submitted while down.
+                 */
+                map_context->ll = 1;
+
+                /*
+                 * Map reads as normal.
+                 */
+                if (bio_data_dir(bio) == READ)
+                        goto map_bio;
+
+                /*
+                 * Drop writes?
+                 */
+                if (test_bit(DROP_WRITES, &fc->flags)) {
+                        bio_endio(bio, 0);
+                        return DM_MAPIO_SUBMITTED;
+                }
+
+                /*
+                 * Corrupt matching writes.
+                 */
+                if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) {
+                        if (all_corrupt_bio_flags_match(bio, fc))
+                                corrupt_bio_data(bio, fc);
+                        goto map_bio;
+                }
+
+                /*
+                 * By default, error all I/O.
+                 */
                 return -EIO;
+        }
 
+map_bio:
         flakey_map_bio(ti, bio);
 
         return DM_MAPIO_REMAPPED;
 }
 
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+                         int error, union map_info *map_context)
+{
+        struct flakey_c *fc = ti->private;
+        unsigned bio_submitted_while_down = map_context->ll;
+
+        /*
+         * Corrupt successful READs while in down state.
+         * If flags were specified, only corrupt those that match.
+         */
+        if (!error && bio_submitted_while_down &&
+            (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
+            all_corrupt_bio_flags_match(bio, fc))
+                corrupt_bio_data(bio, fc);
+
+        return error;
+}
+
 static int flakey_status(struct dm_target *ti, status_type_t type,
                          char *result, unsigned int maxlen)
 {
+        unsigned sz = 0;
         struct flakey_c *fc = ti->private;
+        unsigned drop_writes;
 
         switch (type) {
         case STATUSTYPE_INFO:
@@ -138,9 +342,22 @@ static int flakey_status(struct dm_target *ti, status_type_t type,
                 break;
 
         case STATUSTYPE_TABLE:
-                snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name,
-                         (unsigned long long)fc->start, fc->up_interval,
-                         fc->down_interval);
+                DMEMIT("%s %llu %u %u ", fc->dev->name,
+                       (unsigned long long)fc->start, fc->up_interval,
+                       fc->down_interval);
+
+                drop_writes = test_bit(DROP_WRITES, &fc->flags);
+                DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5);
+
+                if (drop_writes)
+                        DMEMIT("drop_writes ");
+
+                if (fc->corrupt_bio_byte)
+                        DMEMIT("corrupt_bio_byte %u %c %u %u ",
+                               fc->corrupt_bio_byte,
+                               (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r',
+                               fc->corrupt_bio_value, fc->corrupt_bio_flags);
+
                 break;
         }
         return 0;
@@ -177,11 +394,12 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 
 static struct target_type flakey_target = {
         .name   = "flakey",
-        .version = {1, 1, 0},
+        .version = {1, 2, 0},
         .module = THIS_MODULE,
         .ctr    = flakey_ctr,
         .dtr    = flakey_dtr,
         .map    = flakey_map,
+        .end_io = flakey_end_io,
         .status = flakey_status,
         .ioctl  = flakey_ioctl,
         .merge  = flakey_merge,
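
The corrupt_bio_byte feature overwrites the Nth byte (1-based) of the current bio payload while the target is in its down interval. Stripped of the bio plumbing, the byte-poking logic reduces to:

#include <stdio.h>
#include <string.h>

/* Mirror of corrupt_bio_data() on a plain buffer; nth is 1-based. */
static void corrupt_nth_byte(unsigned char *data, unsigned len,
                             unsigned nth, unsigned char value)
{
        if (data && len >= nth)
                data[nth - 1] = value;
}

int main(void)
{
        unsigned char buf[8];
        unsigned i;

        memset(buf, 0xaa, sizeof(buf));
        corrupt_nth_byte(buf, sizeof(buf), 3, 0xff);  /* 3rd byte becomes 0xff */

        for (i = 0; i < sizeof(buf); i++)
                printf("%02x ", buf[i]);
        printf("\n");
        return 0;
}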
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2067288f61f..ad2eba40e31 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -38,6 +38,8 @@ struct io {
         struct dm_io_client *client;
         io_notify_fn callback;
         void *context;
+        void *vma_invalidate_address;
+        unsigned long vma_invalidate_size;
 } __attribute__((aligned(DM_IO_MAX_REGIONS)));
 
 static struct kmem_cache *_dm_io_cache;
@@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error)
                 set_bit(region, &io->error_bits);
 
         if (atomic_dec_and_test(&io->count)) {
+                if (io->vma_invalidate_size)
+                        invalidate_kernel_vmap_range(io->vma_invalidate_address,
+                                                     io->vma_invalidate_size);
+
                 if (io->sleeper)
                         wake_up_process(io->sleeper);
 
@@ -159,6 +165,9 @@ struct dpages {
 
         unsigned context_u;
         void *context_ptr;
+
+        void *vma_invalidate_address;
+        unsigned long vma_invalidate_size;
 };
 
 /*
@@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
         io->sleeper = current;
         io->client = client;
 
+        io->vma_invalidate_address = dp->vma_invalidate_address;
+        io->vma_invalidate_size = dp->vma_invalidate_size;
+
         dispatch_io(rw, num_regions, where, dp, io, 1);
 
         while (1) {
@@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
         io->callback = fn;
         io->context = context;
 
+        io->vma_invalidate_address = dp->vma_invalidate_address;
+        io->vma_invalidate_size = dp->vma_invalidate_size;
+
         dispatch_io(rw, num_regions, where, dp, io, 0);
         return 0;
 }
 
-static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
+static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
+                   unsigned long size)
 {
         /* Set up dpages based on memory type */
+
+        dp->vma_invalidate_address = NULL;
+        dp->vma_invalidate_size = 0;
+
         switch (io_req->mem.type) {
         case DM_IO_PAGE_LIST:
                 list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
@@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
                 break;
 
         case DM_IO_VMA:
+                flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
+                if ((io_req->bi_rw & RW_MASK) == READ) {
+                        dp->vma_invalidate_address = io_req->mem.ptr.vma;
+                        dp->vma_invalidate_size = size;
+                }
                 vm_dp_init(dp, io_req->mem.ptr.vma);
                 break;
 
@@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
         int r;
         struct dpages dp;
 
-        r = dp_init(io_req, &dp);
+        r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
         if (r)
                 return r;
 
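
For DM_IO_VMA buffers on architectures with aliasing caches (PA-RISC, some ARM), the rule this patch implements is: write back the vmap alias with flush_kernel_vmap_range() before the device touches the pages, and discard stale lines with invalidate_kernel_vmap_range() after a read completes, before the CPU inspects the data. A hedged kernel-style sketch of the pairing, with the I/O submission itself elided:

#include <linux/highmem.h>

/* Sketch: cache maintenance around device I/O to a vmalloc'd buffer. */
static void vmap_io_sketch(void *vma_buf, unsigned long size, int is_read)
{
        /* The device sees physical pages, not the vmap alias: push any
         * dirty CPU cache lines out first. */
        flush_kernel_vmap_range(vma_buf, size);

        /* ... submit I/O and wait for completion ... */

        /* After a READ, drop cached lines over the alias so the CPU
         * observes what the device actually wrote. */
        if (is_read)
                invalidate_kernel_vmap_range(vma_buf, size);
}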
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 4cacdad2270..2e9a3ca37bd 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str)
         return NULL;
 }
 
+static struct hash_cell *__get_dev_cell(uint64_t dev)
+{
+        struct mapped_device *md;
+        struct hash_cell *hc;
+
+        md = dm_get_md(huge_decode_dev(dev));
+        if (!md)
+                return NULL;
+
+        hc = dm_get_mdptr(md);
+        if (!hc) {
+                dm_put(md);
+                return NULL;
+        }
+
+        return hc;
+}
+
 /*-----------------------------------------------------------------
  * Inserting, removing and renaming a device.
  *---------------------------------------------------------------*/
@@ -718,25 +736,45 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
  */
 static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
 {
-        struct mapped_device *md;
-        void *mdptr = NULL;
+        struct hash_cell *hc = NULL;
 
-        if (*param->uuid)
-                return __get_uuid_cell(param->uuid);
+        if (*param->uuid) {
+                if (*param->name || param->dev)
+                        return NULL;
 
-        if (*param->name)
-                return __get_name_cell(param->name);
+                hc = __get_uuid_cell(param->uuid);
+                if (!hc)
+                        return NULL;
+        } else if (*param->name) {
+                if (param->dev)
+                        return NULL;
 
-        md = dm_get_md(huge_decode_dev(param->dev));
-        if (!md)
-                goto out;
+                hc = __get_name_cell(param->name);
+                if (!hc)
+                        return NULL;
+        } else if (param->dev) {
+                hc = __get_dev_cell(param->dev);
+                if (!hc)
+                        return NULL;
+        } else
+                return NULL;
 
-        mdptr = dm_get_mdptr(md);
-        if (!mdptr)
-                dm_put(md);
+        /*
+         * Sneakily write in both the name and the uuid
+         * while we have the cell.
+         */
+        strlcpy(param->name, hc->name, sizeof(param->name));
+        if (hc->uuid)
+                strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
+        else
+                param->uuid[0] = '\0';
 
-out:
-        return mdptr;
+        if (hc->new_map)
+                param->flags |= DM_INACTIVE_PRESENT_FLAG;
+        else
+                param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+        return hc;
 }
 
 static struct mapped_device *find_device(struct dm_ioctl *param)
@@ -746,24 +784,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
 
         down_read(&_hash_lock);
         hc = __find_device_hash_cell(param);
-        if (hc) {
+        if (hc)
                 md = hc->md;
-
-                /*
-                 * Sneakily write in both the name and the uuid
-                 * while we have the cell.
-                 */
-                strlcpy(param->name, hc->name, sizeof(param->name));
-                if (hc->uuid)
-                        strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
-                else
-                        param->uuid[0] = '\0';
-
-                if (hc->new_map)
-                        param->flags |= DM_INACTIVE_PRESENT_FLAG;
-                else
-                        param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-        }
         up_read(&_hash_lock);
 
         return md;
@@ -1402,6 +1424,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
                 goto out;
         }
 
+        if (!argc) {
+                DMWARN("Empty message received.");
+                goto out;
+        }
+
         table = dm_get_live_table(md);
         if (!table)
                 goto out_argv;
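
The reworked __find_device_hash_cell() enforces that exactly one of uuid, name, or dev identifies the device, tried in that order, instead of silently preferring uuid when several are supplied. The selection logic reduced to a self-contained mock:

#include <stdio.h>

/* Mock of the lookup precedence: uuid, then name, then dev; supplying an
 * extra identifier alongside a higher-priority one is rejected (NULL). */
static const char *pick_identifier(const char *uuid, const char *name,
                                   unsigned long long dev)
{
        if (uuid && *uuid)
                return (name && *name) || dev ? NULL : "uuid";
        if (name && *name)
                return dev ? NULL : "name";
        if (dev)
                return "dev";
        return NULL;
}

int main(void)
{
        const char *r;

        printf("%s\n", pick_identifier("crypt-uuid", "", 0)); /* uuid */
        printf("%s\n", pick_identifier("", "home", 0));       /* name */
        r = pick_identifier("crypt-uuid", "home", 0);
        printf("%s\n", r ? r : "(rejected: ambiguous)");
        return 0;
}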
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 320401dec10..f8214702963 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -224,8 +224,6 @@ struct kcopyd_job {
         unsigned int num_dests;
         struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
 
-        sector_t offset;
-        unsigned int nr_pages;
         struct page_list *pages;
 
         /*
@@ -380,7 +378,7 @@ static int run_io_job(struct kcopyd_job *job)
                 .bi_rw = job->rw,
                 .mem.type = DM_IO_PAGE_LIST,
                 .mem.ptr.pl = job->pages,
-                .mem.offset = job->offset,
+                .mem.offset = 0,
                 .notify.fn = complete_io,
                 .notify.context = job,
                 .client = job->kc->io_client,
@@ -397,10 +395,9 @@ static int run_io_job(struct kcopyd_job *job)
 static int run_pages_job(struct kcopyd_job *job)
 {
         int r;
+        unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
 
-        job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
-                                  PAGE_SIZE >> 9);
-        r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
+        r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
         if (!r) {
                 /* this job is ready for io */
                 push(&job->kc->io_jobs, job);
@@ -602,8 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
         job->num_dests = num_dests;
         memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
 
-        job->offset = 0;
-        job->nr_pages = 0;
         job->pages = NULL;
 
         job->fn = fn;
@@ -622,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 }
 EXPORT_SYMBOL(dm_kcopyd_copy);
 
+void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
+                                 dm_kcopyd_notify_fn fn, void *context)
+{
+        struct kcopyd_job *job;
+
+        job = mempool_alloc(kc->job_pool, GFP_NOIO);
+
+        memset(job, 0, sizeof(struct kcopyd_job));
+        job->kc = kc;
+        job->fn = fn;
+        job->context = context;
+
+        atomic_inc(&kc->nr_jobs);
+
+        return job;
+}
+EXPORT_SYMBOL(dm_kcopyd_prepare_callback);
+
+void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
+{
+        struct kcopyd_job *job = j;
+        struct dm_kcopyd_client *kc = job->kc;
+
+        job->read_err = read_err;
+        job->write_err = write_err;
+
+        push(&kc->complete_jobs, job);
+        wake(kc);
+}
+EXPORT_SYMBOL(dm_kcopyd_do_callback);
+
 /*
  * Cancels a kcopyd job, eg. someone might be deactivating a
  * mirror.
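
The two new exports let a kcopyd client run its completion callback from the kcopyd thread without performing an actual copy: dm_kcopyd_prepare_callback() allocates a zeroed job bound to a notify function, and dm_kcopyd_do_callback() queues it on complete_jobs. A hedged usage sketch (the client function and context are invented for illustration):

/* Illustrative client: run 'my_done' in kcopyd context without a copy. */
static void my_done(int read_err, unsigned long write_err, void *context)
{
        /* invoked from the kcopyd worker, like a real copy completion */
}

static void kcopyd_callback_sketch(struct dm_kcopyd_client *kc, void *ctx)
{
        void *job = dm_kcopyd_prepare_callback(kc, my_done, ctx);

        /* ... do client-specific work (e.g. a zeroing shortcut) ... */

        /* hand the result back; 0/0 means no read or write error */
        dm_kcopyd_do_callback(job, 0, 0);
}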
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index aa2e0c374ab..1021c898601 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
                 group[count] = fe->region;
                 count++;
 
-                list_del(&fe->list);
-                list_add(&fe->list, &tmp_list);
+                list_move(&fe->list, &tmp_list);
 
                 type = fe->type;
                 if (count >= MAX_FLUSH_GROUP_COUNT)
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 948e3f4925b..3b52bb72bd1 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy);
 #define MIRROR_DISK_VERSION 2
 #define LOG_OFFSET 2
 
-struct log_header {
-        uint32_t magic;
+struct log_header_disk {
+        __le32 magic;
 
         /*
          * Simple, incrementing version. no backward
          * compatibility.
          */
+        __le32 version;
+        __le64 nr_regions;
+} __packed;
+
+struct log_header_core {
+        uint32_t magic;
         uint32_t version;
-        sector_t nr_regions;
+        uint64_t nr_regions;
 };
 
 struct log_c {
@@ -239,10 +245,10 @@ struct log_c {
         int log_dev_failed;
         int log_dev_flush_failed;
         struct dm_dev *log_dev;
-        struct log_header header;
+        struct log_header_core header;
 
         struct dm_io_region header_location;
-        struct log_header *disk_header;
+        struct log_header_disk *disk_header;
 };
 
 /*
@@ -251,34 +257,34 @@ struct log_c {
  */
 static inline int log_test_bit(uint32_t *bs, unsigned bit)
 {
-        return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0;
+        return test_bit_le(bit, bs) ? 1 : 0;
 }
 
 static inline void log_set_bit(struct log_c *l,
                                uint32_t *bs, unsigned bit)
 {
-        __test_and_set_bit_le(bit, (unsigned long *) bs);
+        __set_bit_le(bit, bs);
         l->touched_cleaned = 1;
 }
 
 static inline void log_clear_bit(struct log_c *l,
                                  uint32_t *bs, unsigned bit)
 {
-        __test_and_clear_bit_le(bit, (unsigned long *) bs);
+        __clear_bit_le(bit, bs);
         l->touched_dirtied = 1;
 }
 
 /*----------------------------------------------------------------
  * Header IO
  *--------------------------------------------------------------*/
-static void header_to_disk(struct log_header *core, struct log_header *disk)
+static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk)
 {
         disk->magic = cpu_to_le32(core->magic);
         disk->version = cpu_to_le32(core->version);
         disk->nr_regions = cpu_to_le64(core->nr_regions);
 }
 
-static void header_from_disk(struct log_header *core, struct log_header *disk)
+static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk)
 {
         core->magic = le32_to_cpu(disk->magic);
         core->version = le32_to_cpu(disk->version);
@@ -486,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
         memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size);
         lc->sync_count = (sync == NOSYNC) ? region_count : 0;
 
-        lc->recovering_bits = vmalloc(bitset_size);
+        lc->recovering_bits = vzalloc(bitset_size);
         if (!lc->recovering_bits) {
                 DMWARN("couldn't allocate sync bitset");
                 vfree(lc->sync_bits);
@@ -498,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
                 kfree(lc);
                 return -ENOMEM;
         }
-        memset(lc->recovering_bits, 0, bitset_size);
         lc->sync_search = 0;
         log->context = lc;
 
@@ -739,8 +744,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region)
                 return 0;
 
         do {
-                *region = find_next_zero_bit_le(
-                                (unsigned long *) lc->sync_bits,
+                *region = find_next_zero_bit_le(lc->sync_bits,
                                                 lc->region_count,
                                                 lc->sync_search);
                 lc->sync_search = *region + 1;
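
Splitting log_header into a __packed little-endian on-disk layout and a native in-core one lets sparse catch missed conversions, since __le32/__le64 fields may only be touched through cpu_to_le*()/le*_to_cpu(). A self-contained round-trip of the same pattern (the helpers below are userspace stand-ins; a little-endian host is assumed for brevity, whereas the kernel macros handle either endianness):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define cpu_to_le32(x) (x)   /* identity on a little-endian host */
#define le32_to_cpu(x) (x)

struct hdr_disk {                    /* fixed on-disk layout */
        uint32_t magic;              /* stored little-endian */
        uint32_t version;
} __attribute__((packed));

struct hdr_core {                    /* whatever the CPU prefers */
        uint32_t magic;
        uint32_t version;
};

int main(void)
{
        struct hdr_core core = { .magic = 0x4d695272, .version = 2 };
        struct hdr_disk disk;
        struct hdr_core back;

        disk.magic = cpu_to_le32(core.magic);      /* like header_to_disk() */
        disk.version = cpu_to_le32(core.version);

        back.magic = le32_to_cpu(disk.magic);      /* like header_from_disk() */
        back.version = le32_to_cpu(disk.version);

        printf("round trip ok: %d\n", memcmp(&core, &back, sizeof(core)) == 0);
        return 0;
}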
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c3547016f0f..5e0090ef418 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -22,7 +22,6 @@
 #include <linux/atomic.h>
 
 #define DM_MSG_PREFIX "multipath"
-#define MESG_STR(x) x, sizeof(x)
 #define DM_PG_INIT_DELAY_MSECS 2000
 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
 
@@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work)
  * <#paths> <#per-path selector args>
  * [<path> [<arg>]* ]+ ]+
  *---------------------------------------------------------------*/
-struct param {
-        unsigned min;
-        unsigned max;
-        char *error;
-};
-
-static int read_param(struct param *param, char *str, unsigned *v, char **error)
-{
-        if (!str ||
-            (sscanf(str, "%u", v) != 1) ||
-            (*v < param->min) ||
-            (*v > param->max)) {
-                *error = param->error;
-                return -EINVAL;
-        }
-
-        return 0;
-}
-
-struct arg_set {
-        unsigned argc;
-        char **argv;
-};
-
-static char *shift(struct arg_set *as)
-{
-        char *r;
-
-        if (as->argc) {
-                as->argc--;
-                r = *as->argv;
-                as->argv++;
-                return r;
-        }
-
-        return NULL;
-}
-
-static void consume(struct arg_set *as, unsigned n)
-{
-        BUG_ON (as->argc < n);
-        as->argc -= n;
-        as->argv += n;
-}
-
-static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
+static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
                                struct dm_target *ti)
 {
         int r;
         struct path_selector_type *pst;
         unsigned ps_argc;
 
-        static struct param _params[] = {
+        static struct dm_arg _args[] = {
                 {0, 1024, "invalid number of path selector args"},
         };
 
-        pst = dm_get_path_selector(shift(as));
+        pst = dm_get_path_selector(dm_shift_arg(as));
         if (!pst) {
                 ti->error = "unknown path selector type";
                 return -EINVAL;
         }
 
-        r = read_param(_params, shift(as), &ps_argc, &ti->error);
+        r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
         if (r) {
                 dm_put_path_selector(pst);
                 return -EINVAL;
         }
 
-        if (ps_argc > as->argc) {
-                dm_put_path_selector(pst);
-                ti->error = "not enough arguments for path selector";
-                return -EINVAL;
-        }
-
         r = pst->create(&pg->ps, ps_argc, as->argv);
         if (r) {
                 dm_put_path_selector(pst);
@@ -587,12 +535,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
         }
 
         pg->ps.type = pst;
-        consume(as, ps_argc);
+        dm_consume_args(as, ps_argc);
 
         return 0;
 }
 
-static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
+static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
                                  struct dm_target *ti)
 {
         int r;
@@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
         if (!p)
                 return ERR_PTR(-ENOMEM);
 
-        r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
+        r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
                           &p->path.dev);
         if (r) {
                 ti->error = "error getting device";
@@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
         return ERR_PTR(r);
 }
 
-static struct priority_group *parse_priority_group(struct arg_set *as,
+static struct priority_group *parse_priority_group(struct dm_arg_set *as,
                                                    struct multipath *m)
 {
-        static struct param _params[] = {
+        static struct dm_arg _args[] = {
                 {1, 1024, "invalid number of paths"},
                 {0, 1024, "invalid number of selector args"}
         };
 
         int r;
-        unsigned i, nr_selector_args, nr_params;
+        unsigned i, nr_selector_args, nr_args;
         struct priority_group *pg;
         struct dm_target *ti = m->ti;
 
@@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
         /*
          * read the paths
          */
-        r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
+        r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
         if (r)
                 goto bad;
 
-        r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
+        r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
         if (r)
                 goto bad;
 
-        nr_params = 1 + nr_selector_args;
+        nr_args = 1 + nr_selector_args;
         for (i = 0; i < pg->nr_pgpaths; i++) {
                 struct pgpath *pgpath;
-                struct arg_set path_args;
+                struct dm_arg_set path_args;
 
-                if (as->argc < nr_params) {
+                if (as->argc < nr_args) {
                         ti->error = "not enough path parameters";
                         r = -EINVAL;
                         goto bad;
                 }
 
-                path_args.argc = nr_params;
+                path_args.argc = nr_args;
                 path_args.argv = as->argv;
 
                 pgpath = parse_path(&path_args, &pg->ps, ti);
@@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 
                 pgpath->pg = pg;
                 list_add_tail(&pgpath->list, &pg->pgpaths);
-                consume(as, nr_params);
+                dm_consume_args(as, nr_args);
         }
 
         return pg;
@@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
         return ERR_PTR(r);
 }
 
-static int parse_hw_handler(struct arg_set *as, struct multipath *m)
+static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 {
         unsigned hw_argc;
         int ret;
         struct dm_target *ti = m->ti;
 
-        static struct param _params[] = {
+        static struct dm_arg _args[] = {
                 {0, 1024, "invalid number of hardware handler args"},
         };
 
-        if (read_param(_params, shift(as), &hw_argc, &ti->error))
+        if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
                 return -EINVAL;
 
         if (!hw_argc)
                 return 0;
 
-        if (hw_argc > as->argc) {
-                ti->error = "not enough arguments for hardware handler";
-                return -EINVAL;
-        }
-
-        m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
+        m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
         request_module("scsi_dh_%s", m->hw_handler_name);
         if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
                 ti->error = "unknown hardware handler type";
@@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
                 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
                         j = sprintf(p, "%s", as->argv[i]);
         }
-        consume(as, hw_argc - 1);
+        dm_consume_args(as, hw_argc - 1);
 
         return 0;
 fail:
@@ -787,20 +730,20 @@ fail:
         return ret;
 }
 
-static int parse_features(struct arg_set *as, struct multipath *m)
+static int parse_features(struct dm_arg_set *as, struct multipath *m)
 {
         int r;
         unsigned argc;
         struct dm_target *ti = m->ti;
-        const char *param_name;
+        const char *arg_name;
 
-        static struct param _params[] = {
+        static struct dm_arg _args[] = {
                 {0, 5, "invalid number of feature args"},
                 {1, 50, "pg_init_retries must be between 1 and 50"},
                 {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
         };
 
-        r = read_param(_params, shift(as), &argc, &ti->error);
+        r = dm_read_arg_group(_args, as, &argc, &ti->error);
         if (r)
                 return -EINVAL;
 
@@ -808,26 +751,24 @@ static int parse_features(struct arg_set *as, struct multipath *m)
                 return 0;
 
         do {
-                param_name = shift(as);
+                arg_name = dm_shift_arg(as);
                 argc--;
 
-                if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+                if (!strcasecmp(arg_name, "queue_if_no_path")) {
                         r = queue_if_no_path(m, 1, 0);
                         continue;
                 }
 
-                if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+                if (!strcasecmp(arg_name, "pg_init_retries") &&
                     (argc >= 1)) {
-                        r = read_param(_params + 1, shift(as),
-                                       &m->pg_init_retries, &ti->error);
+                        r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
                         argc--;
                         continue;
                 }
 
-                if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) &&
+                if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
                     (argc >= 1)) {
-                        r = read_param(_params + 2, shift(as),
-                                       &m->pg_init_delay_msecs, &ti->error);
+                        r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
                         argc--;
                         continue;
                 }
@@ -842,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m)
842static int multipath_ctr(struct dm_target *ti, unsigned int argc, 783static int multipath_ctr(struct dm_target *ti, unsigned int argc,
843 char **argv) 784 char **argv)
844{ 785{
845 /* target parameters */ 786 /* target arguments */
846 static struct param _params[] = { 787 static struct dm_arg _args[] = {
847 {0, 1024, "invalid number of priority groups"}, 788 {0, 1024, "invalid number of priority groups"},
848 {0, 1024, "invalid initial priority group number"}, 789 {0, 1024, "invalid initial priority group number"},
849 }; 790 };
850 791
851 int r; 792 int r;
852 struct multipath *m; 793 struct multipath *m;
853 struct arg_set as; 794 struct dm_arg_set as;
854 unsigned pg_count = 0; 795 unsigned pg_count = 0;
855 unsigned next_pg_num; 796 unsigned next_pg_num;
856 797
@@ -871,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
871 if (r) 812 if (r)
872 goto bad; 813 goto bad;
873 814
874 r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error); 815 r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
875 if (r) 816 if (r)
876 goto bad; 817 goto bad;
877 818
878 r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error); 819 r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
879 if (r) 820 if (r)
880 goto bad; 821 goto bad;
881 822
@@ -1505,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1505 } 1446 }
1506 1447
1507 if (argc == 1) { 1448 if (argc == 1) {
1508 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { 1449 if (!strcasecmp(argv[0], "queue_if_no_path")) {
1509 r = queue_if_no_path(m, 1, 0); 1450 r = queue_if_no_path(m, 1, 0);
1510 goto out; 1451 goto out;
1511 } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { 1452 } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
1512 r = queue_if_no_path(m, 0, 0); 1453 r = queue_if_no_path(m, 0, 0);
1513 goto out; 1454 goto out;
1514 } 1455 }
@@ -1519,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1519 goto out; 1460 goto out;
1520 } 1461 }
1521 1462
1522 if (!strnicmp(argv[0], MESG_STR("disable_group"))) { 1463 if (!strcasecmp(argv[0], "disable_group")) {
1523 r = bypass_pg_num(m, argv[1], 1); 1464 r = bypass_pg_num(m, argv[1], 1);
1524 goto out; 1465 goto out;
1525 } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { 1466 } else if (!strcasecmp(argv[0], "enable_group")) {
1526 r = bypass_pg_num(m, argv[1], 0); 1467 r = bypass_pg_num(m, argv[1], 0);
1527 goto out; 1468 goto out;
1528 } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { 1469 } else if (!strcasecmp(argv[0], "switch_group")) {
1529 r = switch_pg_num(m, argv[1]); 1470 r = switch_pg_num(m, argv[1]);
1530 goto out; 1471 goto out;
1531 } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) 1472 } else if (!strcasecmp(argv[0], "reinstate_path"))
1532 action = reinstate_path; 1473 action = reinstate_path;
1533 else if (!strnicmp(argv[0], MESG_STR("fail_path"))) 1474 else if (!strcasecmp(argv[0], "fail_path"))
1534 action = fail_path; 1475 action = fail_path;
1535 else { 1476 else {
1536 DMWARN("Unrecognised multipath message received."); 1477 DMWARN("Unrecognised multipath message received.");
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e5d8904fc8f..a002dd85db1 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -8,19 +8,19 @@
8#include <linux/slab.h> 8#include <linux/slab.h>
9 9
10#include "md.h" 10#include "md.h"
11#include "raid1.h"
11#include "raid5.h" 12#include "raid5.h"
12#include "dm.h"
13#include "bitmap.h" 13#include "bitmap.h"
14 14
15#include <linux/device-mapper.h>
16
15#define DM_MSG_PREFIX "raid" 17#define DM_MSG_PREFIX "raid"
16 18
17/* 19/*
18 * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then 20 * The following flags are used by dm-raid.c to set up the array state.
19 * make it so the flag doesn't set anything. 21 * They must be cleared before md_run is called.
20 */ 22 */
21#ifndef MD_SYNC_STATE_FORCED 23#define FirstUse 10 /* rdev flag */
22#define MD_SYNC_STATE_FORCED 0
23#endif
24 24
25struct raid_dev { 25struct raid_dev {
26 /* 26 /*
@@ -43,14 +43,15 @@ struct raid_dev {
43/* 43/*
44 * Flags for rs->print_flags field. 44 * Flags for rs->print_flags field.
45 */ 45 */
46#define DMPF_DAEMON_SLEEP 0x1 46#define DMPF_SYNC 0x1
47#define DMPF_MAX_WRITE_BEHIND 0x2 47#define DMPF_NOSYNC 0x2
48#define DMPF_SYNC 0x4 48#define DMPF_REBUILD 0x4
49#define DMPF_NOSYNC 0x8 49#define DMPF_DAEMON_SLEEP 0x8
50#define DMPF_STRIPE_CACHE 0x10 50#define DMPF_MIN_RECOVERY_RATE 0x10
51#define DMPF_MIN_RECOVERY_RATE 0x20 51#define DMPF_MAX_RECOVERY_RATE 0x20
52#define DMPF_MAX_RECOVERY_RATE 0x40 52#define DMPF_MAX_WRITE_BEHIND 0x40
53 53#define DMPF_STRIPE_CACHE 0x80
54#define DMPF_REGION_SIZE 0X100
54struct raid_set { 55struct raid_set {
55 struct dm_target *ti; 56 struct dm_target *ti;
56 57
@@ -72,6 +73,7 @@ static struct raid_type {
72 const unsigned level; /* RAID level. */ 73 const unsigned level; /* RAID level. */
73 const unsigned algorithm; /* RAID algorithm. */ 74 const unsigned algorithm; /* RAID algorithm. */
74} raid_types[] = { 75} raid_types[] = {
76 {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
75 {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, 77 {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
76 {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, 78 {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
77 {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, 79 {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
105 } 107 }
106 108
107 sectors_per_dev = ti->len; 109 sectors_per_dev = ti->len;
108 if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { 110 if ((raid_type->level > 1) &&
111 sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
109 ti->error = "Target length not divisible by number of data devices"; 112 ti->error = "Target length not divisible by number of data devices";
110 return ERR_PTR(-EINVAL); 113 return ERR_PTR(-EINVAL);
111 } 114 }
@@ -147,9 +150,16 @@ static void context_free(struct raid_set *rs)
147{ 150{
148 int i; 151 int i;
149 152
150 for (i = 0; i < rs->md.raid_disks; i++) 153 for (i = 0; i < rs->md.raid_disks; i++) {
154 if (rs->dev[i].meta_dev)
155 dm_put_device(rs->ti, rs->dev[i].meta_dev);
156 if (rs->dev[i].rdev.sb_page)
157 put_page(rs->dev[i].rdev.sb_page);
158 rs->dev[i].rdev.sb_page = NULL;
159 rs->dev[i].rdev.sb_loaded = 0;
151 if (rs->dev[i].data_dev) 160 if (rs->dev[i].data_dev)
152 dm_put_device(rs->ti, rs->dev[i].data_dev); 161 dm_put_device(rs->ti, rs->dev[i].data_dev);
162 }
153 163
154 kfree(rs); 164 kfree(rs);
155} 165}
@@ -159,7 +169,16 @@ static void context_free(struct raid_set *rs)
159 * <meta_dev>: meta device name or '-' if missing 169 * <meta_dev>: meta device name or '-' if missing
160 * <data_dev>: data device name or '-' if missing 170 * <data_dev>: data device name or '-' if missing
161 * 171 *
162 * This code parses those words. 172 * The following are permitted:
173 * - -
174 * - <data_dev>
175 * <meta_dev> <data_dev>
176 *
177 * The following is not allowed:
178 * <meta_dev> -
179 *
180 * This code parses those words. If there is a failure,
181 * the caller must use context_free to unwind the operations.
163 */ 182 */
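/*
 * Illustrative examples of the word pairs above (hypothetical device
 * names, not part of the patch):
 *
 *   "-         -"           slot has neither metadata nor data device
 *   "-         /dev/sdb1"   data device only, no dedicated metadata
 *   "/dev/sdc1 /dev/sdb1"   dedicated metadata device plus data device
 *   "/dev/sdc1 -"           rejected: metadata without a data device
 */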
164static int dev_parms(struct raid_set *rs, char **argv) 183static int dev_parms(struct raid_set *rs, char **argv)
165{ 184{
@@ -182,8 +201,16 @@ static int dev_parms(struct raid_set *rs, char **argv)
182 rs->dev[i].rdev.mddev = &rs->md; 201 rs->dev[i].rdev.mddev = &rs->md;
183 202
184 if (strcmp(argv[0], "-")) { 203 if (strcmp(argv[0], "-")) {
185 rs->ti->error = "Metadata devices not supported"; 204 ret = dm_get_device(rs->ti, argv[0],
186 return -EINVAL; 205 dm_table_get_mode(rs->ti->table),
206 &rs->dev[i].meta_dev);
207 rs->ti->error = "RAID metadata device lookup failure";
208 if (ret)
209 return ret;
210
211 rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
212 if (!rs->dev[i].rdev.sb_page)
213 return -ENOMEM;
187 } 214 }
188 215
189 if (!strcmp(argv[1], "-")) { 216 if (!strcmp(argv[1], "-")) {
@@ -193,6 +220,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
193 return -EINVAL; 220 return -EINVAL;
194 } 221 }
195 222
223 rs->ti->error = "No data device supplied with metadata device";
224 if (rs->dev[i].meta_dev)
225 return -EINVAL;
226
196 continue; 227 continue;
197 } 228 }
198 229
@@ -204,6 +235,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
204 return ret; 235 return ret;
205 } 236 }
206 237
238 if (rs->dev[i].meta_dev) {
239 metadata_available = 1;
240 rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
241 }
207 rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; 242 rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
208 list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); 243 list_add(&rs->dev[i].rdev.same_set, &rs->md.disks);
209 if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) 244 if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
@@ -235,33 +270,109 @@ static int dev_parms(struct raid_set *rs, char **argv)
235} 270}
236 271
237/* 272/*
273 * validate_region_size
274 * @rs
275 * @region_size: region size in sectors. If 0, pick a size (4MiB default).
276 *
277 * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
278 * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
279 *
280 * Returns: 0 on success, -EINVAL on failure.
281 */
282static int validate_region_size(struct raid_set *rs, unsigned long region_size)
283{
284 unsigned long min_region_size = rs->ti->len / (1 << 21);
285
286 if (!region_size) {
287 /*
288 * Choose a reasonable default. All figures in sectors.
289 */
290 if (min_region_size > (1 << 13)) {
291 region_size = min_region_size;
292 DMINFO("Choosing default region size of %lu sectors",
293 region_size);
294 } else {
295 DMINFO("Choosing default region size of 4MiB");
296 region_size = 1 << 13; /* sectors */
297 }
298 } else {
299 /*
300 * Validate user-supplied value.
301 */
302 if (region_size > rs->ti->len) {
303 rs->ti->error = "Supplied region size is too large";
304 return -EINVAL;
305 }
306
307 if (region_size < min_region_size) {
308 DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
309 region_size, min_region_size);
310 rs->ti->error = "Supplied region size is too small";
311 return -EINVAL;
312 }
313
314 if (!is_power_of_2(region_size)) {
315 rs->ti->error = "Region size is not a power of 2";
316 return -EINVAL;
317 }
318
319 if (region_size < rs->md.chunk_sectors) {
320 rs->ti->error = "Region size is smaller than the chunk size";
321 return -EINVAL;
322 }
323 }
324
325 /*
326 * Convert sectors to bytes.
327 */
328 rs->md.bitmap_info.chunksize = (region_size << 9);
329
330 return 0;
331}
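/*
 * Worked example (illustrative figures only, assuming the function
 * above): for a 1 TiB target, ti->len = 2^31 sectors, so
 * min_region_size = 2^31 >> 21 = 1024 sectors. That is below the
 * 2^13 threshold, so the 4 MiB default (8192 sectors) is chosen and
 * bitmap_info.chunksize becomes 8192 << 9 bytes = 4 MiB, giving
 * 2^31 / 2^13 = 2^18 regions - comfortably under the 2^21 MD bitmap
 * limit.
 */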
332
333/*
238 * Possible arguments are... 334 * Possible arguments are...
239 * RAID456:
240 * <chunk_size> [optional_args] 335 * <chunk_size> [optional_args]
241 * 336 *
242 * Optional args: 337 * Argument definitions
243 * [[no]sync] Force or prevent recovery of the entire array 338 * <chunk_size> The number of sectors per disk that
339 * will form the "stripe"
340 * [[no]sync] Force or prevent recovery of the
341 * entire array
244 * [rebuild <idx>] Rebuild the drive indicated by the index 342 * [rebuild <idx>] Rebuild the drive indicated by the index
245 * [daemon_sleep <ms>] Time between bitmap daemon work to clear bits 343 * [daemon_sleep <ms>] Time between bitmap daemon work to
344 * clear bits
246 * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization 345 * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
247 * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization 346 * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
347 * [write_mostly <idx>] Indicate a write mostly drive via index
248 * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) 348 * [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
249 * [stripe_cache <sectors>] Stripe cache size for higher RAIDs 349 * [stripe_cache <sectors>] Stripe cache size for higher RAIDs
350 * [region_size <sectors>] Defines granularity of bitmap
250 */ 351 */
251static int parse_raid_params(struct raid_set *rs, char **argv, 352static int parse_raid_params(struct raid_set *rs, char **argv,
252 unsigned num_raid_params) 353 unsigned num_raid_params)
253{ 354{
254 unsigned i, rebuild_cnt = 0; 355 unsigned i, rebuild_cnt = 0;
255 unsigned long value; 356 unsigned long value, region_size = 0;
256 char *key; 357 char *key;
257 358
258 /* 359 /*
259 * First, parse the in-order required arguments 360 * First, parse the in-order required arguments
361 * "chunk_size" is the only argument of this type.
260 */ 362 */
261 if ((strict_strtoul(argv[0], 10, &value) < 0) || 363 if ((strict_strtoul(argv[0], 10, &value) < 0)) {
262 !is_power_of_2(value) || (value < 8)) {
263 rs->ti->error = "Bad chunk size"; 364 rs->ti->error = "Bad chunk size";
264 return -EINVAL; 365 return -EINVAL;
366 } else if (rs->raid_type->level == 1) {
367 if (value)
368 DMERR("Ignoring chunk size parameter for RAID 1");
369 value = 0;
370 } else if (!is_power_of_2(value)) {
371 rs->ti->error = "Chunk size must be a power of 2";
372 return -EINVAL;
373 } else if (value < 8) {
374 rs->ti->error = "Chunk size value is too small";
375 return -EINVAL;
265 } 376 }
266 377
267 rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; 378 rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;
@@ -269,22 +380,39 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
269 num_raid_params--; 380 num_raid_params--;
270 381
271 /* 382 /*
272 * Second, parse the unordered optional arguments 383 * We set each individual device as In_sync with a completed
384 * 'recovery_offset'. If there has been a device failure or
385 * replacement then one of the following cases applies:
386 *
387 * 1) User specifies 'rebuild'.
388 * - Device is reset when param is read.
389 * 2) A new device is supplied.
390 * - No matching superblock found, resets device.
391 * 3) Device failure was transient and returns on reload.
392 * - Failure noticed, resets device for bitmap replay.
393 * 4) Device hadn't completed recovery after previous failure.
394 * - Superblock is read and overrides recovery_offset.
395 *
396 * What is found in the superblocks of the devices is always
397 * authoritative, unless 'rebuild' or '[no]sync' was specified.
273 */ 398 */
274 for (i = 0; i < rs->md.raid_disks; i++) 399 for (i = 0; i < rs->md.raid_disks; i++) {
275 set_bit(In_sync, &rs->dev[i].rdev.flags); 400 set_bit(In_sync, &rs->dev[i].rdev.flags);
401 rs->dev[i].rdev.recovery_offset = MaxSector;
402 }
276 403
404 /*
405 * Second, parse the unordered optional arguments
406 */
277 for (i = 0; i < num_raid_params; i++) { 407 for (i = 0; i < num_raid_params; i++) {
278 if (!strcmp(argv[i], "nosync")) { 408 if (!strcasecmp(argv[i], "nosync")) {
279 rs->md.recovery_cp = MaxSector; 409 rs->md.recovery_cp = MaxSector;
280 rs->print_flags |= DMPF_NOSYNC; 410 rs->print_flags |= DMPF_NOSYNC;
281 rs->md.flags |= MD_SYNC_STATE_FORCED;
282 continue; 411 continue;
283 } 412 }
284 if (!strcmp(argv[i], "sync")) { 413 if (!strcasecmp(argv[i], "sync")) {
285 rs->md.recovery_cp = 0; 414 rs->md.recovery_cp = 0;
286 rs->print_flags |= DMPF_SYNC; 415 rs->print_flags |= DMPF_SYNC;
287 rs->md.flags |= MD_SYNC_STATE_FORCED;
288 continue; 416 continue;
289 } 417 }
290 418
@@ -300,9 +428,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
300 return -EINVAL; 428 return -EINVAL;
301 } 429 }
302 430
303 if (!strcmp(key, "rebuild")) { 431 if (!strcasecmp(key, "rebuild")) {
304 if (++rebuild_cnt > rs->raid_type->parity_devs) { 432 rebuild_cnt++;
305 rs->ti->error = "Too many rebuild drives given"; 433 if (((rs->raid_type->level != 1) &&
434 (rebuild_cnt > rs->raid_type->parity_devs)) ||
435 ((rs->raid_type->level == 1) &&
436 (rebuild_cnt > (rs->md.raid_disks - 1)))) {
437 rs->ti->error = "Too many rebuild devices specified for given RAID type";
306 return -EINVAL; 438 return -EINVAL;
307 } 439 }
308 if (value > rs->md.raid_disks) { 440 if (value > rs->md.raid_disks) {
@@ -311,7 +443,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
311 } 443 }
312 clear_bit(In_sync, &rs->dev[value].rdev.flags); 444 clear_bit(In_sync, &rs->dev[value].rdev.flags);
313 rs->dev[value].rdev.recovery_offset = 0; 445 rs->dev[value].rdev.recovery_offset = 0;
314 } else if (!strcmp(key, "max_write_behind")) { 446 rs->print_flags |= DMPF_REBUILD;
447 } else if (!strcasecmp(key, "write_mostly")) {
448 if (rs->raid_type->level != 1) {
449 rs->ti->error = "write_mostly option is only valid for RAID1";
450 return -EINVAL;
451 }
452 if (value > rs->md.raid_disks) {
453 rs->ti->error = "Invalid write_mostly drive index given";
454 return -EINVAL;
455 }
456 set_bit(WriteMostly, &rs->dev[value].rdev.flags);
457 } else if (!strcasecmp(key, "max_write_behind")) {
458 if (rs->raid_type->level != 1) {
459 rs->ti->error = "max_write_behind option is only valid for RAID1";
460 return -EINVAL;
461 }
315 rs->print_flags |= DMPF_MAX_WRITE_BEHIND; 462 rs->print_flags |= DMPF_MAX_WRITE_BEHIND;
316 463
317 /* 464 /*
@@ -324,14 +471,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
324 return -EINVAL; 471 return -EINVAL;
325 } 472 }
326 rs->md.bitmap_info.max_write_behind = value; 473 rs->md.bitmap_info.max_write_behind = value;
327 } else if (!strcmp(key, "daemon_sleep")) { 474 } else if (!strcasecmp(key, "daemon_sleep")) {
328 rs->print_flags |= DMPF_DAEMON_SLEEP; 475 rs->print_flags |= DMPF_DAEMON_SLEEP;
329 if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { 476 if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
330 rs->ti->error = "daemon sleep period out of range"; 477 rs->ti->error = "daemon sleep period out of range";
331 return -EINVAL; 478 return -EINVAL;
332 } 479 }
333 rs->md.bitmap_info.daemon_sleep = value; 480 rs->md.bitmap_info.daemon_sleep = value;
334 } else if (!strcmp(key, "stripe_cache")) { 481 } else if (!strcasecmp(key, "stripe_cache")) {
335 rs->print_flags |= DMPF_STRIPE_CACHE; 482 rs->print_flags |= DMPF_STRIPE_CACHE;
336 483
337 /* 484 /*
@@ -348,20 +495,23 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
348 rs->ti->error = "Bad stripe_cache size"; 495 rs->ti->error = "Bad stripe_cache size";
349 return -EINVAL; 496 return -EINVAL;
350 } 497 }
351 } else if (!strcmp(key, "min_recovery_rate")) { 498 } else if (!strcasecmp(key, "min_recovery_rate")) {
352 rs->print_flags |= DMPF_MIN_RECOVERY_RATE; 499 rs->print_flags |= DMPF_MIN_RECOVERY_RATE;
353 if (value > INT_MAX) { 500 if (value > INT_MAX) {
354 rs->ti->error = "min_recovery_rate out of range"; 501 rs->ti->error = "min_recovery_rate out of range";
355 return -EINVAL; 502 return -EINVAL;
356 } 503 }
357 rs->md.sync_speed_min = (int)value; 504 rs->md.sync_speed_min = (int)value;
358 } else if (!strcmp(key, "max_recovery_rate")) { 505 } else if (!strcasecmp(key, "max_recovery_rate")) {
359 rs->print_flags |= DMPF_MAX_RECOVERY_RATE; 506 rs->print_flags |= DMPF_MAX_RECOVERY_RATE;
360 if (value > INT_MAX) { 507 if (value > INT_MAX) {
361 rs->ti->error = "max_recovery_rate out of range"; 508 rs->ti->error = "max_recovery_rate out of range";
362 return -EINVAL; 509 return -EINVAL;
363 } 510 }
364 rs->md.sync_speed_max = (int)value; 511 rs->md.sync_speed_max = (int)value;
512 } else if (!strcasecmp(key, "region_size")) {
513 rs->print_flags |= DMPF_REGION_SIZE;
514 region_size = value;
365 } else { 515 } else {
366 DMERR("Unable to parse RAID parameter: %s", key); 516 DMERR("Unable to parse RAID parameter: %s", key);
367 rs->ti->error = "Unable to parse RAID parameters"; 517 rs->ti->error = "Unable to parse RAID parameters";
@@ -369,6 +519,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
369 } 519 }
370 } 520 }
371 521
522 if (validate_region_size(rs, region_size))
523 return -EINVAL;
524
525 if (rs->md.chunk_sectors)
526 rs->ti->split_io = rs->md.chunk_sectors;
527 else
528 rs->ti->split_io = region_size;
529
372 /* Assume there are no metadata devices until the drives are parsed */ 535 /* Assume there are no metadata devices until the drives are parsed */
373 rs->md.persistent = 0; 536 rs->md.persistent = 0;
374 rs->md.external = 1; 537 rs->md.external = 1;
@@ -387,17 +550,351 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
387{ 550{
388 struct raid_set *rs = container_of(cb, struct raid_set, callbacks); 551 struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
389 552
553 if (rs->raid_type->level == 1)
554 return md_raid1_congested(&rs->md, bits);
555
390 return md_raid5_congested(&rs->md, bits); 556 return md_raid5_congested(&rs->md, bits);
391} 557}
392 558
393/* 559/*
560 * This structure is never routinely used by userspace, unlike md superblocks.
561 * Devices with this superblock should only ever be accessed via device-mapper.
562 */
563#define DM_RAID_MAGIC 0x64526D44
564struct dm_raid_superblock {
565 __le32 magic; /* "DmRd" */
566 __le32 features; /* Used to indicate possible future changes */
567
568 __le32 num_devices; /* Number of devices in this array. (Max 64) */
569 __le32 array_position; /* The position of this drive in the array */
570
571 __le64 events; /* Incremented by md when superblock updated */
572 __le64 failed_devices; /* Bit field of devices to indicate failures */
573
574 /*
575 * This offset tracks the progress of the repair or replacement of
576 * an individual drive.
577 */
578 __le64 disk_recovery_offset;
579
580 /*
581 * This offset tracks the progress of the initial array
582 * synchronisation/parity calculation.
583 */
584 __le64 array_resync_offset;
585
586 /*
587 * RAID characteristics
588 */
589 __le32 level;
590 __le32 layout;
591 __le32 stripe_sectors;
592
593 __u8 pad[452]; /* Round struct to 512 bytes. */
594 /* Always set to 0 when writing. */
595} __packed;
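/*
 * A minimal sketch (not in the patch itself) of how the 512-byte
 * on-disk layout could be asserted at compile time: the fields above
 * sum to 60 bytes and pad[452] rounds the __packed struct to 512.
 */
static inline void dm_raid_superblock_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct dm_raid_superblock) != 512);
}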
596
597static int read_disk_sb(mdk_rdev_t *rdev, int size)
598{
599 BUG_ON(!rdev->sb_page);
600
601 if (rdev->sb_loaded)
602 return 0;
603
604 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
605 DMERR("Failed to read device superblock");
606 return -EINVAL;
607 }
608
609 rdev->sb_loaded = 1;
610
611 return 0;
612}
613
614static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev)
615{
616 mdk_rdev_t *r, *t;
617 uint64_t failed_devices;
618 struct dm_raid_superblock *sb;
619
620 sb = page_address(rdev->sb_page);
621 failed_devices = le64_to_cpu(sb->failed_devices);
622
623 rdev_for_each(r, t, mddev)
624 if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
625 failed_devices |= (1ULL << r->raid_disk);
626
627 memset(sb, 0, sizeof(*sb));
628
629 sb->magic = cpu_to_le32(DM_RAID_MAGIC);
630 sb->features = cpu_to_le32(0); /* No features yet */
631
632 sb->num_devices = cpu_to_le32(mddev->raid_disks);
633 sb->array_position = cpu_to_le32(rdev->raid_disk);
634
635 sb->events = cpu_to_le64(mddev->events);
636 sb->failed_devices = cpu_to_le64(failed_devices);
637
638 sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
639 sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
640
641 sb->level = cpu_to_le32(mddev->level);
642 sb->layout = cpu_to_le32(mddev->layout);
643 sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
644}
645
646/*
647 * super_load
648 *
649 * This function creates a superblock if one is not found on the device
650 * and will decide which superblock to use if there's a choice.
651 *
652 * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
653 */
654static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev)
655{
656 int ret;
657 struct dm_raid_superblock *sb;
658 struct dm_raid_superblock *refsb;
659 uint64_t events_sb, events_refsb;
660
661 rdev->sb_start = 0;
662 rdev->sb_size = sizeof(*sb);
663
664 ret = read_disk_sb(rdev, rdev->sb_size);
665 if (ret)
666 return ret;
667
668 sb = page_address(rdev->sb_page);
669 if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) {
670 super_sync(rdev->mddev, rdev);
671
672 set_bit(FirstUse, &rdev->flags);
673
674 /* Force writing of superblocks to disk */
675 set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
676
677 /* Any superblock is better than none, choose that if given */
678 return refdev ? 0 : 1;
679 }
680
681 if (!refdev)
682 return 1;
683
684 events_sb = le64_to_cpu(sb->events);
685
686 refsb = page_address(refdev->sb_page);
687 events_refsb = le64_to_cpu(refsb->events);
688
689 return (events_sb > events_refsb) ? 1 : 0;
690}
691
692static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev)
693{
694 int role;
695 struct raid_set *rs = container_of(mddev, struct raid_set, md);
696 uint64_t events_sb;
697 uint64_t failed_devices;
698 struct dm_raid_superblock *sb;
699 uint32_t new_devs = 0;
700 uint32_t rebuilds = 0;
701 mdk_rdev_t *r, *t;
702 struct dm_raid_superblock *sb2;
703
704 sb = page_address(rdev->sb_page);
705 events_sb = le64_to_cpu(sb->events);
706 failed_devices = le64_to_cpu(sb->failed_devices);
707
708 /*
709 * Initialise to 1 if this is a new superblock.
710 */
711 mddev->events = events_sb ? : 1;
712
713 /*
714 * Reshaping is not currently allowed
715 */
716 if ((le32_to_cpu(sb->level) != mddev->level) ||
717 (le32_to_cpu(sb->layout) != mddev->layout) ||
718 (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) {
719 DMERR("Reshaping arrays not yet supported.");
720 return -EINVAL;
721 }
722
723 /* We can only change the number of devices in RAID1 right now */
724 if ((rs->raid_type->level != 1) &&
725 (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
726 DMERR("Reshaping arrays not yet supported.");
727 return -EINVAL;
728 }
729
730 if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)))
731 mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
732
733 /*
734 * During load, we set FirstUse if a new superblock was written.
735 * There are two reasons we might not have a superblock:
736 * 1) The array is brand new - in which case, all of the
737 * devices must have their In_sync bit set. Also,
738 * recovery_cp must be 0, unless forced.
739 * 2) This is a new device being added to an old array
740 * and the new device needs to be rebuilt - in which
741 * case the In_sync bit will /not/ be set and
742 * recovery_cp must be MaxSector.
743 */
744 rdev_for_each(r, t, mddev) {
745 if (!test_bit(In_sync, &r->flags)) {
746 if (!test_bit(FirstUse, &r->flags))
747 DMERR("Superblock area of "
748 "rebuild device %d should have been "
749 "cleared.", r->raid_disk);
750 set_bit(FirstUse, &r->flags);
751 rebuilds++;
752 } else if (test_bit(FirstUse, &r->flags))
753 new_devs++;
754 }
755
756 if (!rebuilds) {
757 if (new_devs == mddev->raid_disks) {
758 DMINFO("Superblocks created for new array");
759 set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
760 } else if (new_devs) {
761 DMERR("New device injected "
762 "into existing array without 'rebuild' "
763 "parameter specified");
764 return -EINVAL;
765 }
766 } else if (new_devs) {
767 DMERR("'rebuild' devices cannot be "
768 "injected into an array with other first-time devices");
769 return -EINVAL;
770 } else if (mddev->recovery_cp != MaxSector) {
771 DMERR("'rebuild' specified while array is not in-sync");
772 return -EINVAL;
773 }
774
775 /*
776 * Now we set the Faulty bit for those devices that are
777 * recorded in the superblock as failed.
778 */
779 rdev_for_each(r, t, mddev) {
780 if (!r->sb_page)
781 continue;
782 sb2 = page_address(r->sb_page);
783 sb2->failed_devices = 0;
784
785 /*
786 * Check for any device re-ordering.
787 */
788 if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
789 role = le32_to_cpu(sb2->array_position);
790 if (role != r->raid_disk) {
791 if (rs->raid_type->level != 1) {
792 rs->ti->error = "Cannot change device "
793 "positions in RAID array";
794 return -EINVAL;
795 }
796 DMINFO("RAID1 device #%d now at position #%d",
797 role, r->raid_disk);
798 }
799
800 /*
801 * Partial recovery is performed on
802 * returning failed devices.
803 */
804 if (failed_devices & (1 << role))
805 set_bit(Faulty, &r->flags);
806 }
807 }
808
809 return 0;
810}
811
812static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev)
813{
814 struct dm_raid_superblock *sb = page_address(rdev->sb_page);
815
816 /*
817 * If mddev->events is not set, we know we have not yet initialized
818 * the array.
819 */
820 if (!mddev->events && super_init_validation(mddev, rdev))
821 return -EINVAL;
822
823 mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */
824 rdev->mddev->bitmap_info.default_offset = 4096 >> 9;
825 if (!test_bit(FirstUse, &rdev->flags)) {
826 rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
827 if (rdev->recovery_offset != MaxSector)
828 clear_bit(In_sync, &rdev->flags);
829 }
830
831 /*
832 * If a device comes back, set it as not In_sync and no longer faulty.
833 */
834 if (test_bit(Faulty, &rdev->flags)) {
835 clear_bit(Faulty, &rdev->flags);
836 clear_bit(In_sync, &rdev->flags);
837 rdev->saved_raid_disk = rdev->raid_disk;
838 rdev->recovery_offset = 0;
839 }
840
841 clear_bit(FirstUse, &rdev->flags);
842
843 return 0;
844}
845
846/*
847 * Analyse superblocks and select the freshest.
848 */
849static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
850{
851 int ret;
852 mdk_rdev_t *rdev, *freshest, *tmp;
853 mddev_t *mddev = &rs->md;
854
855 freshest = NULL;
856 rdev_for_each(rdev, tmp, mddev) {
857 if (!rdev->meta_bdev)
858 continue;
859
860 ret = super_load(rdev, freshest);
861
862 switch (ret) {
863 case 1:
864 freshest = rdev;
865 break;
866 case 0:
867 break;
868 default:
869 ti->error = "Failed to load superblock";
870 return ret;
871 }
872 }
873
874 if (!freshest)
875 return 0;
876
877 /*
878 * Validation of the freshest device provides the source of
879 * validation for the remaining devices.
880 */
881 ti->error = "Unable to assemble array: Invalid superblocks";
882 if (super_validate(mddev, freshest))
883 return -EINVAL;
884
885 rdev_for_each(rdev, tmp, mddev)
886 if ((rdev != freshest) && super_validate(mddev, rdev))
887 return -EINVAL;
888
889 return 0;
890}
891
892/*
394 * Construct a RAID4/5/6 mapping: 893 * Construct a RAID4/5/6 mapping:
395 * Args: 894 * Args:
396 * <raid_type> <#raid_params> <raid_params> \ 895 * <raid_type> <#raid_params> <raid_params> \
397 * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } 896 * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> }
398 * 897 *
399 * ** metadata devices are not supported yet, use '-' instead **
400 *
401 * <raid_params> varies by <raid_type>. See 'parse_raid_params' for 898 * <raid_params> varies by <raid_type>. See 'parse_raid_params' for
402 * details on possible <raid_params>. 899 * details on possible <raid_params>.
403 */ 900 */
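/*
 * Illustrative table line (hypothetical devices and length, not from
 * the patch): a three-disk raid5_ls set with 64-sector chunks and
 * per-device metadata areas might be written as
 *
 *   0 <len> raid raid5_ls 3 64 region_size 1024 \
 *       3 /dev/sda1 /dev/sda2 /dev/sdb1 /dev/sdb2 /dev/sdc1 /dev/sdc2
 *
 * where <#raid_params> = 3 counts the words "64 region_size 1024",
 * and <#raid_devs> = 3 is followed by <meta_dev> <data_dev> pairs.
 */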
@@ -465,8 +962,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
465 if (ret) 962 if (ret)
466 goto bad; 963 goto bad;
467 964
965 rs->md.sync_super = super_sync;
966 ret = analyse_superblocks(ti, rs);
967 if (ret)
968 goto bad;
969
468 INIT_WORK(&rs->md.event_work, do_table_event); 970 INIT_WORK(&rs->md.event_work, do_table_event);
469 ti->split_io = rs->md.chunk_sectors;
470 ti->private = rs; 971 ti->private = rs;
471 972
472 mutex_lock(&rs->md.reconfig_mutex); 973 mutex_lock(&rs->md.reconfig_mutex);
@@ -482,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
482 rs->callbacks.congested_fn = raid_is_congested; 983 rs->callbacks.congested_fn = raid_is_congested;
483 dm_table_add_target_callbacks(ti->table, &rs->callbacks); 984 dm_table_add_target_callbacks(ti->table, &rs->callbacks);
484 985
986 mddev_suspend(&rs->md);
485 return 0; 987 return 0;
486 988
487bad: 989bad:
@@ -546,12 +1048,17 @@ static int raid_status(struct dm_target *ti, status_type_t type,
546 break; 1048 break;
547 case STATUSTYPE_TABLE: 1049 case STATUSTYPE_TABLE:
548 /* The string you would use to construct this array */ 1050 /* The string you would use to construct this array */
549 for (i = 0; i < rs->md.raid_disks; i++) 1051 for (i = 0; i < rs->md.raid_disks; i++) {
550 if (rs->dev[i].data_dev && 1052 if ((rs->print_flags & DMPF_REBUILD) &&
1053 rs->dev[i].data_dev &&
551 !test_bit(In_sync, &rs->dev[i].rdev.flags)) 1054 !test_bit(In_sync, &rs->dev[i].rdev.flags))
552 raid_param_cnt++; /* for rebuilds */ 1055 raid_param_cnt += 2; /* for rebuilds */
1056 if (rs->dev[i].data_dev &&
1057 test_bit(WriteMostly, &rs->dev[i].rdev.flags))
1058 raid_param_cnt += 2;
1059 }
553 1060
554 raid_param_cnt += (hweight64(rs->print_flags) * 2); 1061 raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2);
555 if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) 1062 if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))
556 raid_param_cnt--; 1063 raid_param_cnt--;
557 1064
@@ -565,7 +1072,8 @@ static int raid_status(struct dm_target *ti, status_type_t type,
565 DMEMIT(" nosync"); 1072 DMEMIT(" nosync");
566 1073
567 for (i = 0; i < rs->md.raid_disks; i++) 1074 for (i = 0; i < rs->md.raid_disks; i++)
568 if (rs->dev[i].data_dev && 1075 if ((rs->print_flags & DMPF_REBUILD) &&
1076 rs->dev[i].data_dev &&
569 !test_bit(In_sync, &rs->dev[i].rdev.flags)) 1077 !test_bit(In_sync, &rs->dev[i].rdev.flags))
570 DMEMIT(" rebuild %u", i); 1078 DMEMIT(" rebuild %u", i);
571 1079
@@ -579,6 +1087,11 @@ static int raid_status(struct dm_target *ti, status_type_t type,
579 if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) 1087 if (rs->print_flags & DMPF_MAX_RECOVERY_RATE)
580 DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); 1088 DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max);
581 1089
1090 for (i = 0; i < rs->md.raid_disks; i++)
1091 if (rs->dev[i].data_dev &&
1092 test_bit(WriteMostly, &rs->dev[i].rdev.flags))
1093 DMEMIT(" write_mostly %u", i);
1094
582 if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) 1095 if (rs->print_flags & DMPF_MAX_WRITE_BEHIND)
583 DMEMIT(" max_write_behind %lu", 1096 DMEMIT(" max_write_behind %lu",
584 rs->md.bitmap_info.max_write_behind); 1097 rs->md.bitmap_info.max_write_behind);
@@ -591,9 +1104,16 @@ static int raid_status(struct dm_target *ti, status_type_t type,
591 conf ? conf->max_nr_stripes * 2 : 0); 1104 conf ? conf->max_nr_stripes * 2 : 0);
592 } 1105 }
593 1106
1107 if (rs->print_flags & DMPF_REGION_SIZE)
1108 DMEMIT(" region_size %lu",
1109 rs->md.bitmap_info.chunksize >> 9);
1110
594 DMEMIT(" %d", rs->md.raid_disks); 1111 DMEMIT(" %d", rs->md.raid_disks);
595 for (i = 0; i < rs->md.raid_disks; i++) { 1112 for (i = 0; i < rs->md.raid_disks; i++) {
596 DMEMIT(" -"); /* metadata device */ 1113 if (rs->dev[i].meta_dev)
1114 DMEMIT(" %s", rs->dev[i].meta_dev->name);
1115 else
1116 DMEMIT(" -");
597 1117
598 if (rs->dev[i].data_dev) 1118 if (rs->dev[i].data_dev)
599 DMEMIT(" %s", rs->dev[i].data_dev->name); 1119 DMEMIT(" %s", rs->dev[i].data_dev->name);
@@ -650,12 +1170,13 @@ static void raid_resume(struct dm_target *ti)
650{ 1170{
651 struct raid_set *rs = ti->private; 1171 struct raid_set *rs = ti->private;
652 1172
1173 bitmap_load(&rs->md);
653 mddev_resume(&rs->md); 1174 mddev_resume(&rs->md);
654} 1175}
655 1176
656static struct target_type raid_target = { 1177static struct target_type raid_target = {
657 .name = "raid", 1178 .name = "raid",
658 .version = {1, 0, 0}, 1179 .version = {1, 1, 0},
659 .module = THIS_MODULE, 1180 .module = THIS_MODULE,
660 .ctr = raid_ctr, 1181 .ctr = raid_ctr,
661 .dtr = raid_dtr, 1182 .dtr = raid_dtr,
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 135c2f1fdbf..d1f1d701710 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -58,25 +58,30 @@
58#define NUM_SNAPSHOT_HDR_CHUNKS 1 58#define NUM_SNAPSHOT_HDR_CHUNKS 1
59 59
60struct disk_header { 60struct disk_header {
61 uint32_t magic; 61 __le32 magic;
62 62
63 /* 63 /*
64 * Is this snapshot valid. There is no way of recovering 64 * Is this snapshot valid. There is no way of recovering
65 * an invalid snapshot. 65 * an invalid snapshot.
66 */ 66 */
67 uint32_t valid; 67 __le32 valid;
68 68
69 /* 69 /*
70 * Simple, incrementing version. no backward 70 * Simple, incrementing version. no backward
71 * compatibility. 71 * compatibility.
72 */ 72 */
73 uint32_t version; 73 __le32 version;
74 74
75 /* In sectors */ 75 /* In sectors */
76 uint32_t chunk_size; 76 __le32 chunk_size;
77}; 77} __packed;
78 78
79struct disk_exception { 79struct disk_exception {
80 __le64 old_chunk;
81 __le64 new_chunk;
82} __packed;
83
84struct core_exception {
80 uint64_t old_chunk; 85 uint64_t old_chunk;
81 uint64_t new_chunk; 86 uint64_t new_chunk;
82}; 87};
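/*
 * disk_exception is the little-endian on-disk layout; core_exception
 * holds the same pair in CPU byte order. read_exception() and
 * write_exception() below convert between the two with
 * le64_to_cpu()/cpu_to_le64().
 */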
@@ -169,10 +174,9 @@ static int alloc_area(struct pstore *ps)
169 if (!ps->area) 174 if (!ps->area)
170 goto err_area; 175 goto err_area;
171 176
172 ps->zero_area = vmalloc(len); 177 ps->zero_area = vzalloc(len);
173 if (!ps->zero_area) 178 if (!ps->zero_area)
174 goto err_zero_area; 179 goto err_zero_area;
175 memset(ps->zero_area, 0, len);
176 180
177 ps->header_area = vmalloc(len); 181 ps->header_area = vmalloc(len);
178 if (!ps->header_area) 182 if (!ps->header_area)
@@ -396,32 +400,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
396} 400}
397 401
398static void read_exception(struct pstore *ps, 402static void read_exception(struct pstore *ps,
399 uint32_t index, struct disk_exception *result) 403 uint32_t index, struct core_exception *result)
400{ 404{
401 struct disk_exception *e = get_exception(ps, index); 405 struct disk_exception *de = get_exception(ps, index);
402 406
403 /* copy it */ 407 /* copy it */
404 result->old_chunk = le64_to_cpu(e->old_chunk); 408 result->old_chunk = le64_to_cpu(de->old_chunk);
405 result->new_chunk = le64_to_cpu(e->new_chunk); 409 result->new_chunk = le64_to_cpu(de->new_chunk);
406} 410}
407 411
408static void write_exception(struct pstore *ps, 412static void write_exception(struct pstore *ps,
409 uint32_t index, struct disk_exception *de) 413 uint32_t index, struct core_exception *e)
410{ 414{
411 struct disk_exception *e = get_exception(ps, index); 415 struct disk_exception *de = get_exception(ps, index);
412 416
413 /* copy it */ 417 /* copy it */
414 e->old_chunk = cpu_to_le64(de->old_chunk); 418 de->old_chunk = cpu_to_le64(e->old_chunk);
415 e->new_chunk = cpu_to_le64(de->new_chunk); 419 de->new_chunk = cpu_to_le64(e->new_chunk);
416} 420}
417 421
418static void clear_exception(struct pstore *ps, uint32_t index) 422static void clear_exception(struct pstore *ps, uint32_t index)
419{ 423{
420 struct disk_exception *e = get_exception(ps, index); 424 struct disk_exception *de = get_exception(ps, index);
421 425
422 /* clear it */ 426 /* clear it */
423 e->old_chunk = 0; 427 de->old_chunk = 0;
424 e->new_chunk = 0; 428 de->new_chunk = 0;
425} 429}
426 430
427/* 431/*
@@ -437,13 +441,13 @@ static int insert_exceptions(struct pstore *ps,
437{ 441{
438 int r; 442 int r;
439 unsigned int i; 443 unsigned int i;
440 struct disk_exception de; 444 struct core_exception e;
441 445
442 /* presume the area is full */ 446 /* presume the area is full */
443 *full = 1; 447 *full = 1;
444 448
445 for (i = 0; i < ps->exceptions_per_area; i++) { 449 for (i = 0; i < ps->exceptions_per_area; i++) {
446 read_exception(ps, i, &de); 450 read_exception(ps, i, &e);
447 451
448 /* 452 /*
449 * If the new_chunk is pointing at the start of 453 * If the new_chunk is pointing at the start of
@@ -451,7 +455,7 @@ static int insert_exceptions(struct pstore *ps,
451 * is we know that we've hit the end of the 455 * is we know that we've hit the end of the
452 * exceptions. Therefore the area is not full. 456 * exceptions. Therefore the area is not full.
453 */ 457 */
454 if (de.new_chunk == 0LL) { 458 if (e.new_chunk == 0LL) {
455 ps->current_committed = i; 459 ps->current_committed = i;
456 *full = 0; 460 *full = 0;
457 break; 461 break;
@@ -460,13 +464,13 @@ static int insert_exceptions(struct pstore *ps,
460 /* 464 /*
461 * Keep track of the start of the free chunks. 465 * Keep track of the start of the free chunks.
462 */ 466 */
463 if (ps->next_free <= de.new_chunk) 467 if (ps->next_free <= e.new_chunk)
464 ps->next_free = de.new_chunk + 1; 468 ps->next_free = e.new_chunk + 1;
465 469
466 /* 470 /*
467 * Otherwise we add the exception to the snapshot. 471 * Otherwise we add the exception to the snapshot.
468 */ 472 */
469 r = callback(callback_context, de.old_chunk, de.new_chunk); 473 r = callback(callback_context, e.old_chunk, e.new_chunk);
470 if (r) 474 if (r)
471 return r; 475 return r;
472 } 476 }
@@ -563,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
563 ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / 567 ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
564 sizeof(struct disk_exception); 568 sizeof(struct disk_exception);
565 ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 569 ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
566 sizeof(*ps->callbacks)); 570 sizeof(*ps->callbacks));
567 if (!ps->callbacks) 571 if (!ps->callbacks)
568 return -ENOMEM; 572 return -ENOMEM;
569 573
@@ -641,12 +645,12 @@ static void persistent_commit_exception(struct dm_exception_store *store,
641{ 645{
642 unsigned int i; 646 unsigned int i;
643 struct pstore *ps = get_info(store); 647 struct pstore *ps = get_info(store);
644 struct disk_exception de; 648 struct core_exception ce;
645 struct commit_callback *cb; 649 struct commit_callback *cb;
646 650
647 de.old_chunk = e->old_chunk; 651 ce.old_chunk = e->old_chunk;
648 de.new_chunk = e->new_chunk; 652 ce.new_chunk = e->new_chunk;
649 write_exception(ps, ps->current_committed++, &de); 653 write_exception(ps, ps->current_committed++, &ce);
650 654
651 /* 655 /*
652 * Add the callback to the back of the array. This code 656 * Add the callback to the back of the array. This code
@@ -670,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
670 * If we completely filled the current area, then wipe the next one. 674 * If we completely filled the current area, then wipe the next one.
671 */ 675 */
672 if ((ps->current_committed == ps->exceptions_per_area) && 676 if ((ps->current_committed == ps->exceptions_per_area) &&
673 zero_disk_area(ps, ps->current_area + 1)) 677 zero_disk_area(ps, ps->current_area + 1))
674 ps->valid = 0; 678 ps->valid = 0;
675 679
676 /* 680 /*
@@ -701,7 +705,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
701 chunk_t *last_new_chunk) 705 chunk_t *last_new_chunk)
702{ 706{
703 struct pstore *ps = get_info(store); 707 struct pstore *ps = get_info(store);
704 struct disk_exception de; 708 struct core_exception ce;
705 int nr_consecutive; 709 int nr_consecutive;
706 int r; 710 int r;
707 711
@@ -722,9 +726,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
722 ps->current_committed = ps->exceptions_per_area; 726 ps->current_committed = ps->exceptions_per_area;
723 } 727 }
724 728
725 read_exception(ps, ps->current_committed - 1, &de); 729 read_exception(ps, ps->current_committed - 1, &ce);
726 *last_old_chunk = de.old_chunk; 730 *last_old_chunk = ce.old_chunk;
727 *last_new_chunk = de.new_chunk; 731 *last_new_chunk = ce.new_chunk;
728 732
729 /* 733 /*
730 * Find number of consecutive chunks within the current area, 734 * Find number of consecutive chunks within the current area,
@@ -733,9 +737,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
733 for (nr_consecutive = 1; nr_consecutive < ps->current_committed; 737 for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
734 nr_consecutive++) { 738 nr_consecutive++) {
735 read_exception(ps, ps->current_committed - 1 - nr_consecutive, 739 read_exception(ps, ps->current_committed - 1 - nr_consecutive,
736 &de); 740 &ce);
737 if (de.old_chunk != *last_old_chunk - nr_consecutive || 741 if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
738 de.new_chunk != *last_new_chunk - nr_consecutive) 742 ce.new_chunk != *last_new_chunk - nr_consecutive)
739 break; 743 break;
740 } 744 }
741 745
@@ -753,7 +757,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
753 for (i = 0; i < nr_merged; i++) 757 for (i = 0; i < nr_merged; i++)
754 clear_exception(ps, ps->current_committed - 1 - i); 758 clear_exception(ps, ps->current_committed - 1 - i);
755 759
756 r = area_io(ps, WRITE); 760 r = area_io(ps, WRITE_FLUSH_FUA);
757 if (r < 0) 761 if (r < 0)
758 return r; 762 return r;
759 763
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9ecff5f3023..6f758870fc1 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -30,16 +30,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
30 ((ti)->type->name == dm_snapshot_merge_target_name) 30 ((ti)->type->name == dm_snapshot_merge_target_name)
31 31
32/* 32/*
33 * The percentage increment we will wake up users at
34 */
35#define WAKE_UP_PERCENT 5
36
37/*
38 * kcopyd priority of snapshot operations
39 */
40#define SNAPSHOT_COPY_PRIORITY 2
41
42/*
43 * The size of the mempool used to track chunks in use. 33 * The size of the mempool used to track chunks in use.
44 */ 34 */
45#define MIN_IOS 256 35#define MIN_IOS 256
@@ -180,6 +170,13 @@ struct dm_snap_pending_exception {
180 * kcopyd. 170 * kcopyd.
181 */ 171 */
182 int started; 172 int started;
173
174 /*
175 * For writing a complete chunk, bypassing the copy.
176 */
177 struct bio *full_bio;
178 bio_end_io_t *full_bio_end_io;
179 void *full_bio_private;
183}; 180};
184 181
185/* 182/*
@@ -1055,8 +1052,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1055 1052
1056 s = kmalloc(sizeof(*s), GFP_KERNEL); 1053 s = kmalloc(sizeof(*s), GFP_KERNEL);
1057 if (!s) { 1054 if (!s) {
1058 ti->error = "Cannot allocate snapshot context private " 1055 ti->error = "Cannot allocate private snapshot structure";
1059 "structure";
1060 r = -ENOMEM; 1056 r = -ENOMEM;
1061 goto bad; 1057 goto bad;
1062 } 1058 }
@@ -1380,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
1380 struct dm_snapshot *s = pe->snap; 1376 struct dm_snapshot *s = pe->snap;
1381 struct bio *origin_bios = NULL; 1377 struct bio *origin_bios = NULL;
1382 struct bio *snapshot_bios = NULL; 1378 struct bio *snapshot_bios = NULL;
1379 struct bio *full_bio = NULL;
1383 int error = 0; 1380 int error = 0;
1384 1381
1385 if (!success) { 1382 if (!success) {
@@ -1415,10 +1412,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
1415 */ 1412 */
1416 dm_insert_exception(&s->complete, e); 1413 dm_insert_exception(&s->complete, e);
1417 1414
1418 out: 1415out:
1419 dm_remove_exception(&pe->e); 1416 dm_remove_exception(&pe->e);
1420 snapshot_bios = bio_list_get(&pe->snapshot_bios); 1417 snapshot_bios = bio_list_get(&pe->snapshot_bios);
1421 origin_bios = bio_list_get(&pe->origin_bios); 1418 origin_bios = bio_list_get(&pe->origin_bios);
1419 full_bio = pe->full_bio;
1420 if (full_bio) {
1421 full_bio->bi_end_io = pe->full_bio_end_io;
1422 full_bio->bi_private = pe->full_bio_private;
1423 }
1422 free_pending_exception(pe); 1424 free_pending_exception(pe);
1423 1425
1424 increment_pending_exceptions_done_count(); 1426 increment_pending_exceptions_done_count();
@@ -1426,10 +1428,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
1426 up_write(&s->lock); 1428 up_write(&s->lock);
1427 1429
1428 /* Submit any pending write bios */ 1430 /* Submit any pending write bios */
1429 if (error) 1431 if (error) {
1432 if (full_bio)
1433 bio_io_error(full_bio);
1430 error_bios(snapshot_bios); 1434 error_bios(snapshot_bios);
1431 else 1435 } else {
1436 if (full_bio)
1437 bio_endio(full_bio, 0);
1432 flush_bios(snapshot_bios); 1438 flush_bios(snapshot_bios);
1439 }
1433 1440
1434 retry_origin_bios(s, origin_bios); 1441 retry_origin_bios(s, origin_bios);
1435} 1442}
@@ -1480,8 +1487,33 @@ static void start_copy(struct dm_snap_pending_exception *pe)
1480 dest.count = src.count; 1487 dest.count = src.count;
1481 1488
1482 /* Hand over to kcopyd */ 1489 /* Hand over to kcopyd */
1483 dm_kcopyd_copy(s->kcopyd_client, 1490 dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
1484 &src, 1, &dest, 0, copy_callback, pe); 1491}
1492
1493static void full_bio_end_io(struct bio *bio, int error)
1494{
1495 void *callback_data = bio->bi_private;
1496
1497 dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0);
1498}
1499
1500static void start_full_bio(struct dm_snap_pending_exception *pe,
1501 struct bio *bio)
1502{
1503 struct dm_snapshot *s = pe->snap;
1504 void *callback_data;
1505
1506 pe->full_bio = bio;
1507 pe->full_bio_end_io = bio->bi_end_io;
1508 pe->full_bio_private = bio->bi_private;
1509
1510 callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
1511 copy_callback, pe);
1512
1513 bio->bi_end_io = full_bio_end_io;
1514 bio->bi_private = callback_data;
1515
1516 generic_make_request(bio);
1485} 1517}
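/*
 * The whole-chunk path above skips the kcopyd read/copy entirely: the
 * incoming bio, already remapped to the COW device by
 * remap_exception(), is resubmitted via generic_make_request(), and
 * kcopyd's completion machinery is reused through
 * dm_kcopyd_prepare_callback()/dm_kcopyd_do_callback() so that
 * copy_callback() and pending_complete() run exactly as they would
 * for a normal chunk copy.
 */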
1486 1518
1487static struct dm_snap_pending_exception * 1519static struct dm_snap_pending_exception *
@@ -1519,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s,
1519 bio_list_init(&pe->origin_bios); 1551 bio_list_init(&pe->origin_bios);
1520 bio_list_init(&pe->snapshot_bios); 1552 bio_list_init(&pe->snapshot_bios);
1521 pe->started = 0; 1553 pe->started = 0;
1554 pe->full_bio = NULL;
1522 1555
1523 if (s->store->type->prepare_exception(s->store, &pe->e)) { 1556 if (s->store->type->prepare_exception(s->store, &pe->e)) {
1524 free_pending_exception(pe); 1557 free_pending_exception(pe);
@@ -1612,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1612 } 1645 }
1613 1646
1614 remap_exception(s, &pe->e, bio, chunk); 1647 remap_exception(s, &pe->e, bio, chunk);
1615 bio_list_add(&pe->snapshot_bios, bio);
1616 1648
1617 r = DM_MAPIO_SUBMITTED; 1649 r = DM_MAPIO_SUBMITTED;
1618 1650
1651 if (!pe->started &&
1652 bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
1653 pe->started = 1;
1654 up_write(&s->lock);
1655 start_full_bio(pe, bio);
1656 goto out;
1657 }
1658
1659 bio_list_add(&pe->snapshot_bios, bio);
1660
1619 if (!pe->started) { 1661 if (!pe->started) {
1620 /* this is protected by snap->lock */ 1662 /* this is protected by snap->lock */
1621 pe->started = 1; 1663 pe->started = 1;
@@ -1628,9 +1670,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
1628 map_context->ptr = track_chunk(s, chunk); 1670 map_context->ptr = track_chunk(s, chunk);
1629 } 1671 }
1630 1672
1631 out_unlock: 1673out_unlock:
1632 up_write(&s->lock); 1674 up_write(&s->lock);
1633 out: 1675out:
1634 return r; 1676 return r;
1635} 1677}
1636 1678
@@ -1974,7 +2016,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
1974 pe_to_start_now = pe; 2016 pe_to_start_now = pe;
1975 } 2017 }
1976 2018
1977 next_snapshot: 2019next_snapshot:
1978 up_write(&snap->lock); 2020 up_write(&snap->lock);
1979 2021
1980 if (pe_to_start_now) { 2022 if (pe_to_start_now) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bfe9c2333ce..986b8754bb0 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -54,7 +54,6 @@ struct dm_table {
54 sector_t *highs; 54 sector_t *highs;
55 struct dm_target *targets; 55 struct dm_target *targets;
56 56
57 unsigned discards_supported:1;
58 unsigned integrity_supported:1; 57 unsigned integrity_supported:1;
59 58
60 /* 59 /*
@@ -154,12 +153,11 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
154 return NULL; 153 return NULL;
155 154
156 size = nmemb * elem_size; 155 size = nmemb * elem_size;
157 addr = vmalloc(size); 156 addr = vzalloc(size);
158 if (addr)
159 memset(addr, 0, size);
160 157
161 return addr; 158 return addr;
162} 159}
160EXPORT_SYMBOL(dm_vcalloc);
163 161
164/* 162/*
165 * highs, and targets are managed as dynamic arrays during a 163 * highs, and targets are managed as dynamic arrays during a
@@ -209,7 +207,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
209 INIT_LIST_HEAD(&t->devices); 207 INIT_LIST_HEAD(&t->devices);
210 INIT_LIST_HEAD(&t->target_callbacks); 208 INIT_LIST_HEAD(&t->target_callbacks);
211 atomic_set(&t->holders, 0); 209 atomic_set(&t->holders, 0);
212 t->discards_supported = 1;
213 210
214 if (!num_targets) 211 if (!num_targets)
215 num_targets = KEYS_PER_NODE; 212 num_targets = KEYS_PER_NODE;
@@ -281,6 +278,7 @@ void dm_table_get(struct dm_table *t)
281{ 278{
282 atomic_inc(&t->holders); 279 atomic_inc(&t->holders);
283} 280}
281EXPORT_SYMBOL(dm_table_get);
284 282
285void dm_table_put(struct dm_table *t) 283void dm_table_put(struct dm_table *t)
286{ 284{
@@ -290,6 +288,7 @@ void dm_table_put(struct dm_table *t)
290 smp_mb__before_atomic_dec(); 288 smp_mb__before_atomic_dec();
291 atomic_dec(&t->holders); 289 atomic_dec(&t->holders);
292} 290}
291EXPORT_SYMBOL(dm_table_put);
293 292
294/* 293/*
295 * Checks to see if we need to extend highs or targets. 294 * Checks to see if we need to extend highs or targets.
@@ -455,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
455 * Add a device to the list, or just increment the usage count if 454 * Add a device to the list, or just increment the usage count if
456 * it's already present. 455 * it's already present.
457 */ 456 */
458static int __table_get_device(struct dm_table *t, struct dm_target *ti, 457int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
459 const char *path, fmode_t mode, struct dm_dev **result) 458 struct dm_dev **result)
460{ 459{
461 int r; 460 int r;
462 dev_t uninitialized_var(dev); 461 dev_t uninitialized_var(dev);
463 struct dm_dev_internal *dd; 462 struct dm_dev_internal *dd;
464 unsigned int major, minor; 463 unsigned int major, minor;
464 struct dm_table *t = ti->table;
465 465
466 BUG_ON(!t); 466 BUG_ON(!t);
467 467
@@ -509,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
509 *result = &dd->dm_dev; 509 *result = &dd->dm_dev;
510 return 0; 510 return 0;
511} 511}
512EXPORT_SYMBOL(dm_get_device);
512 513
513int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, 514int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
514 sector_t start, sector_t len, void *data) 515 sector_t start, sector_t len, void *data)
@@ -539,23 +540,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
539 * If not we'll force DM to use PAGE_SIZE or 540 * If not we'll force DM to use PAGE_SIZE or
540 * smaller I/O, just to be safe. 541 * smaller I/O, just to be safe.
541 */ 542 */
542
543 if (q->merge_bvec_fn && !ti->type->merge) 543 if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
544 blk_limits_max_hw_sectors(limits, 544 blk_limits_max_hw_sectors(limits,
545 (unsigned int) (PAGE_SIZE >> 9)); 545 (unsigned int) (PAGE_SIZE >> 9));
546 return 0; 546 return 0;
547} 547}
548EXPORT_SYMBOL_GPL(dm_set_device_limits); 548EXPORT_SYMBOL_GPL(dm_set_device_limits);
549 549
550int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
551 struct dm_dev **result)
552{
553 return __table_get_device(ti->table, ti, path, mode, result);
554}
555
556
557/* 550/*
558 * Decrement a devices use count and remove it if necessary. 551 * Decrement a device's use count and remove it if necessary.
559 */ 552 */
560void dm_put_device(struct dm_target *ti, struct dm_dev *d) 553void dm_put_device(struct dm_target *ti, struct dm_dev *d)
561{ 554{
@@ -568,6 +561,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
568 kfree(dd); 561 kfree(dd);
569 } 562 }
570} 563}
564EXPORT_SYMBOL(dm_put_device);
571 565
572/* 566/*
573 * Checks to see if the target joins onto the end of the table. 567 * Checks to see if the target joins onto the end of the table.
@@ -791,8 +785,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
791 785
792 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; 786 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
793 787
794 if (!tgt->num_discard_requests) 788 if (!tgt->num_discard_requests && tgt->discards_supported)
795 t->discards_supported = 0; 789 DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
790 dm_device_name(t->md), type);
796 791
797 return 0; 792 return 0;
798 793
@@ -802,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type,
802 return r; 797 return r;
803} 798}
804 799
800/*
801 * Target argument parsing helpers.
802 */
803static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
804 unsigned *value, char **error, unsigned grouped)
805{
806 const char *arg_str = dm_shift_arg(arg_set);
807
808 if (!arg_str ||
809 (sscanf(arg_str, "%u", value) != 1) ||
810 (*value < arg->min) ||
811 (*value > arg->max) ||
812 (grouped && arg_set->argc < *value)) {
813 *error = arg->error;
814 return -EINVAL;
815 }
816
817 return 0;
818}
819
820int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
821 unsigned *value, char **error)
822{
823 return validate_next_arg(arg, arg_set, value, error, 0);
824}
825EXPORT_SYMBOL(dm_read_arg);
826
827int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
828 unsigned *value, char **error)
829{
830 return validate_next_arg(arg, arg_set, value, error, 1);
831}
832EXPORT_SYMBOL(dm_read_arg_group);
833
834const char *dm_shift_arg(struct dm_arg_set *as)
835{
836 char *r;
837
838 if (as->argc) {
839 as->argc--;
840 r = *as->argv;
841 as->argv++;
842 return r;
843 }
844
845 return NULL;
846}
847EXPORT_SYMBOL(dm_shift_arg);
848
849void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
850{
851 BUG_ON(as->argc < num_args);
852 as->argc -= num_args;
853 as->argv += num_args;
854}
855EXPORT_SYMBOL(dm_consume_args);
856
805static int dm_table_set_type(struct dm_table *t) 857static int dm_table_set_type(struct dm_table *t)
806{ 858{
807 unsigned i; 859 unsigned i;
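[Editor's note] The new dm_shift_arg()/dm_read_arg()/dm_read_arg_group() helpers added above give targets one shared way to consume and range-check constructor arguments. A rough usage sketch under that API; the function name, the feature keyword and the bounds are hypothetical:

#include <linux/device-mapper.h>
#include <linux/string.h>

/* Sketch: parse "<num_features> [feature...]" with the new helpers. */
static int example_parse_features(struct dm_arg_set *as, struct dm_target *ti)
{
        int r;
        unsigned argc;
        const char *arg_name;

        /* Bounds and the error message live in one place. */
        static struct dm_arg _args[] = {
                {0, 4, "Invalid number of feature arguments"},
        };

        /* Grouped read: also checks that argc arguments actually follow. */
        r = dm_read_arg_group(_args, as, &argc, &ti->error);
        if (r)
                return r;

        while (argc--) {
                arg_name = dm_shift_arg(as);

                if (!strcasecmp(arg_name, "example_feature"))
                        continue;       /* hypothetical flag */

                ti->error = "Unrecognised feature requested";
                return -EINVAL;
        }

        return 0;
}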
@@ -1077,11 +1129,13 @@ void dm_table_event(struct dm_table *t)
1077 t->event_fn(t->event_context); 1129 t->event_fn(t->event_context);
1078 mutex_unlock(&_event_lock); 1130 mutex_unlock(&_event_lock);
1079} 1131}
1132EXPORT_SYMBOL(dm_table_event);
1080 1133
1081sector_t dm_table_get_size(struct dm_table *t) 1134sector_t dm_table_get_size(struct dm_table *t)
1082{ 1135{
1083 return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; 1136 return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
1084} 1137}
1138EXPORT_SYMBOL(dm_table_get_size);
1085 1139
1086struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) 1140struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
1087{ 1141{
@@ -1194,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t)
1194 blk_get_integrity(template_disk)); 1248 blk_get_integrity(template_disk));
1195} 1249}
1196 1250
1251static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
1252 sector_t start, sector_t len, void *data)
1253{
1254 unsigned flush = (*(unsigned *)data);
1255 struct request_queue *q = bdev_get_queue(dev->bdev);
1256
1257 return q && (q->flush_flags & flush);
1258}
1259
1260static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
1261{
1262 struct dm_target *ti;
1263 unsigned i = 0;
1264
1265 /*
1266 * Require at least one underlying device to support flushes.
1267 * t->devices includes internal dm devices such as mirror logs
1268 * so we need to use iterate_devices here, which targets
1269 * supporting flushes must provide.
1270 */
1271 while (i < dm_table_get_num_targets(t)) {
1272 ti = dm_table_get_target(t, i++);
1273
1274 if (!ti->num_flush_requests)
1275 continue;
1276
1277 if (ti->type->iterate_devices &&
1278 ti->type->iterate_devices(ti, device_flush_capable, &flush))
1279 return 1;
1280 }
1281
1282 return 0;
1283}
1284
1197void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, 1285void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1198 struct queue_limits *limits) 1286 struct queue_limits *limits)
1199{ 1287{
1288 unsigned flush = 0;
1289
1200 /* 1290 /*
1201 * Copy table's limits to the DM device's request_queue 1291 * Copy table's limits to the DM device's request_queue
1202 */ 1292 */
@@ -1207,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1207 else 1297 else
1208 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 1298 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1209 1299
1300 if (dm_table_supports_flush(t, REQ_FLUSH)) {
1301 flush |= REQ_FLUSH;
1302 if (dm_table_supports_flush(t, REQ_FUA))
1303 flush |= REQ_FUA;
1304 }
1305 blk_queue_flush(q, flush);
1306
1210 dm_table_set_integrity(t); 1307 dm_table_set_integrity(t);
1211 1308
1212 /* 1309 /*
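[Editor's note] Taken together with the dm.c hunk below that drops the unconditional blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA), the queue now advertises only what some underlying device can honour. A sketch of the resulting policy in isolation (example_flush_mask is invented; the REQ_* flags are the real block-layer ones):

#include <linux/types.h>        /* bool */
#include <linux/blk_types.h>    /* REQ_FLUSH, REQ_FUA */

/*
 * Sketch of the new policy: REQ_FUA is only advertised when REQ_FLUSH
 * is, and an all-zero mask (no capable device) disables flushes on the
 * dm queue entirely via blk_queue_flush(q, 0).
 */
static unsigned example_flush_mask(bool any_flush_capable, bool any_fua_capable)
{
        unsigned flush = 0;

        if (any_flush_capable) {
                flush |= REQ_FLUSH;
                if (any_fua_capable)
                        flush |= REQ_FUA;
        }

        return flush;
}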
@@ -1237,6 +1334,7 @@ fmode_t dm_table_get_mode(struct dm_table *t)
1237{ 1334{
1238 return t->mode; 1335 return t->mode;
1239} 1336}
1337EXPORT_SYMBOL(dm_table_get_mode);
1240 1338
1241static void suspend_targets(struct dm_table *t, unsigned postsuspend) 1339static void suspend_targets(struct dm_table *t, unsigned postsuspend)
1242{ 1340{
@@ -1345,6 +1443,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
1345{ 1443{
1346 return t->md; 1444 return t->md;
1347} 1445}
1446EXPORT_SYMBOL(dm_table_get_md);
1348 1447
1349static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, 1448static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
1350 sector_t start, sector_t len, void *data) 1449 sector_t start, sector_t len, void *data)
@@ -1359,19 +1458,19 @@ bool dm_table_supports_discards(struct dm_table *t)
1359 struct dm_target *ti; 1458 struct dm_target *ti;
1360 unsigned i = 0; 1459 unsigned i = 0;
1361 1460
1362 if (!t->discards_supported)
1363 return 0;
1364
1365 /* 1461 /*
1366 * Unless any target used by the table set discards_supported, 1462 * Unless any target used by the table set discards_supported,
1367 * require at least one underlying device to support discards. 1463 * require at least one underlying device to support discards.
1368 * t->devices includes internal dm devices such as mirror logs 1464 * t->devices includes internal dm devices such as mirror logs
1369 * so we need to use iterate_devices here, which targets 1465 * so we need to use iterate_devices here, which targets
1370 * supporting discard must provide. 1466 * supporting discard selectively must provide.
1371 */ 1467 */
1372 while (i < dm_table_get_num_targets(t)) { 1468 while (i < dm_table_get_num_targets(t)) {
1373 ti = dm_table_get_target(t, i++); 1469 ti = dm_table_get_target(t, i++);
1374 1470
1471 if (!ti->num_discard_requests)
1472 continue;
1473
1375 if (ti->discards_supported) 1474 if (ti->discards_supported)
1376 return 1; 1475 return 1;
1377 1476
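[Editor's note] Both capability scans above (flush and discard) bottom out in each target's iterate_devices hook, because t->devices also lists internal devices such as mirror logs. A sketch of that hook for a hypothetical single-device target, reusing the example_ctx from the dm_get_device sketch earlier:

/*
 * Sketch: a single-device target forwards the capability query to its
 * backing device. "fn" is supplied by the table code and would be
 * device_discard_capable() or device_flush_capable() above; the zero
 * device offset is hypothetical.
 */
static int example_iterate_devices(struct dm_target *ti,
                                   iterate_devices_callout_fn fn, void *data)
{
        struct example_ctx *ec = ti->private;

        return fn(ti, ec->dev, 0, ti->len, data);
}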
@@ -1382,13 +1481,3 @@ bool dm_table_supports_discards(struct dm_table *t)
1382 1481
1383 return 0; 1482 return 0;
1384} 1483}
1385
1386EXPORT_SYMBOL(dm_vcalloc);
1387EXPORT_SYMBOL(dm_get_device);
1388EXPORT_SYMBOL(dm_put_device);
1389EXPORT_SYMBOL(dm_table_event);
1390EXPORT_SYMBOL(dm_table_get_size);
1391EXPORT_SYMBOL(dm_table_get_mode);
1392EXPORT_SYMBOL(dm_table_get_md);
1393EXPORT_SYMBOL(dm_table_put);
1394EXPORT_SYMBOL(dm_table_get);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0cf68b47887..52b39f335bb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -37,6 +37,8 @@ static const char *_name = DM_NAME;
37static unsigned int major = 0; 37static unsigned int major = 0;
38static unsigned int _major = 0; 38static unsigned int _major = 0;
39 39
40static DEFINE_IDR(_minor_idr);
41
40static DEFINE_SPINLOCK(_minor_lock); 42static DEFINE_SPINLOCK(_minor_lock);
41/* 43/*
42 * For bio-based dm. 44 * For bio-based dm.
@@ -109,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
109#define DMF_FREEING 3 111#define DMF_FREEING 3
110#define DMF_DELETING 4 112#define DMF_DELETING 4
111#define DMF_NOFLUSH_SUSPENDING 5 113#define DMF_NOFLUSH_SUSPENDING 5
114#define DMF_MERGE_IS_OPTIONAL 6
112 115
113/* 116/*
114 * Work processed by per-device workqueue. 117 * Work processed by per-device workqueue.
@@ -313,6 +316,12 @@ static void __exit dm_exit(void)
313 316
314 while (i--) 317 while (i--)
315 _exits[i](); 318 _exits[i]();
319
320 /*
321 * Should be empty by this point.
322 */
323 idr_remove_all(&_minor_idr);
324 idr_destroy(&_minor_idr);
316} 325}
317 326
318/* 327/*
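[Editor's note] Moving _minor_idr to file scope (see the hunk further down that deletes the local definition) lets module exit tear it down explicitly. A hedged sketch of the 3.1-era IDR lifecycle this relies on; the two-step idr_pre_get()/idr_get_new() allocation predates idr_alloc(), and the example names are invented:

#include <linux/idr.h>

static DEFINE_IDR(example_idr);

/* Sketch: allocate an id for ptr with the era's two-step API. */
static int example_alloc_id(void *ptr, int *id)
{
        int r;

        do {
                if (!idr_pre_get(&example_idr, GFP_KERNEL))
                        return -ENOMEM;
                r = idr_get_new(&example_idr, ptr, id);
        } while (r == -EAGAIN);

        return r;
}

static void example_exit(void)
{
        idr_remove_all(&example_idr);   /* drop any remaining entries */
        idr_destroy(&example_idr);      /* free the idr's internal layers */
}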
@@ -1171,7 +1180,8 @@ static int __clone_and_map_discard(struct clone_info *ci)
1171 1180
1172 /* 1181 /*
1173 * Even though the device advertised discard support, 1182 * Even though the device advertised discard support,
1174 * reconfiguration might have changed that since the 1183 * that does not mean every target supports it, and
1184 * reconfiguration might also have changed that since the
1175 * check was performed. 1185 * check was performed.
1176 */ 1186 */
1177 if (!ti->num_discard_requests) 1187 if (!ti->num_discard_requests)
@@ -1705,8 +1715,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
1705/*----------------------------------------------------------------- 1715/*-----------------------------------------------------------------
1706 * An IDR is used to keep track of allocated minor numbers. 1716 * An IDR is used to keep track of allocated minor numbers.
1707 *---------------------------------------------------------------*/ 1717 *---------------------------------------------------------------*/
1708static DEFINE_IDR(_minor_idr);
1709
1710static void free_minor(int minor) 1718static void free_minor(int minor)
1711{ 1719{
1712 spin_lock(&_minor_lock); 1720 spin_lock(&_minor_lock);
@@ -1800,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md)
1800 blk_queue_make_request(md->queue, dm_request); 1808 blk_queue_make_request(md->queue, dm_request);
1801 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1809 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1802 blk_queue_merge_bvec(md->queue, dm_merge_bvec); 1810 blk_queue_merge_bvec(md->queue, dm_merge_bvec);
1803 blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
1804} 1811}
1805 1812
1806/* 1813/*
@@ -1986,6 +1993,59 @@ static void __set_size(struct mapped_device *md, sector_t size)
1986} 1993}
1987 1994
1988/* 1995/*
1996 * Return 1 if the queue has a compulsory merge_bvec_fn function.
1997 *
1998 * If this function returns 0, then the device is either a non-dm
1999 * device without a merge_bvec_fn, or it is a dm device that is
2000 * able to split any bios it receives that are too big.
2001 */
2002int dm_queue_merge_is_compulsory(struct request_queue *q)
2003{
2004 struct mapped_device *dev_md;
2005
2006 if (!q->merge_bvec_fn)
2007 return 0;
2008
2009 if (q->make_request_fn == dm_request) {
2010 dev_md = q->queuedata;
2011 if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
2012 return 0;
2013 }
2014
2015 return 1;
2016}
2017
2018static int dm_device_merge_is_compulsory(struct dm_target *ti,
2019 struct dm_dev *dev, sector_t start,
2020 sector_t len, void *data)
2021{
2022 struct block_device *bdev = dev->bdev;
2023 struct request_queue *q = bdev_get_queue(bdev);
2024
2025 return dm_queue_merge_is_compulsory(q);
2026}
2027
2028/*
2029 * Return 1 if it is acceptable to ignore merge_bvec_fn based
2030 * on the properties of the underlying devices.
2031 */
2032static int dm_table_merge_is_optional(struct dm_table *table)
2033{
2034 unsigned i = 0;
2035 struct dm_target *ti;
2036
2037 while (i < dm_table_get_num_targets(table)) {
2038 ti = dm_table_get_target(table, i++);
2039
2040 if (ti->type->iterate_devices &&
2041 ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
2042 return 0;
2043 }
2044
2045 return 1;
2046}
2047
2048/*
1989 * Returns old map, which caller must destroy. 2049 * Returns old map, which caller must destroy.
1990 */ 2050 */
1991static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, 2051static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
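[Editor's note] In plain terms, dm_table_merge_is_optional() asks every underlying queue the question dm_queue_merge_is_compulsory() answers. A sketch of the rule the two implement together, with the kernel structures replaced by plain flags (all names invented):

/*
 * Sketch: a table may ignore merge_bvec_fn only if every underlying
 * queue either has no merge_bvec_fn at all, or is itself a dm queue
 * whose own table has already been marked DMF_MERGE_IS_OPTIONAL.
 */
static bool example_merge_is_optional(const bool q_has_merge_fn[],
                                      const bool q_is_optional_dm[],
                                      unsigned nr_queues)
{
        unsigned i;

        for (i = 0; i < nr_queues; i++) {
                if (q_has_merge_fn[i] && !q_is_optional_dm[i])
                        return false;   /* merge is compulsory somewhere */
        }

        return true;
}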
@@ -1995,6 +2055,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
1995 struct request_queue *q = md->queue; 2055 struct request_queue *q = md->queue;
1996 sector_t size; 2056 sector_t size;
1997 unsigned long flags; 2057 unsigned long flags;
2058 int merge_is_optional;
1998 2059
1999 size = dm_table_get_size(t); 2060 size = dm_table_get_size(t);
2000 2061
@@ -2020,10 +2081,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
2020 2081
2021 __bind_mempools(md, t); 2082 __bind_mempools(md, t);
2022 2083
2084 merge_is_optional = dm_table_merge_is_optional(t);
2085
2023 write_lock_irqsave(&md->map_lock, flags); 2086 write_lock_irqsave(&md->map_lock, flags);
2024 old_map = md->map; 2087 old_map = md->map;
2025 md->map = t; 2088 md->map = t;
2026 dm_table_set_restrictions(t, q, limits); 2089 dm_table_set_restrictions(t, q, limits);
2090 if (merge_is_optional)
2091 set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
2092 else
2093 clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
2027 write_unlock_irqrestore(&md->map_lock, flags); 2094 write_unlock_irqrestore(&md->map_lock, flags);
2028 2095
2029 return old_map; 2096 return old_map;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 1aaf16746da..6745dbd278a 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t);
66void dm_table_free_md_mempools(struct dm_table *t); 66void dm_table_free_md_mempools(struct dm_table *t);
67struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); 67struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
68 68
69int dm_queue_merge_is_compulsory(struct request_queue *q);
70
69void dm_lock_md_type(struct mapped_device *md); 71void dm_lock_md_type(struct mapped_device *md);
70void dm_unlock_md_type(struct mapped_device *md); 72void dm_unlock_md_type(struct mapped_device *md);
71void dm_set_md_type(struct mapped_device *md, unsigned type); 73void dm_set_md_type(struct mapped_device *md, unsigned type);
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 5b0dba6d4ef..d724a18b528 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -1989,14 +1989,20 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
1989 return -EINVAL; 1989 return -EINVAL;
1990 } 1990 }
1991 1991
1992 /*
1993 * It's important to set bp->state to a value different from
1994 * BNX2X_STATE_OPEN and only then stop the Tx. Otherwise bnx2x_tx_int()
1995 * may restart the Tx from the NAPI context (see bnx2x_tx_int()).
1996 */
1997 bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
1998 smp_mb();
1999
1992 /* Stop Tx */ 2000 /* Stop Tx */
1993 bnx2x_tx_disable(bp); 2001 bnx2x_tx_disable(bp);
1994 2002
1995#ifdef BCM_CNIC 2003#ifdef BCM_CNIC
1996 bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); 2004 bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);
1997#endif 2005#endif
1998 bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
1999 smp_mb();
2000 2006
2001 bp->rx_mode = BNX2X_RX_MODE_NONE; 2007 bp->rx_mode = BNX2X_RX_MODE_NONE;
2002 2008
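[Editor's note] The hunk above moves the state transition (plus smp_mb()) ahead of bnx2x_tx_disable() so that a bnx2x_tx_int() running concurrently in NAPI context observes CLOSING before Tx stops and cannot restart it. A generic sketch of this publish-then-act pattern, assuming a matching barrier on the reader side as in bnx2x_tx_int(); all names are invented:

enum example_state { EXAMPLE_OPEN, EXAMPLE_CLOSING };

struct example_dev {
        enum example_state state;
};

void example_tx_disable(struct example_dev *dev);
void example_tx_wake(struct example_dev *dev);

/* Writer (unload path): publish the state, then act. */
static void example_stop(struct example_dev *dev)
{
        dev->state = EXAMPLE_CLOSING;   /* publish first */
        smp_mb();                       /* order the store before the stop */
        example_tx_disable(dev);
}

/* Reader (NAPI completion path): */
static void example_tx_int(struct example_dev *dev)
{
        /* reap completed descriptors here */
        smp_mb();                       /* pairs with the writer's barrier */
        if (dev->state == EXAMPLE_OPEN)
                example_tx_wake(dev);   /* never runs once CLOSING is visible */
}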
diff --git a/drivers/net/bnx2x/bnx2x_hsi.h b/drivers/net/bnx2x/bnx2x_hsi.h
index 06727f32e50..dc24de40e33 100644
--- a/drivers/net/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/bnx2x/bnx2x_hsi.h
@@ -1204,6 +1204,8 @@ struct drv_port_mb {
1204 1204
1205 #define LINK_STATUS_PFC_ENABLED 0x20000000 1205 #define LINK_STATUS_PFC_ENABLED 0x20000000
1206 1206
1207 #define LINK_STATUS_PHYSICAL_LINK_FLAG 0x40000000
1208
1207 u32 port_stx; 1209 u32 port_stx;
1208 1210
1209 u32 stat_nig_timer; 1211 u32 stat_nig_timer;
diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c
index bcd8f003862..d45b1555a60 100644
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -1546,6 +1546,12 @@ static void bnx2x_umac_enable(struct link_params *params,
1546 vars->line_speed); 1546 vars->line_speed);
1547 break; 1547 break;
1548 } 1548 }
1549 if (!(vars->flow_ctrl & BNX2X_FLOW_CTRL_TX))
1550 val |= UMAC_COMMAND_CONFIG_REG_IGNORE_TX_PAUSE;
1551
1552 if (!(vars->flow_ctrl & BNX2X_FLOW_CTRL_RX))
1553 val |= UMAC_COMMAND_CONFIG_REG_PAUSE_IGNORE;
1554
1549 REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val); 1555 REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val);
1550 udelay(50); 1556 udelay(50);
1551 1557
@@ -1661,10 +1667,20 @@ static void bnx2x_xmac_disable(struct link_params *params)
1661{ 1667{
1662 u8 port = params->port; 1668 u8 port = params->port;
1663 struct bnx2x *bp = params->bp; 1669 struct bnx2x *bp = params->bp;
1664 u32 xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; 1670 u32 pfc_ctrl, xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
1665 1671
1666 if (REG_RD(bp, MISC_REG_RESET_REG_2) & 1672 if (REG_RD(bp, MISC_REG_RESET_REG_2) &
1667 MISC_REGISTERS_RESET_REG_2_XMAC) { 1673 MISC_REGISTERS_RESET_REG_2_XMAC) {
1674 /*
1675 * Send an indication to change the state in the NIG back to XON.
1676 * Clearing this bit allows the next setting of it to produce a
1677 * rising edge.
1678 */
1679 pfc_ctrl = REG_RD(bp, xmac_base + XMAC_REG_PFC_CTRL_HI);
1680 REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI,
1681 (pfc_ctrl & ~(1<<1)));
1682 REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI,
1683 (pfc_ctrl | (1<<1)));
1668 DP(NETIF_MSG_LINK, "Disable XMAC on port %x\n", port); 1684 DP(NETIF_MSG_LINK, "Disable XMAC on port %x\n", port);
1669 REG_WR(bp, xmac_base + XMAC_REG_CTRL, 0); 1685 REG_WR(bp, xmac_base + XMAC_REG_CTRL, 0);
1670 usleep_range(1000, 1000); 1686 usleep_range(1000, 1000);
@@ -1729,6 +1745,10 @@ static int bnx2x_emac_enable(struct link_params *params,
1729 1745
1730 DP(NETIF_MSG_LINK, "enabling EMAC\n"); 1746 DP(NETIF_MSG_LINK, "enabling EMAC\n");
1731 1747
1748 /* Disable BMAC */
1749 REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
1750 (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
1751
1732 /* enable emac and not bmac */ 1752 /* enable emac and not bmac */
1733 REG_WR(bp, NIG_REG_EGRESS_EMAC0_PORT + port*4, 1); 1753 REG_WR(bp, NIG_REG_EGRESS_EMAC0_PORT + port*4, 1);
1734 1754
@@ -2583,12 +2603,6 @@ static int bnx2x_bmac1_enable(struct link_params *params,
2583 REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS, 2603 REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS,
2584 wb_data, 2); 2604 wb_data, 2);
2585 2605
2586 if (vars->phy_flags & PHY_TX_ERROR_CHECK_FLAG) {
2587 REG_RD_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_LSS_STATUS,
2588 wb_data, 2);
2589 if (wb_data[0] > 0)
2590 return -ESRCH;
2591 }
2592 return 0; 2606 return 0;
2593} 2607}
2594 2608
@@ -2654,16 +2668,6 @@ static int bnx2x_bmac2_enable(struct link_params *params,
2654 udelay(30); 2668 udelay(30);
2655 bnx2x_update_pfc_bmac2(params, vars, is_lb); 2669 bnx2x_update_pfc_bmac2(params, vars, is_lb);
2656 2670
2657 if (vars->phy_flags & PHY_TX_ERROR_CHECK_FLAG) {
2658 REG_RD_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_RX_LSS_STAT,
2659 wb_data, 2);
2660 if (wb_data[0] > 0) {
2661 DP(NETIF_MSG_LINK, "Got bad LSS status 0x%x\n",
2662 wb_data[0]);
2663 return -ESRCH;
2664 }
2665 }
2666
2667 return 0; 2671 return 0;
2668} 2672}
2669 2673
@@ -2949,7 +2953,9 @@ static int bnx2x_cl45_read(struct bnx2x *bp, struct bnx2x_phy *phy,
2949 u32 val; 2953 u32 val;
2950 u16 i; 2954 u16 i;
2951 int rc = 0; 2955 int rc = 0;
2952 2956 if (phy->flags & FLAGS_MDC_MDIO_WA_B0)
2957 bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
2958 EMAC_MDIO_STATUS_10MB);
2953 /* address */ 2959 /* address */
2954 val = ((phy->addr << 21) | (devad << 16) | reg | 2960 val = ((phy->addr << 21) | (devad << 16) | reg |
2955 EMAC_MDIO_COMM_COMMAND_ADDRESS | 2961 EMAC_MDIO_COMM_COMMAND_ADDRESS |
@@ -3003,6 +3009,9 @@ static int bnx2x_cl45_read(struct bnx2x *bp, struct bnx2x_phy *phy,
3003 } 3009 }
3004 } 3010 }
3005 3011
3012 if (phy->flags & FLAGS_MDC_MDIO_WA_B0)
3013 bnx2x_bits_dis(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
3014 EMAC_MDIO_STATUS_10MB);
3006 return rc; 3015 return rc;
3007} 3016}
3008 3017
@@ -3012,6 +3021,9 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy,
3012 u32 tmp; 3021 u32 tmp;
3013 u8 i; 3022 u8 i;
3014 int rc = 0; 3023 int rc = 0;
3024 if (phy->flags & FLAGS_MDC_MDIO_WA_B0)
3025 bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
3026 EMAC_MDIO_STATUS_10MB);
3015 3027
3016 /* address */ 3028 /* address */
3017 3029
@@ -3065,7 +3077,9 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy,
3065 bnx2x_cl45_read(bp, phy, devad, 0xf, &temp_val); 3077 bnx2x_cl45_read(bp, phy, devad, 0xf, &temp_val);
3066 } 3078 }
3067 } 3079 }
3068 3080 if (phy->flags & FLAGS_MDC_MDIO_WA_B0)
3081 bnx2x_bits_dis(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
3082 EMAC_MDIO_STATUS_10MB);
3069 return rc; 3083 return rc;
3070} 3084}
3071 3085
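[Editor's note] Both CL45 accessors now bracket the MDIO transaction with EMAC_MDIO_STATUS_10MB when the PHY carries the new FLAGS_MDC_MDIO_WA_B0 flag. A sketch of that bracketing factored into one hypothetical wrapper (example_cl45_op is invented; bnx2x_bits_en()/bnx2x_bits_dis() and the register names are the driver's own):

/* Sketch: apply the B0 MDC/MDIO workaround around one MDIO operation. */
static int example_cl45_op(struct bnx2x *bp, struct bnx2x_phy *phy,
                           int (*op)(struct bnx2x *bp, struct bnx2x_phy *phy))
{
        int rc;
        bool wa = phy->flags & FLAGS_MDC_MDIO_WA_B0;

        if (wa)
                bnx2x_bits_en(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
                              EMAC_MDIO_STATUS_10MB);
        rc = op(bp, phy);       /* the actual CL45 read or write */
        if (wa)
                bnx2x_bits_dis(bp, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_STATUS,
                               EMAC_MDIO_STATUS_10MB);

        return rc;
}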
@@ -4353,6 +4367,9 @@ void bnx2x_link_status_update(struct link_params *params,
4353 4367
4354 vars->link_up = (vars->link_status & LINK_STATUS_LINK_UP); 4368 vars->link_up = (vars->link_status & LINK_STATUS_LINK_UP);
4355 vars->phy_flags = PHY_XGXS_FLAG; 4369 vars->phy_flags = PHY_XGXS_FLAG;
4370 if (vars->link_status & LINK_STATUS_PHYSICAL_LINK_FLAG)
4371 vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG;
4372
4356 if (vars->link_up) { 4373 if (vars->link_up) {
4357 DP(NETIF_MSG_LINK, "phy link up\n"); 4374 DP(NETIF_MSG_LINK, "phy link up\n");
4358 4375
@@ -4444,6 +4461,8 @@ void bnx2x_link_status_update(struct link_params *params,
4444 4461
4445 /* indicate no mac active */ 4462 /* indicate no mac active */
4446 vars->mac_type = MAC_TYPE_NONE; 4463 vars->mac_type = MAC_TYPE_NONE;
4464 if (vars->link_status & LINK_STATUS_PHYSICAL_LINK_FLAG)
4465 vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
4447 } 4466 }
4448 4467
4449 /* Sync media type */ 4468 /* Sync media type */
@@ -5903,20 +5922,30 @@ int bnx2x_set_led(struct link_params *params,
5903 tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED); 5922 tmp = EMAC_RD(bp, EMAC_REG_EMAC_LED);
5904 EMAC_WR(bp, EMAC_REG_EMAC_LED, 5923 EMAC_WR(bp, EMAC_REG_EMAC_LED,
5905 (tmp | EMAC_LED_OVERRIDE)); 5924 (tmp | EMAC_LED_OVERRIDE));
5906 return rc; 5925 /*
5926 * return here without enabling traffic
5927 * LED blink and setting rate in ON mode.
5928 * In operational mode, enabling LED blink
5929 * and setting rate is needed.
5930 */
5931 if (mode == LED_MODE_ON)
5932 return rc;
5907 } 5933 }
5908 } else if (SINGLE_MEDIA_DIRECT(params) && 5934 } else if (SINGLE_MEDIA_DIRECT(params)) {
5909 (CHIP_IS_E1x(bp) ||
5910 CHIP_IS_E2(bp))) {
5911 /* 5935 /*
5912 * This is a work-around for HW issue found when link 5936 * This is a work-around for HW issue found when link
5913 * is up in CL73 5937 * is up in CL73
5914 */ 5938 */
5915 REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, 0);
5916 REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1); 5939 REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1);
5917 } else { 5940 if (CHIP_IS_E1x(bp) ||
5941 CHIP_IS_E2(bp) ||
5942 (mode == LED_MODE_ON))
5943 REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, 0);
5944 else
5945 REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4,
5946 hw_led_mode);
5947 } else
5918 REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, hw_led_mode); 5948 REG_WR(bp, NIG_REG_LED_MODE_P0 + port*4, hw_led_mode);
5919 }
5920 5949
5921 REG_WR(bp, NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0 + port*4, 0); 5950 REG_WR(bp, NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0 + port*4, 0);
5922 /* Set blinking rate to ~15.9Hz */ 5951 /* Set blinking rate to ~15.9Hz */
@@ -6160,6 +6189,7 @@ static int bnx2x_update_link_down(struct link_params *params,
6160 /* update shared memory */ 6189 /* update shared memory */
6161 vars->link_status &= ~(LINK_STATUS_SPEED_AND_DUPLEX_MASK | 6190 vars->link_status &= ~(LINK_STATUS_SPEED_AND_DUPLEX_MASK |
6162 LINK_STATUS_LINK_UP | 6191 LINK_STATUS_LINK_UP |
6192 LINK_STATUS_PHYSICAL_LINK_FLAG |
6163 LINK_STATUS_AUTO_NEGOTIATE_COMPLETE | 6193 LINK_STATUS_AUTO_NEGOTIATE_COMPLETE |
6164 LINK_STATUS_RX_FLOW_CONTROL_FLAG_MASK | 6194 LINK_STATUS_RX_FLOW_CONTROL_FLAG_MASK |
6165 LINK_STATUS_TX_FLOW_CONTROL_FLAG_MASK | 6195 LINK_STATUS_TX_FLOW_CONTROL_FLAG_MASK |
@@ -6197,7 +6227,8 @@ static int bnx2x_update_link_up(struct link_params *params,
6197 u8 port = params->port; 6227 u8 port = params->port;
6198 int rc = 0; 6228 int rc = 0;
6199 6229
6200 vars->link_status |= LINK_STATUS_LINK_UP; 6230 vars->link_status |= (LINK_STATUS_LINK_UP |
6231 LINK_STATUS_PHYSICAL_LINK_FLAG);
6201 vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG; 6232 vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG;
6202 6233
6203 if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) 6234 if (vars->flow_ctrl & BNX2X_FLOW_CTRL_TX)
@@ -7998,6 +8029,9 @@ static void bnx2x_warpcore_set_limiting_mode(struct link_params *params,
7998 bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, 8029 bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
7999 MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE, &val); 8030 MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE, &val);
8000 8031
8032 /* Restart microcode to re-read the new mode */
8033 bnx2x_warpcore_reset_lane(bp, phy, 1);
8034 bnx2x_warpcore_reset_lane(bp, phy, 0);
8001 8035
8002} 8036}
8003 8037
@@ -8116,7 +8150,6 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
8116 offsetof(struct shmem_region, dev_info. 8150 offsetof(struct shmem_region, dev_info.
8117 port_feature_config[params->port]. 8151 port_feature_config[params->port].
8118 config)); 8152 config));
8119
8120 bnx2x_set_gpio_int(bp, gpio_num, 8153 bnx2x_set_gpio_int(bp, gpio_num,
8121 MISC_REGISTERS_GPIO_INT_OUTPUT_SET, 8154 MISC_REGISTERS_GPIO_INT_OUTPUT_SET,
8122 gpio_port); 8155 gpio_port);
@@ -8125,8 +8158,9 @@ void bnx2x_handle_module_detect_int(struct link_params *params)
8125 * Disable transmit for this module 8158 * Disable transmit for this module
8126 */ 8159 */
8127 phy->media_type = ETH_PHY_NOT_PRESENT; 8160 phy->media_type = ETH_PHY_NOT_PRESENT;
8128 if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) == 8161 if (((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) ==
8129 PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER) 8162 PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER) ||
8163 CHIP_IS_E3(bp))
8130 bnx2x_sfp_set_transmitter(params, phy, 0); 8164 bnx2x_sfp_set_transmitter(params, phy, 0);
8131 } 8165 }
8132} 8166}
@@ -8228,9 +8262,6 @@ static u8 bnx2x_8706_config_init(struct bnx2x_phy *phy,
8228 u16 cnt, val, tmp1; 8262 u16 cnt, val, tmp1;
8229 struct bnx2x *bp = params->bp; 8263 struct bnx2x *bp = params->bp;
8230 8264
8231 /* SPF+ PHY: Set flag to check for Tx error */
8232 vars->phy_flags = PHY_TX_ERROR_CHECK_FLAG;
8233
8234 bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_2, 8265 bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_2,
8235 MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port); 8266 MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
8236 /* HW reset */ 8267 /* HW reset */
@@ -8414,9 +8445,6 @@ static int bnx2x_8726_config_init(struct bnx2x_phy *phy,
8414 struct bnx2x *bp = params->bp; 8445 struct bnx2x *bp = params->bp;
8415 DP(NETIF_MSG_LINK, "Initializing BCM8726\n"); 8446 DP(NETIF_MSG_LINK, "Initializing BCM8726\n");
8416 8447
8417 /* SPF+ PHY: Set flag to check for Tx error */
8418 vars->phy_flags = PHY_TX_ERROR_CHECK_FLAG;
8419
8420 bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1<<15); 8448 bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1<<15);
8421 bnx2x_wait_reset_complete(bp, phy, params); 8449 bnx2x_wait_reset_complete(bp, phy, params);
8422 8450
@@ -8585,9 +8613,6 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy,
8585 struct bnx2x *bp = params->bp; 8613 struct bnx2x *bp = params->bp;
8586 /* Enable PMD link, MOD_ABS_FLT, and 1G link alarm */ 8614 /* Enable PMD link, MOD_ABS_FLT, and 1G link alarm */
8587 8615
8588 /* SPF+ PHY: Set flag to check for Tx error */
8589 vars->phy_flags = PHY_TX_ERROR_CHECK_FLAG;
8590
8591 bnx2x_wait_reset_complete(bp, phy, params); 8616 bnx2x_wait_reset_complete(bp, phy, params);
8592 rx_alarm_ctrl_val = (1<<2) | (1<<5) ; 8617 rx_alarm_ctrl_val = (1<<2) | (1<<5) ;
8593 /* Should be 0x6 to enable XS on Tx side. */ 8618 /* Should be 0x6 to enable XS on Tx side. */
@@ -9243,7 +9268,13 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy,
9243 if (phy->req_duplex == DUPLEX_FULL) 9268 if (phy->req_duplex == DUPLEX_FULL)
9244 autoneg_val |= (1<<8); 9269 autoneg_val |= (1<<8);
9245 9270
9246 bnx2x_cl45_write(bp, phy, 9271 /*
9272 * Always write this if this is not 84833.
9273 * For 84833, write it only when it's a forced speed.
9274 */
9275 if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) ||
9276 ((autoneg_val & (1<<12)) == 0))
9277 bnx2x_cl45_write(bp, phy,
9247 MDIO_AN_DEVAD, 9278 MDIO_AN_DEVAD,
9248 MDIO_AN_REG_8481_LEGACY_MII_CTRL, autoneg_val); 9279 MDIO_AN_REG_8481_LEGACY_MII_CTRL, autoneg_val);
9249 9280
@@ -9257,13 +9288,12 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy,
9257 bnx2x_cl45_write(bp, phy, 9288 bnx2x_cl45_write(bp, phy,
9258 MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 9289 MDIO_AN_DEVAD, MDIO_AN_REG_CTRL,
9259 0x3200); 9290 0x3200);
9260 } else if (phy->req_line_speed != SPEED_10 && 9291 } else
9261 phy->req_line_speed != SPEED_100) {
9262 bnx2x_cl45_write(bp, phy, 9292 bnx2x_cl45_write(bp, phy,
9263 MDIO_AN_DEVAD, 9293 MDIO_AN_DEVAD,
9264 MDIO_AN_REG_8481_10GBASE_T_AN_CTRL, 9294 MDIO_AN_REG_8481_10GBASE_T_AN_CTRL,
9265 1); 9295 1);
9266 } 9296
9267 /* Save spirom version */ 9297 /* Save spirom version */
9268 bnx2x_save_848xx_spirom_version(phy, params); 9298 bnx2x_save_848xx_spirom_version(phy, params);
9269 9299
@@ -9756,11 +9786,9 @@ static void bnx2x_848x3_link_reset(struct bnx2x_phy *phy,
9756 bnx2x_cl45_read(bp, phy, 9786 bnx2x_cl45_read(bp, phy,
9757 MDIO_CTL_DEVAD, 9787 MDIO_CTL_DEVAD,
9758 0x400f, &val16); 9788 0x400f, &val16);
9759 /* Put to low power mode on newer FW */ 9789 bnx2x_cl45_write(bp, phy,
9760 if ((val16 & 0x303f) > 0x1009) 9790 MDIO_PMA_DEVAD,
9761 bnx2x_cl45_write(bp, phy, 9791 MDIO_PMA_REG_CTRL, 0x800);
9762 MDIO_PMA_DEVAD,
9763 MDIO_PMA_REG_CTRL, 0x800);
9764 } 9792 }
9765} 9793}
9766 9794
@@ -10191,8 +10219,15 @@ static void bnx2x_54618se_link_reset(struct bnx2x_phy *phy,
10191 u32 cfg_pin; 10219 u32 cfg_pin;
10192 u8 port; 10220 u8 port;
10193 10221
10194 /* This works with E3 only, no need to check the chip 10222 /*
10195 before determining the port. */ 10223 * If no EPIO is routed to reset the GPHY, put it
10224 * in low power mode.
10225 */
10226 bnx2x_cl22_write(bp, phy, MDIO_PMA_REG_CTRL, 0x800);
10227 /*
10228 * This works with E3 only, no need to check the chip
10229 * before determining the port.
10230 */
10196 port = params->port; 10231 port = params->port;
10197 cfg_pin = (REG_RD(bp, params->shmem_base + 10232 cfg_pin = (REG_RD(bp, params->shmem_base +
10198 offsetof(struct shmem_region, 10233 offsetof(struct shmem_region,
@@ -10603,7 +10638,8 @@ static struct bnx2x_phy phy_warpcore = {
10603 .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT, 10638 .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
10604 .addr = 0xff, 10639 .addr = 0xff,
10605 .def_md_devad = 0, 10640 .def_md_devad = 0,
10606 .flags = FLAGS_HW_LOCK_REQUIRED, 10641 .flags = (FLAGS_HW_LOCK_REQUIRED |
10642 FLAGS_TX_ERROR_CHECK),
10607 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10643 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10608 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10644 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10609 .mdio_ctrl = 0, 10645 .mdio_ctrl = 0,
@@ -10729,7 +10765,8 @@ static struct bnx2x_phy phy_8706 = {
10729 .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706, 10765 .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706,
10730 .addr = 0xff, 10766 .addr = 0xff,
10731 .def_md_devad = 0, 10767 .def_md_devad = 0,
10732 .flags = FLAGS_INIT_XGXS_FIRST, 10768 .flags = (FLAGS_INIT_XGXS_FIRST |
10769 FLAGS_TX_ERROR_CHECK),
10733 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10770 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10734 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10771 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10735 .mdio_ctrl = 0, 10772 .mdio_ctrl = 0,
@@ -10760,7 +10797,8 @@ static struct bnx2x_phy phy_8726 = {
10760 .addr = 0xff, 10797 .addr = 0xff,
10761 .def_md_devad = 0, 10798 .def_md_devad = 0,
10762 .flags = (FLAGS_HW_LOCK_REQUIRED | 10799 .flags = (FLAGS_HW_LOCK_REQUIRED |
10763 FLAGS_INIT_XGXS_FIRST), 10800 FLAGS_INIT_XGXS_FIRST |
10801 FLAGS_TX_ERROR_CHECK),
10764 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10802 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10765 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10803 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10766 .mdio_ctrl = 0, 10804 .mdio_ctrl = 0,
@@ -10791,7 +10829,8 @@ static struct bnx2x_phy phy_8727 = {
10791 .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727, 10829 .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727,
10792 .addr = 0xff, 10830 .addr = 0xff,
10793 .def_md_devad = 0, 10831 .def_md_devad = 0,
10794 .flags = FLAGS_FAN_FAILURE_DET_REQ, 10832 .flags = (FLAGS_FAN_FAILURE_DET_REQ |
10833 FLAGS_TX_ERROR_CHECK),
10795 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10834 .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10796 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 10835 .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
10797 .mdio_ctrl = 0, 10836 .mdio_ctrl = 0,
@@ -11112,6 +11151,8 @@ static int bnx2x_populate_int_phy(struct bnx2x *bp, u32 shmem_base, u8 port,
11112 */ 11151 */
11113 if (CHIP_REV(bp) == CHIP_REV_Ax) 11152 if (CHIP_REV(bp) == CHIP_REV_Ax)
11114 phy->flags |= FLAGS_MDC_MDIO_WA; 11153 phy->flags |= FLAGS_MDC_MDIO_WA;
11154 else
11155 phy->flags |= FLAGS_MDC_MDIO_WA_B0;
11115 } else { 11156 } else {
11116 switch (switch_cfg) { 11157 switch (switch_cfg) {
11117 case SWITCH_CFG_1G: 11158 case SWITCH_CFG_1G:
@@ -11500,13 +11541,12 @@ void bnx2x_init_xmac_loopback(struct link_params *params,
11500 * Set WC to loopback mode since link is required to provide clock 11541 * Set WC to loopback mode since link is required to provide clock
11501 * to the XMAC in 20G mode 11542 * to the XMAC in 20G mode
11502 */ 11543 */
11503 if (vars->line_speed == SPEED_20000) { 11544 bnx2x_set_aer_mmd(params, &params->phy[0]);
11504 bnx2x_set_aer_mmd(params, &params->phy[0]); 11545 bnx2x_warpcore_reset_lane(bp, &params->phy[0], 0);
11505 bnx2x_warpcore_reset_lane(bp, &params->phy[0], 0); 11546 params->phy[INT_PHY].config_loopback(
11506 params->phy[INT_PHY].config_loopback(
11507 &params->phy[INT_PHY], 11547 &params->phy[INT_PHY],
11508 params); 11548 params);
11509 } 11549
11510 bnx2x_xmac_enable(params, vars, 1); 11550 bnx2x_xmac_enable(params, vars, 1);
11511 REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); 11551 REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0);
11512} 11552}
@@ -11684,12 +11724,16 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars,
11684 bnx2x_set_led(params, vars, LED_MODE_OFF, 0); 11724 bnx2x_set_led(params, vars, LED_MODE_OFF, 0);
11685 11725
11686 if (reset_ext_phy) { 11726 if (reset_ext_phy) {
11727 bnx2x_set_mdio_clk(bp, params->chip_id, port);
11687 for (phy_index = EXT_PHY1; phy_index < params->num_phys; 11728 for (phy_index = EXT_PHY1; phy_index < params->num_phys;
11688 phy_index++) { 11729 phy_index++) {
11689 if (params->phy[phy_index].link_reset) 11730 if (params->phy[phy_index].link_reset) {
11731 bnx2x_set_aer_mmd(params,
11732 &params->phy[phy_index]);
11690 params->phy[phy_index].link_reset( 11733 params->phy[phy_index].link_reset(
11691 &params->phy[phy_index], 11734 &params->phy[phy_index],
11692 params); 11735 params);
11736 }
11693 if (params->phy[phy_index].flags & 11737 if (params->phy[phy_index].flags &
11694 FLAGS_REARM_LATCH_SIGNAL) 11738 FLAGS_REARM_LATCH_SIGNAL)
11695 clear_latch_ind = 1; 11739 clear_latch_ind = 1;
@@ -12178,10 +12222,6 @@ static void bnx2x_analyze_link_error(struct link_params *params,
12178 u8 led_mode; 12222 u8 led_mode;
12179 u32 half_open_conn = (vars->phy_flags & PHY_HALF_OPEN_CONN_FLAG) > 0; 12223 u32 half_open_conn = (vars->phy_flags & PHY_HALF_OPEN_CONN_FLAG) > 0;
12180 12224
12181 /*DP(NETIF_MSG_LINK, "CHECK LINK: %x half_open:%x-> lss:%x\n",
12182 vars->link_up,
12183 half_open_conn, lss_status);*/
12184
12185 if ((lss_status ^ half_open_conn) == 0) 12225 if ((lss_status ^ half_open_conn) == 0)
12186 return; 12226 return;
12187 12227
@@ -12194,6 +12234,7 @@ static void bnx2x_analyze_link_error(struct link_params *params,
12194 * b. Update link_vars->link_up 12234 * b. Update link_vars->link_up
12195 */ 12235 */
12196 if (lss_status) { 12236 if (lss_status) {
12237 DP(NETIF_MSG_LINK, "Remote Fault detected !!!\n");
12197 vars->link_status &= ~LINK_STATUS_LINK_UP; 12238 vars->link_status &= ~LINK_STATUS_LINK_UP;
12198 vars->link_up = 0; 12239 vars->link_up = 0;
12199 vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG; 12240 vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
@@ -12203,6 +12244,7 @@ static void bnx2x_analyze_link_error(struct link_params *params,
12203 */ 12244 */
12204 led_mode = LED_MODE_OFF; 12245 led_mode = LED_MODE_OFF;
12205 } else { 12246 } else {
12247 DP(NETIF_MSG_LINK, "Remote Fault cleared\n");
12206 vars->link_status |= LINK_STATUS_LINK_UP; 12248 vars->link_status |= LINK_STATUS_LINK_UP;
12207 vars->link_up = 1; 12249 vars->link_up = 1;
12208 vars->phy_flags &= ~PHY_HALF_OPEN_CONN_FLAG; 12250 vars->phy_flags &= ~PHY_HALF_OPEN_CONN_FLAG;
@@ -12219,6 +12261,15 @@ static void bnx2x_analyze_link_error(struct link_params *params,
12219 bnx2x_notify_link_changed(bp); 12261 bnx2x_notify_link_changed(bp);
12220} 12262}
12221 12263
12264/******************************************************************************
12265* Description:
12266* This function checks for a half-open connection change indication.
12267* When such a change occurs, it calls bnx2x_analyze_link_error()
12268* to check whether Remote Fault is set or cleared. Reception of a remote
12269* fault status message in the MAC indicates that the peer's MAC has
12270* detected a fault, for example, due to a break in the TX side of the fiber.
12271*
12272******************************************************************************/
12222static void bnx2x_check_half_open_conn(struct link_params *params, 12273static void bnx2x_check_half_open_conn(struct link_params *params,
12223 struct link_vars *vars) 12274 struct link_vars *vars)
12224{ 12275{
@@ -12229,9 +12280,28 @@ static void bnx2x_check_half_open_conn(struct link_params *params,
12229 if ((vars->phy_flags & PHY_PHYSICAL_LINK_FLAG) == 0) 12280 if ((vars->phy_flags & PHY_PHYSICAL_LINK_FLAG) == 0)
12230 return; 12281 return;
12231 12282
12232 if (!CHIP_IS_E3(bp) && 12283 if (CHIP_IS_E3(bp) &&
12233 (REG_RD(bp, MISC_REG_RESET_REG_2) & 12284 (REG_RD(bp, MISC_REG_RESET_REG_2) &
12234 (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << params->port))) { 12285 (MISC_REGISTERS_RESET_REG_2_XMAC))) {
12286 /* Check E3 XMAC */
12287 /*
12288 * Note that link speed cannot be queried here, since it may be
12289 * zero while the link is down. If the UMAC is active, LSS will
12290 * simply not be set.
12291 */
12292 mac_base = (params->port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
12293
12294 /* Clear stick bits (Requires rising edge) */
12295 REG_WR(bp, mac_base + XMAC_REG_CLEAR_RX_LSS_STATUS, 0);
12296 REG_WR(bp, mac_base + XMAC_REG_CLEAR_RX_LSS_STATUS,
12297 XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS |
12298 XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS);
12299 if (REG_RD(bp, mac_base + XMAC_REG_RX_LSS_STATUS))
12300 lss_status = 1;
12301
12302 bnx2x_analyze_link_error(params, vars, lss_status);
12303 } else if (REG_RD(bp, MISC_REG_RESET_REG_2) &
12304 (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << params->port)) {
12235 /* Check E1X / E2 BMAC */ 12305 /* Check E1X / E2 BMAC */
12236 u32 lss_status_reg; 12306 u32 lss_status_reg;
12237 u32 wb_data[2]; 12307 u32 wb_data[2];
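[Editor's note] The E3 branch added above, like the PFC XON toggle in bnx2x_xmac_disable() earlier, rearms latched hardware status by writing the clear bits as 0 and then 1. A sketch of the idiom with invented stand-ins for the driver's REG_RD()/REG_WR(); the XMAC register names are the driver's:

#include <linux/io.h>

static inline u32 example_rd(void __iomem *addr)
{
        return readl(addr);
}

static inline void example_wr(void __iomem *addr, u32 val)
{
        writel(val, addr);
}

/*
 * Sketch: the CLEAR register reacts to a rising edge, so each bit is
 * written as 0 and then 1 before the status register is sampled.
 */
static int example_read_latched_lss(void __iomem *mac_base)
{
        example_wr(mac_base + XMAC_REG_CLEAR_RX_LSS_STATUS, 0);
        example_wr(mac_base + XMAC_REG_CLEAR_RX_LSS_STATUS,
                   XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS |
                   XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS);

        /* Anything set now is a freshly latched local/remote fault. */
        return example_rd(mac_base + XMAC_REG_RX_LSS_STATUS) != 0;
}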
@@ -12253,14 +12323,20 @@ static void bnx2x_check_half_open_conn(struct link_params *params,
12253void bnx2x_period_func(struct link_params *params, struct link_vars *vars) 12323void bnx2x_period_func(struct link_params *params, struct link_vars *vars)
12254{ 12324{
12255 struct bnx2x *bp = params->bp; 12325 struct bnx2x *bp = params->bp;
12326 u16 phy_idx;
12256 if (!params) { 12327 if (!params) {
12257 DP(NETIF_MSG_LINK, "Ininitliazed params !\n"); 12328 DP(NETIF_MSG_LINK, "Uninitialized params !\n");
12258 return; 12329 return;
12259 } 12330 }
12260 /* DP(NETIF_MSG_LINK, "Periodic called vars->phy_flags 0x%x speed 0x%x 12331
12261 RESET_REG_2 0x%x\n", vars->phy_flags, vars->line_speed, 12332 for (phy_idx = INT_PHY; phy_idx < MAX_PHYS; phy_idx++) {
12262 REG_RD(bp, MISC_REG_RESET_REG_2)); */ 12333 if (params->phy[phy_idx].flags & FLAGS_TX_ERROR_CHECK) {
12263 bnx2x_check_half_open_conn(params, vars); 12334 bnx2x_set_aer_mmd(params, &params->phy[phy_idx]);
12335 bnx2x_check_half_open_conn(params, vars);
12336 break;
12337 }
12338 }
12339
12264 if (CHIP_IS_E3(bp)) 12340 if (CHIP_IS_E3(bp))
12265 bnx2x_check_over_curr(params, vars); 12341 bnx2x_check_over_curr(params, vars);
12266} 12342}
diff --git a/drivers/net/bnx2x/bnx2x_link.h b/drivers/net/bnx2x/bnx2x_link.h
index 6a7708d5da3..c12db6da213 100644
--- a/drivers/net/bnx2x/bnx2x_link.h
+++ b/drivers/net/bnx2x/bnx2x_link.h
@@ -145,6 +145,8 @@ struct bnx2x_phy {
145#define FLAGS_SFP_NOT_APPROVED (1<<7) 145#define FLAGS_SFP_NOT_APPROVED (1<<7)
146#define FLAGS_MDC_MDIO_WA (1<<8) 146#define FLAGS_MDC_MDIO_WA (1<<8)
147#define FLAGS_DUMMY_READ (1<<9) 147#define FLAGS_DUMMY_READ (1<<9)
148#define FLAGS_MDC_MDIO_WA_B0 (1<<10)
149#define FLAGS_TX_ERROR_CHECK (1<<12)
148 150
149 /* preemphasis values for the rx side */ 151 /* preemphasis values for the rx side */
150 u16 rx_preemphasis[4]; 152 u16 rx_preemphasis[4];
@@ -276,7 +278,6 @@ struct link_vars {
276#define PHY_PHYSICAL_LINK_FLAG (1<<2) 278#define PHY_PHYSICAL_LINK_FLAG (1<<2)
277#define PHY_HALF_OPEN_CONN_FLAG (1<<3) 279#define PHY_HALF_OPEN_CONN_FLAG (1<<3)
278#define PHY_OVER_CURRENT_FLAG (1<<4) 280#define PHY_OVER_CURRENT_FLAG (1<<4)
279#define PHY_TX_ERROR_CHECK_FLAG (1<<5)
280 281
281 u8 mac_type; 282 u8 mac_type;
282#define MAC_TYPE_NONE 0 283#define MAC_TYPE_NONE 0
diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h
index 02461fef875..27b5ecb1183 100644
--- a/drivers/net/bnx2x/bnx2x_reg.h
+++ b/drivers/net/bnx2x/bnx2x_reg.h
@@ -4771,9 +4771,11 @@
4771 The fields are: [4:0] - tail pointer; [10:5] - Link List size; [15:11] - 4771
4772 header pointer. */ 4772 header pointer. */
4773#define UCM_REG_XX_TABLE 0xe0300 4773#define UCM_REG_XX_TABLE 0xe0300
4774#define UMAC_COMMAND_CONFIG_REG_IGNORE_TX_PAUSE (0x1<<28)
4774#define UMAC_COMMAND_CONFIG_REG_LOOP_ENA (0x1<<15) 4775#define UMAC_COMMAND_CONFIG_REG_LOOP_ENA (0x1<<15)
4775#define UMAC_COMMAND_CONFIG_REG_NO_LGTH_CHECK (0x1<<24) 4776#define UMAC_COMMAND_CONFIG_REG_NO_LGTH_CHECK (0x1<<24)
4776#define UMAC_COMMAND_CONFIG_REG_PAD_EN (0x1<<5) 4777#define UMAC_COMMAND_CONFIG_REG_PAD_EN (0x1<<5)
4778#define UMAC_COMMAND_CONFIG_REG_PAUSE_IGNORE (0x1<<8)
4777#define UMAC_COMMAND_CONFIG_REG_PROMIS_EN (0x1<<4) 4779#define UMAC_COMMAND_CONFIG_REG_PROMIS_EN (0x1<<4)
4778#define UMAC_COMMAND_CONFIG_REG_RX_ENA (0x1<<1) 4780#define UMAC_COMMAND_CONFIG_REG_RX_ENA (0x1<<1)
4779#define UMAC_COMMAND_CONFIG_REG_SW_RESET (0x1<<13) 4781#define UMAC_COMMAND_CONFIG_REG_SW_RESET (0x1<<13)
@@ -5622,8 +5624,9 @@
5622#define EMAC_MDIO_COMM_START_BUSY (1L<<29) 5624#define EMAC_MDIO_COMM_START_BUSY (1L<<29)
5623#define EMAC_MDIO_MODE_AUTO_POLL (1L<<4) 5625#define EMAC_MDIO_MODE_AUTO_POLL (1L<<4)
5624#define EMAC_MDIO_MODE_CLAUSE_45 (1L<<31) 5626#define EMAC_MDIO_MODE_CLAUSE_45 (1L<<31)
5625#define EMAC_MDIO_MODE_CLOCK_CNT (0x3fL<<16) 5627#define EMAC_MDIO_MODE_CLOCK_CNT (0x3ffL<<16)
5626#define EMAC_MDIO_MODE_CLOCK_CNT_BITSHIFT 16 5628#define EMAC_MDIO_MODE_CLOCK_CNT_BITSHIFT 16
5629#define EMAC_MDIO_STATUS_10MB (1L<<1)
5627#define EMAC_MODE_25G_MODE (1L<<5) 5630#define EMAC_MODE_25G_MODE (1L<<5)
5628#define EMAC_MODE_HALF_DUPLEX (1L<<1) 5631#define EMAC_MODE_HALF_DUPLEX (1L<<1)
5629#define EMAC_MODE_PORT_GMII (2L<<2) 5632#define EMAC_MODE_PORT_GMII (2L<<2)
@@ -5634,6 +5637,7 @@
5634#define EMAC_REG_EMAC_MAC_MATCH 0x10 5637#define EMAC_REG_EMAC_MAC_MATCH 0x10
5635#define EMAC_REG_EMAC_MDIO_COMM 0xac 5638#define EMAC_REG_EMAC_MDIO_COMM 0xac
5636#define EMAC_REG_EMAC_MDIO_MODE 0xb4 5639#define EMAC_REG_EMAC_MDIO_MODE 0xb4
5640#define EMAC_REG_EMAC_MDIO_STATUS 0xb0
5637#define EMAC_REG_EMAC_MODE 0x0 5641#define EMAC_REG_EMAC_MODE 0x0
5638#define EMAC_REG_EMAC_RX_MODE 0xc8 5642#define EMAC_REG_EMAC_RX_MODE 0xc8
5639#define EMAC_REG_EMAC_RX_MTU_SIZE 0x9c 5643#define EMAC_REG_EMAC_RX_MTU_SIZE 0x9c
diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c
index c5f0f04219f..5548d464261 100644
--- a/drivers/net/e1000/e1000_ethtool.c
+++ b/drivers/net/e1000/e1000_ethtool.c
@@ -838,6 +838,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
838 838
839 /* Disable all the interrupts */ 839 /* Disable all the interrupts */
840 ew32(IMC, 0xFFFFFFFF); 840 ew32(IMC, 0xFFFFFFFF);
841 E1000_WRITE_FLUSH();
841 msleep(10); 842 msleep(10);
842 843
843 /* Test each interrupt */ 844 /* Test each interrupt */
@@ -856,6 +857,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
856 adapter->test_icr = 0; 857 adapter->test_icr = 0;
857 ew32(IMC, mask); 858 ew32(IMC, mask);
858 ew32(ICS, mask); 859 ew32(ICS, mask);
860 E1000_WRITE_FLUSH();
859 msleep(10); 861 msleep(10);
860 862
861 if (adapter->test_icr & mask) { 863 if (adapter->test_icr & mask) {
@@ -873,6 +875,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
873 adapter->test_icr = 0; 875 adapter->test_icr = 0;
874 ew32(IMS, mask); 876 ew32(IMS, mask);
875 ew32(ICS, mask); 877 ew32(ICS, mask);
878 E1000_WRITE_FLUSH();
876 msleep(10); 879 msleep(10);
877 880
878 if (!(adapter->test_icr & mask)) { 881 if (!(adapter->test_icr & mask)) {
@@ -890,6 +893,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
890 adapter->test_icr = 0; 893 adapter->test_icr = 0;
891 ew32(IMC, ~mask & 0x00007FFF); 894 ew32(IMC, ~mask & 0x00007FFF);
892 ew32(ICS, ~mask & 0x00007FFF); 895 ew32(ICS, ~mask & 0x00007FFF);
896 E1000_WRITE_FLUSH();
893 msleep(10); 897 msleep(10);
894 898
895 if (adapter->test_icr) { 899 if (adapter->test_icr) {
@@ -901,6 +905,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
901 905
902 /* Disable all the interrupts */ 906 /* Disable all the interrupts */
903 ew32(IMC, 0xFFFFFFFF); 907 ew32(IMC, 0xFFFFFFFF);
908 E1000_WRITE_FLUSH();
904 msleep(10); 909 msleep(10);
905 910
906 /* Unhook test interrupt handler */ 911 /* Unhook test interrupt handler */
@@ -1394,6 +1399,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter)
1394 if (unlikely(++k == txdr->count)) k = 0; 1399 if (unlikely(++k == txdr->count)) k = 0;
1395 } 1400 }
1396 ew32(TDT, k); 1401 ew32(TDT, k);
1402 E1000_WRITE_FLUSH();
1397 msleep(200); 1403 msleep(200);
1398 time = jiffies; /* set the start time for the receive */ 1404 time = jiffies; /* set the start time for the receive */
1399 good_cnt = 0; 1405 good_cnt = 0;
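[Editor's note] Every hunk in this file inserts E1000_WRITE_FLUSH(), a dummy read of the STATUS register, between an MMIO write and a fixed delay, so the posted PCI write reaches the device before the sleep begins; the e1e_flush() and wrfl() additions further down are the e1000e and igb spellings of the same idiom. A generic sketch with an invented register layout:

#include <linux/delay.h>
#include <linux/io.h>

/* Hypothetical register layout, for illustration only. */
#define EXAMPLE_REG_CTRL        0x0000
#define EXAMPLE_REG_STATUS      0x0008
#define EXAMPLE_CTRL_RST        (1u << 26)

/*
 * Sketch: a readl() from the same device forces the preceding writel()
 * out of the PCI posting buffers, so the delay really starts after the
 * write has landed in hardware.
 */
static void example_pulse_reset(void __iomem *regs)
{
        writel(EXAMPLE_CTRL_RST, regs + EXAMPLE_REG_CTRL);
        readl(regs + EXAMPLE_REG_STATUS);       /* flush the posted write */
        msleep(5);                              /* timed from the write itself */
        writel(0, regs + EXAMPLE_REG_CTRL);
}

The one deliberate exception is in the ich8lan global-reset hunk below, where the comment notes the flush is skipped because reading back during reset hangs the hardware.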
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index 1698622af43..8545c7aa93e 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -446,6 +446,7 @@ s32 e1000_reset_hw(struct e1000_hw *hw)
446 /* Must reset the PHY before resetting the MAC */ 446 /* Must reset the PHY before resetting the MAC */
447 if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) { 447 if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
448 ew32(CTRL, (ctrl | E1000_CTRL_PHY_RST)); 448 ew32(CTRL, (ctrl | E1000_CTRL_PHY_RST));
449 E1000_WRITE_FLUSH();
449 msleep(5); 450 msleep(5);
450 } 451 }
451 452
@@ -3752,6 +3753,7 @@ static s32 e1000_acquire_eeprom(struct e1000_hw *hw)
3752 /* Clear SK and CS */ 3753 /* Clear SK and CS */
3753 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); 3754 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
3754 ew32(EECD, eecd); 3755 ew32(EECD, eecd);
3756 E1000_WRITE_FLUSH();
3755 udelay(1); 3757 udelay(1);
3756 } 3758 }
3757 3759
@@ -3824,6 +3826,7 @@ static void e1000_release_eeprom(struct e1000_hw *hw)
3824 eecd &= ~E1000_EECD_SK; /* Lower SCK */ 3826 eecd &= ~E1000_EECD_SK; /* Lower SCK */
3825 3827
3826 ew32(EECD, eecd); 3828 ew32(EECD, eecd);
3829 E1000_WRITE_FLUSH();
3827 3830
3828 udelay(hw->eeprom.delay_usec); 3831 udelay(hw->eeprom.delay_usec);
3829 } else if (hw->eeprom.type == e1000_eeprom_microwire) { 3832 } else if (hw->eeprom.type == e1000_eeprom_microwire) {
diff --git a/drivers/net/e1000e/es2lan.c b/drivers/net/e1000e/es2lan.c
index c0ecb2d9fdb..e4f42257c24 100644
--- a/drivers/net/e1000e/es2lan.c
+++ b/drivers/net/e1000e/es2lan.c
@@ -1313,6 +1313,7 @@ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
1313 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & 1313 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
1314 E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; 1314 E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
1315 ew32(KMRNCTRLSTA, kmrnctrlsta); 1315 ew32(KMRNCTRLSTA, kmrnctrlsta);
1316 e1e_flush();
1316 1317
1317 udelay(2); 1318 udelay(2);
1318 1319
@@ -1347,6 +1348,7 @@ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
1347 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & 1348 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
1348 E1000_KMRNCTRLSTA_OFFSET) | data; 1349 E1000_KMRNCTRLSTA_OFFSET) | data;
1349 ew32(KMRNCTRLSTA, kmrnctrlsta); 1350 ew32(KMRNCTRLSTA, kmrnctrlsta);
1351 e1e_flush();
1350 1352
1351 udelay(2); 1353 udelay(2);
1352 1354
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
index cb1a3623253..06d88f316dc 100644
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -28,8 +28,8 @@
28 28
29/* ethtool support for e1000 */ 29/* ethtool support for e1000 */
30 30
31#include <linux/interrupt.h>
32#include <linux/netdevice.h> 31#include <linux/netdevice.h>
32#include <linux/interrupt.h>
33#include <linux/ethtool.h> 33#include <linux/ethtool.h>
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
@@ -964,6 +964,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
964 964
965 /* Disable all the interrupts */ 965 /* Disable all the interrupts */
966 ew32(IMC, 0xFFFFFFFF); 966 ew32(IMC, 0xFFFFFFFF);
967 e1e_flush();
967 usleep_range(10000, 20000); 968 usleep_range(10000, 20000);
968 969
969 /* Test each interrupt */ 970 /* Test each interrupt */
@@ -996,6 +997,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
996 adapter->test_icr = 0; 997 adapter->test_icr = 0;
997 ew32(IMC, mask); 998 ew32(IMC, mask);
998 ew32(ICS, mask); 999 ew32(ICS, mask);
1000 e1e_flush();
999 usleep_range(10000, 20000); 1001 usleep_range(10000, 20000);
1000 1002
1001 if (adapter->test_icr & mask) { 1003 if (adapter->test_icr & mask) {
@@ -1014,6 +1016,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
1014 adapter->test_icr = 0; 1016 adapter->test_icr = 0;
1015 ew32(IMS, mask); 1017 ew32(IMS, mask);
1016 ew32(ICS, mask); 1018 ew32(ICS, mask);
1019 e1e_flush();
1017 usleep_range(10000, 20000); 1020 usleep_range(10000, 20000);
1018 1021
1019 if (!(adapter->test_icr & mask)) { 1022 if (!(adapter->test_icr & mask)) {
@@ -1032,6 +1035,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
1032 adapter->test_icr = 0; 1035 adapter->test_icr = 0;
1033 ew32(IMC, ~mask & 0x00007FFF); 1036 ew32(IMC, ~mask & 0x00007FFF);
1034 ew32(ICS, ~mask & 0x00007FFF); 1037 ew32(ICS, ~mask & 0x00007FFF);
1038 e1e_flush();
1035 usleep_range(10000, 20000); 1039 usleep_range(10000, 20000);
1036 1040
1037 if (adapter->test_icr) { 1041 if (adapter->test_icr) {
@@ -1043,6 +1047,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
1043 1047
1044 /* Disable all the interrupts */ 1048 /* Disable all the interrupts */
1045 ew32(IMC, 0xFFFFFFFF); 1049 ew32(IMC, 0xFFFFFFFF);
1050 e1e_flush();
1046 usleep_range(10000, 20000); 1051 usleep_range(10000, 20000);
1047 1052
1048 /* Unhook test interrupt handler */ 1053 /* Unhook test interrupt handler */
@@ -1276,6 +1281,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
1276 E1000_CTRL_FD); /* Force Duplex to FULL */ 1281 E1000_CTRL_FD); /* Force Duplex to FULL */
1277 1282
1278 ew32(CTRL, ctrl_reg); 1283 ew32(CTRL, ctrl_reg);
1284 e1e_flush();
1279 udelay(500); 1285 udelay(500);
1280 1286
1281 return 0; 1287 return 0;
@@ -1418,6 +1424,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter)
1418 */ 1424 */
1419#define E1000_SERDES_LB_ON 0x410 1425#define E1000_SERDES_LB_ON 0x410
1420 ew32(SCTL, E1000_SERDES_LB_ON); 1426 ew32(SCTL, E1000_SERDES_LB_ON);
1427 e1e_flush();
1421 usleep_range(10000, 20000); 1428 usleep_range(10000, 20000);
1422 1429
1423 return 0; 1430 return 0;
@@ -1513,6 +1520,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
1513 hw->phy.media_type == e1000_media_type_internal_serdes) { 1520 hw->phy.media_type == e1000_media_type_internal_serdes) {
1514#define E1000_SERDES_LB_OFF 0x400 1521#define E1000_SERDES_LB_OFF 0x400
1515 ew32(SCTL, E1000_SERDES_LB_OFF); 1522 ew32(SCTL, E1000_SERDES_LB_OFF);
1523 e1e_flush();
1516 usleep_range(10000, 20000); 1524 usleep_range(10000, 20000);
1517 break; 1525 break;
1518 } 1526 }
@@ -1592,6 +1600,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter)
1592 k = 0; 1600 k = 0;
1593 } 1601 }
1594 ew32(TDT, k); 1602 ew32(TDT, k);
1603 e1e_flush();
1595 msleep(200); 1604 msleep(200);
1596 time = jiffies; /* set the start time for the receive */ 1605 time = jiffies; /* set the start time for the receive */
1597 good_cnt = 0; 1606 good_cnt = 0;
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index c1752124f3c..4e36978b8fd 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -283,6 +283,7 @@ static void e1000_toggle_lanphypc_value_ich8lan(struct e1000_hw *hw)
283 ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; 283 ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE;
284 ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; 284 ctrl &= ~E1000_CTRL_LANPHYPC_VALUE;
285 ew32(CTRL, ctrl); 285 ew32(CTRL, ctrl);
286 e1e_flush();
286 udelay(10); 287 udelay(10);
287 ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; 288 ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
288 ew32(CTRL, ctrl); 289 ew32(CTRL, ctrl);
@@ -1230,9 +1231,11 @@ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable)
1230 ew32(CTRL, reg); 1231 ew32(CTRL, reg);
1231 1232
1232 ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); 1233 ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS);
1234 e1e_flush();
1233 udelay(20); 1235 udelay(20);
1234 ew32(CTRL, ctrl_reg); 1236 ew32(CTRL, ctrl_reg);
1235 ew32(CTRL_EXT, ctrl_ext); 1237 ew32(CTRL_EXT, ctrl_ext);
1238 e1e_flush();
1236 udelay(20); 1239 udelay(20);
1237 1240
1238out: 1241out:
@@ -2134,8 +2137,7 @@ static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words,
2134 2137
2135 ret_val = 0; 2138 ret_val = 0;
2136 for (i = 0; i < words; i++) { 2139 for (i = 0; i < words; i++) {
2137 if ((dev_spec->shadow_ram) && 2140 if (dev_spec->shadow_ram[offset+i].modified) {
2138 (dev_spec->shadow_ram[offset+i].modified)) {
2139 data[i] = dev_spec->shadow_ram[offset+i].value; 2141 data[i] = dev_spec->shadow_ram[offset+i].value;
2140 } else { 2142 } else {
2141 ret_val = e1000_read_flash_word_ich8lan(hw, 2143 ret_val = e1000_read_flash_word_ich8lan(hw,
@@ -3090,6 +3092,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
3090 ret_val = e1000_acquire_swflag_ich8lan(hw); 3092 ret_val = e1000_acquire_swflag_ich8lan(hw);
3091 e_dbg("Issuing a global reset to ich8lan\n"); 3093 e_dbg("Issuing a global reset to ich8lan\n");
3092 ew32(CTRL, (ctrl | E1000_CTRL_RST)); 3094 ew32(CTRL, (ctrl | E1000_CTRL_RST));
3095 /* cannot issue a flush here because it hangs the hardware */
3093 msleep(20); 3096 msleep(20);
3094 3097
3095 if (!ret_val) 3098 if (!ret_val)
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c
index 65580b40594..7898a67d650 100644
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -1986,6 +1986,7 @@ static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
1986 /* Clear SK and CS */ 1986 /* Clear SK and CS */
1987 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); 1987 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
1988 ew32(EECD, eecd); 1988 ew32(EECD, eecd);
1989 e1e_flush();
1989 udelay(1); 1990 udelay(1);
1990 1991
1991 /* 1992 /*
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 4353ad56cf1..ab4be80f7ab 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -31,12 +31,12 @@
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/types.h> 32#include <linux/types.h>
33#include <linux/init.h> 33#include <linux/init.h>
34#include <linux/interrupt.h>
35#include <linux/pci.h> 34#include <linux/pci.h>
36#include <linux/vmalloc.h> 35#include <linux/vmalloc.h>
37#include <linux/pagemap.h> 36#include <linux/pagemap.h>
38#include <linux/delay.h> 37#include <linux/delay.h>
39#include <linux/netdevice.h> 38#include <linux/netdevice.h>
39#include <linux/interrupt.h>
40#include <linux/tcp.h> 40#include <linux/tcp.h>
41#include <linux/ipv6.h> 41#include <linux/ipv6.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c
index 2a6ee13285b..8666476cb9b 100644
--- a/drivers/net/e1000e/phy.c
+++ b/drivers/net/e1000e/phy.c
@@ -537,6 +537,7 @@ static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data,
537 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & 537 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
538 E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; 538 E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
539 ew32(KMRNCTRLSTA, kmrnctrlsta); 539 ew32(KMRNCTRLSTA, kmrnctrlsta);
540 e1e_flush();
540 541
541 udelay(2); 542 udelay(2);
542 543
@@ -609,6 +610,7 @@ static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data,
609 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & 610 kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
610 E1000_KMRNCTRLSTA_OFFSET) | data; 611 E1000_KMRNCTRLSTA_OFFSET) | data;
611 ew32(KMRNCTRLSTA, kmrnctrlsta); 612 ew32(KMRNCTRLSTA, kmrnctrlsta);
613 e1e_flush();
612 614
613 udelay(2); 615 udelay(2);
614 616
diff --git a/drivers/net/igb/e1000_nvm.c b/drivers/net/igb/e1000_nvm.c
index 7dcd65cede5..40407124e72 100644
--- a/drivers/net/igb/e1000_nvm.c
+++ b/drivers/net/igb/e1000_nvm.c
@@ -285,6 +285,7 @@ static s32 igb_ready_nvm_eeprom(struct e1000_hw *hw)
285 /* Clear SK and CS */ 285 /* Clear SK and CS */
286 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); 286 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
287 wr32(E1000_EECD, eecd); 287 wr32(E1000_EECD, eecd);
288 wrfl();
288 udelay(1); 289 udelay(1);
289 timeout = NVM_MAX_RETRY_SPI; 290 timeout = NVM_MAX_RETRY_SPI;
290 291
diff --git a/drivers/net/igb/igb_ethtool.c b/drivers/net/igb/igb_ethtool.c
index ff244ce803c..414b0225be8 100644
--- a/drivers/net/igb/igb_ethtool.c
+++ b/drivers/net/igb/igb_ethtool.c
@@ -1225,6 +1225,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
1225 1225
1226 /* Disable all the interrupts */ 1226 /* Disable all the interrupts */
1227 wr32(E1000_IMC, ~0); 1227 wr32(E1000_IMC, ~0);
1228 wrfl();
1228 msleep(10); 1229 msleep(10);
1229 1230
1230 /* Define all writable bits for ICS */ 1231 /* Define all writable bits for ICS */
@@ -1268,6 +1269,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
1268 1269
1269 wr32(E1000_IMC, mask); 1270 wr32(E1000_IMC, mask);
1270 wr32(E1000_ICS, mask); 1271 wr32(E1000_ICS, mask);
1272 wrfl();
1271 msleep(10); 1273 msleep(10);
1272 1274
1273 if (adapter->test_icr & mask) { 1275 if (adapter->test_icr & mask) {
@@ -1289,6 +1291,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
1289 1291
1290 wr32(E1000_IMS, mask); 1292 wr32(E1000_IMS, mask);
1291 wr32(E1000_ICS, mask); 1293 wr32(E1000_ICS, mask);
1294 wrfl();
1292 msleep(10); 1295 msleep(10);
1293 1296
1294 if (!(adapter->test_icr & mask)) { 1297 if (!(adapter->test_icr & mask)) {
@@ -1310,6 +1313,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
1310 1313
1311 wr32(E1000_IMC, ~mask); 1314 wr32(E1000_IMC, ~mask);
1312 wr32(E1000_ICS, ~mask); 1315 wr32(E1000_ICS, ~mask);
1316 wrfl();
1313 msleep(10); 1317 msleep(10);
1314 1318
1315 if (adapter->test_icr & mask) { 1319 if (adapter->test_icr & mask) {
@@ -1321,6 +1325,7 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
1321 1325
1322 /* Disable all the interrupts */ 1326 /* Disable all the interrupts */
1323 wr32(E1000_IMC, ~0); 1327 wr32(E1000_IMC, ~0);
1328 wrfl();
1324 msleep(10); 1329 msleep(10);
1325 1330
1326 /* Unhook test interrupt handler */ 1331 /* Unhook test interrupt handler */
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index dc599059512..40d4c405fd7 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1052,6 +1052,7 @@ msi_only:
1052 kfree(adapter->vf_data); 1052 kfree(adapter->vf_data);
1053 adapter->vf_data = NULL; 1053 adapter->vf_data = NULL;
1054 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 1054 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055 wrfl();
1055 msleep(100); 1056 msleep(100);
1056 dev_info(&adapter->pdev->dev, "IOV Disabled\n"); 1057 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1057 } 1058 }
@@ -2022,7 +2023,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
2022 2023
2023 if (hw->bus.func == 0) 2024 if (hw->bus.func == 0)
2024 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 2025 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2025 else if (hw->mac.type == e1000_82580) 2026 else if (hw->mac.type >= e1000_82580)
2026 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + 2027 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2027 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, 2028 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2028 &eeprom_data); 2029 &eeprom_data);
@@ -2198,6 +2199,7 @@ static void __devexit igb_remove(struct pci_dev *pdev)
2198 kfree(adapter->vf_data); 2199 kfree(adapter->vf_data);
2199 adapter->vf_data = NULL; 2200 adapter->vf_data = NULL;
2200 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 2201 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2202 wrfl();
2201 msleep(100); 2203 msleep(100);
2202 dev_info(&pdev->dev, "IOV Disabled\n"); 2204 dev_info(&pdev->dev, "IOV Disabled\n");
2203 } 2205 }
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 1330c8e932d..40ed066e3ef 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -1226,6 +1226,7 @@ static void igbvf_configure_tx(struct igbvf_adapter *adapter)
1226 /* disable transmits */ 1226 /* disable transmits */
1227 txdctl = er32(TXDCTL(0)); 1227 txdctl = er32(TXDCTL(0));
1228 ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE); 1228 ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
1229 e1e_flush();
1229 msleep(10); 1230 msleep(10);
1230 1231
1231 /* Setup the HW Tx Head and Tail descriptor pointers */ 1232 /* Setup the HW Tx Head and Tail descriptor pointers */
@@ -1306,6 +1307,7 @@ static void igbvf_configure_rx(struct igbvf_adapter *adapter)
1306 /* disable receives */ 1307 /* disable receives */
1307 rxdctl = er32(RXDCTL(0)); 1308 rxdctl = er32(RXDCTL(0));
1308 ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE); 1309 ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
1310 e1e_flush();
1309 msleep(10); 1311 msleep(10);
1310 1312
1311 rdlen = rx_ring->count * sizeof(union e1000_adv_rx_desc); 1313 rdlen = rx_ring->count * sizeof(union e1000_adv_rx_desc);
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 954f6e938fb..8b1c3484d27 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -2405,8 +2405,6 @@ static int __init smsc_superio_lpc(unsigned short cfg_base)
2405 * addresses making a subsystem device table necessary. 2405 * addresses making a subsystem device table necessary.
2406 */ 2406 */
2407#ifdef CONFIG_PCI 2407#ifdef CONFIG_PCI
2408#define PCIID_VENDOR_INTEL 0x8086
2409#define PCIID_VENDOR_ALI 0x10b9
2410static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __initdata = { 2408static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __initdata = {
2411 /* 2409 /*
2412 * Subsystems needing entries: 2410 * Subsystems needing entries:
@@ -2416,7 +2414,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2416 */ 2414 */
2417 { 2415 {
2418 /* Guessed entry */ 2416 /* Guessed entry */
2419 .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ 2417 .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */
2420 .device = 0x24cc, 2418 .device = 0x24cc,
2421 .subvendor = 0x103c, 2419 .subvendor = 0x103c,
2422 .subdevice = 0x08bc, 2420 .subdevice = 0x08bc,
@@ -2429,7 +2427,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2429 .name = "HP nx5000 family", 2427 .name = "HP nx5000 family",
2430 }, 2428 },
2431 { 2429 {
2432 .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ 2430 .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */
2433 .device = 0x24cc, 2431 .device = 0x24cc,
2434 .subvendor = 0x103c, 2432 .subvendor = 0x103c,
2435 .subdevice = 0x088c, 2433 .subdevice = 0x088c,
@@ -2443,7 +2441,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2443 .name = "HP nc8000 family", 2441 .name = "HP nc8000 family",
2444 }, 2442 },
2445 { 2443 {
2446 .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ 2444 .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */
2447 .device = 0x24cc, 2445 .device = 0x24cc,
2448 .subvendor = 0x103c, 2446 .subvendor = 0x103c,
2449 .subdevice = 0x0890, 2447 .subdevice = 0x0890,
@@ -2456,7 +2454,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2456 .name = "HP nc6000 family", 2454 .name = "HP nc6000 family",
2457 }, 2455 },
2458 { 2456 {
2459 .vendor = PCIID_VENDOR_INTEL, /* Intel 82801DBM LPC bridge */ 2457 .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801DBM LPC bridge */
2460 .device = 0x24cc, 2458 .device = 0x24cc,
2461 .subvendor = 0x0e11, 2459 .subvendor = 0x0e11,
2462 .subdevice = 0x0860, 2460 .subdevice = 0x0860,
@@ -2471,7 +2469,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2471 }, 2469 },
2472 { 2470 {
2473 /* Intel 82801DB/DBL (ICH4/ICH4-L) LPC Interface Bridge */ 2471 /* Intel 82801DB/DBL (ICH4/ICH4-L) LPC Interface Bridge */
2474 .vendor = PCIID_VENDOR_INTEL, 2472 .vendor = PCI_VENDOR_ID_INTEL,
2475 .device = 0x24c0, 2473 .device = 0x24c0,
2476 .subvendor = 0x1179, 2474 .subvendor = 0x1179,
2477 .subdevice = 0xffff, /* 0xffff is "any" */ 2475 .subdevice = 0xffff, /* 0xffff is "any" */
@@ -2484,7 +2482,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2484 .name = "Toshiba laptop with Intel 82801DB/DBL LPC bridge", 2482 .name = "Toshiba laptop with Intel 82801DB/DBL LPC bridge",
2485 }, 2483 },
2486 { 2484 {
2487 .vendor = PCIID_VENDOR_INTEL, /* Intel 82801CAM ISA bridge */ 2485 .vendor = PCI_VENDOR_ID_INTEL, /* Intel 82801CAM ISA bridge */
2488 .device = 0x248c, 2486 .device = 0x248c,
2489 .subvendor = 0x1179, 2487 .subvendor = 0x1179,
2490 .subdevice = 0xffff, /* 0xffff is "any" */ 2488 .subdevice = 0xffff, /* 0xffff is "any" */
@@ -2498,7 +2496,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2498 }, 2496 },
2499 { 2497 {
2500 /* 82801DBM (ICH4-M) LPC Interface Bridge */ 2498 /* 82801DBM (ICH4-M) LPC Interface Bridge */
2501 .vendor = PCIID_VENDOR_INTEL, 2499 .vendor = PCI_VENDOR_ID_INTEL,
2502 .device = 0x24cc, 2500 .device = 0x24cc,
2503 .subvendor = 0x1179, 2501 .subvendor = 0x1179,
2504 .subdevice = 0xffff, /* 0xffff is "any" */ 2502 .subdevice = 0xffff, /* 0xffff is "any" */
@@ -2512,7 +2510,7 @@ static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __ini
2512 }, 2510 },
2513 { 2511 {
2514 /* ALi M1533/M1535 PCI to ISA Bridge [Aladdin IV/V/V+] */ 2512 /* ALi M1533/M1535 PCI to ISA Bridge [Aladdin IV/V/V+] */
2515 .vendor = PCIID_VENDOR_ALI, 2513 .vendor = PCI_VENDOR_ID_AL,
2516 .device = 0x1533, 2514 .device = 0x1533,
2517 .subvendor = 0x1179, 2515 .subvendor = 0x1179,
2518 .subdevice = 0xffff, /* 0xffff is "any" */ 2516 .subdevice = 0xffff, /* 0xffff is "any" */
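
The smsc-ircc2 hunks replace driver-local vendor macros with the canonical PCI_VENDOR_ID_* constants from <linux/pci_ids.h>. A sketch of an ID-table entry written against those constants; the device and subsystem IDs below are illustrative, not taken from the driver:

#include <linux/pci.h>

static const struct pci_device_id example_ids[] = {
	{ .vendor = PCI_VENDOR_ID_INTEL, .device = 0x24cc,
	  .subvendor = 0x103c,           .subdevice = 0x08bc },
	{ .vendor = PCI_VENDOR_ID_AL,    .device = 0x1533,
	  .subvendor = PCI_ANY_ID,       .subdevice = PCI_ANY_ID },
	{ }	/* terminating entry */
};
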
diff --git a/drivers/net/ixgb/ixgb_ee.c b/drivers/net/ixgb/ixgb_ee.c
index c982ab9f900..38b362b6785 100644
--- a/drivers/net/ixgb/ixgb_ee.c
+++ b/drivers/net/ixgb/ixgb_ee.c
@@ -57,6 +57,7 @@ ixgb_raise_clock(struct ixgb_hw *hw,
57 */ 57 */
58 *eecd_reg = *eecd_reg | IXGB_EECD_SK; 58 *eecd_reg = *eecd_reg | IXGB_EECD_SK;
59 IXGB_WRITE_REG(hw, EECD, *eecd_reg); 59 IXGB_WRITE_REG(hw, EECD, *eecd_reg);
60 IXGB_WRITE_FLUSH(hw);
60 udelay(50); 61 udelay(50);
61} 62}
62 63
@@ -75,6 +76,7 @@ ixgb_lower_clock(struct ixgb_hw *hw,
75 */ 76 */
76 *eecd_reg = *eecd_reg & ~IXGB_EECD_SK; 77 *eecd_reg = *eecd_reg & ~IXGB_EECD_SK;
77 IXGB_WRITE_REG(hw, EECD, *eecd_reg); 78 IXGB_WRITE_REG(hw, EECD, *eecd_reg);
79 IXGB_WRITE_FLUSH(hw);
78 udelay(50); 80 udelay(50);
79} 81}
80 82
@@ -112,6 +114,7 @@ ixgb_shift_out_bits(struct ixgb_hw *hw,
112 eecd_reg |= IXGB_EECD_DI; 114 eecd_reg |= IXGB_EECD_DI;
113 115
114 IXGB_WRITE_REG(hw, EECD, eecd_reg); 116 IXGB_WRITE_REG(hw, EECD, eecd_reg);
117 IXGB_WRITE_FLUSH(hw);
115 118
116 udelay(50); 119 udelay(50);
117 120
@@ -206,21 +209,25 @@ ixgb_standby_eeprom(struct ixgb_hw *hw)
206 /* Deselect EEPROM */ 209 /* Deselect EEPROM */
207 eecd_reg &= ~(IXGB_EECD_CS | IXGB_EECD_SK); 210 eecd_reg &= ~(IXGB_EECD_CS | IXGB_EECD_SK);
208 IXGB_WRITE_REG(hw, EECD, eecd_reg); 211 IXGB_WRITE_REG(hw, EECD, eecd_reg);
212 IXGB_WRITE_FLUSH(hw);
209 udelay(50); 213 udelay(50);
210 214
211 /* Clock high */ 215 /* Clock high */
212 eecd_reg |= IXGB_EECD_SK; 216 eecd_reg |= IXGB_EECD_SK;
213 IXGB_WRITE_REG(hw, EECD, eecd_reg); 217 IXGB_WRITE_REG(hw, EECD, eecd_reg);
218 IXGB_WRITE_FLUSH(hw);
214 udelay(50); 219 udelay(50);
215 220
216 /* Select EEPROM */ 221 /* Select EEPROM */
217 eecd_reg |= IXGB_EECD_CS; 222 eecd_reg |= IXGB_EECD_CS;
218 IXGB_WRITE_REG(hw, EECD, eecd_reg); 223 IXGB_WRITE_REG(hw, EECD, eecd_reg);
224 IXGB_WRITE_FLUSH(hw);
219 udelay(50); 225 udelay(50);
220 226
221 /* Clock low */ 227 /* Clock low */
222 eecd_reg &= ~IXGB_EECD_SK; 228 eecd_reg &= ~IXGB_EECD_SK;
223 IXGB_WRITE_REG(hw, EECD, eecd_reg); 229 IXGB_WRITE_REG(hw, EECD, eecd_reg);
230 IXGB_WRITE_FLUSH(hw);
224 udelay(50); 231 udelay(50);
225} 232}
226 233
@@ -239,11 +246,13 @@ ixgb_clock_eeprom(struct ixgb_hw *hw)
239 /* Rising edge of clock */ 246 /* Rising edge of clock */
240 eecd_reg |= IXGB_EECD_SK; 247 eecd_reg |= IXGB_EECD_SK;
241 IXGB_WRITE_REG(hw, EECD, eecd_reg); 248 IXGB_WRITE_REG(hw, EECD, eecd_reg);
249 IXGB_WRITE_FLUSH(hw);
242 udelay(50); 250 udelay(50);
243 251
244 /* Falling edge of clock */ 252 /* Falling edge of clock */
245 eecd_reg &= ~IXGB_EECD_SK; 253 eecd_reg &= ~IXGB_EECD_SK;
246 IXGB_WRITE_REG(hw, EECD, eecd_reg); 254 IXGB_WRITE_REG(hw, EECD, eecd_reg);
255 IXGB_WRITE_FLUSH(hw);
247 udelay(50); 256 udelay(50);
248} 257}
249 258
diff --git a/drivers/net/ixgb/ixgb_hw.c b/drivers/net/ixgb/ixgb_hw.c
index 6cb2e42ff4c..3d61a9e4faf 100644
--- a/drivers/net/ixgb/ixgb_hw.c
+++ b/drivers/net/ixgb/ixgb_hw.c
@@ -149,6 +149,7 @@ ixgb_adapter_stop(struct ixgb_hw *hw)
149 */ 149 */
150 IXGB_WRITE_REG(hw, RCTL, IXGB_READ_REG(hw, RCTL) & ~IXGB_RCTL_RXEN); 150 IXGB_WRITE_REG(hw, RCTL, IXGB_READ_REG(hw, RCTL) & ~IXGB_RCTL_RXEN);
151 IXGB_WRITE_REG(hw, TCTL, IXGB_READ_REG(hw, TCTL) & ~IXGB_TCTL_TXEN); 151 IXGB_WRITE_REG(hw, TCTL, IXGB_READ_REG(hw, TCTL) & ~IXGB_TCTL_TXEN);
152 IXGB_WRITE_FLUSH(hw);
152 msleep(IXGB_DELAY_BEFORE_RESET); 153 msleep(IXGB_DELAY_BEFORE_RESET);
153 154
154 /* Issue a global reset to the MAC. This will reset the chip's 155 /* Issue a global reset to the MAC. This will reset the chip's
@@ -1220,6 +1221,7 @@ ixgb_optics_reset_bcm(struct ixgb_hw *hw)
1220 ctrl &= ~IXGB_CTRL0_SDP2; 1221 ctrl &= ~IXGB_CTRL0_SDP2;
1221 ctrl |= IXGB_CTRL0_SDP3; 1222 ctrl |= IXGB_CTRL0_SDP3;
1222 IXGB_WRITE_REG(hw, CTRL0, ctrl); 1223 IXGB_WRITE_REG(hw, CTRL0, ctrl);
1224 IXGB_WRITE_FLUSH(hw);
1223 1225
1224 /* SerDes needs extra delay */ 1226 /* SerDes needs extra delay */
1225 msleep(IXGB_SUN_PHY_RESET_DELAY); 1227 msleep(IXGB_SUN_PHY_RESET_DELAY);
diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 3b3dd4df4c5..34f30ec79c2 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -213,6 +213,7 @@ static s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
213 switch (hw->phy.type) { 213 switch (hw->phy.type) {
214 case ixgbe_phy_tn: 214 case ixgbe_phy_tn:
215 phy->ops.check_link = &ixgbe_check_phy_link_tnx; 215 phy->ops.check_link = &ixgbe_check_phy_link_tnx;
216 phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
216 phy->ops.get_firmware_version = 217 phy->ops.get_firmware_version =
217 &ixgbe_get_phy_firmware_version_tnx; 218 &ixgbe_get_phy_firmware_version_tnx;
218 break; 219 break;
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index 777051f54e5..fc1375f26fe 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -2632,6 +2632,7 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
2632 autoc_reg |= IXGBE_AUTOC_AN_RESTART; 2632 autoc_reg |= IXGBE_AUTOC_AN_RESTART;
2633 autoc_reg |= IXGBE_AUTOC_FLU; 2633 autoc_reg |= IXGBE_AUTOC_FLU;
2634 IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); 2634 IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
2635 IXGBE_WRITE_FLUSH(hw);
2635 usleep_range(10000, 20000); 2636 usleep_range(10000, 20000);
2636 } 2637 }
2637 2638
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index dc649553a0a..82d4244c6e1 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -1378,6 +1378,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
1378 1378
1379 /* Disable all the interrupts */ 1379 /* Disable all the interrupts */
1380 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); 1380 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
1381 IXGBE_WRITE_FLUSH(&adapter->hw);
1381 usleep_range(10000, 20000); 1382 usleep_range(10000, 20000);
1382 1383
1383 /* Test each interrupt */ 1384 /* Test each interrupt */
@@ -1398,6 +1399,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
1398 ~mask & 0x00007FFF); 1399 ~mask & 0x00007FFF);
1399 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, 1400 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
1400 ~mask & 0x00007FFF); 1401 ~mask & 0x00007FFF);
1402 IXGBE_WRITE_FLUSH(&adapter->hw);
1401 usleep_range(10000, 20000); 1403 usleep_range(10000, 20000);
1402 1404
1403 if (adapter->test_icr & mask) { 1405 if (adapter->test_icr & mask) {
@@ -1415,6 +1417,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
1415 adapter->test_icr = 0; 1417 adapter->test_icr = 0;
1416 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); 1418 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
1417 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); 1419 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1420 IXGBE_WRITE_FLUSH(&adapter->hw);
1418 usleep_range(10000, 20000); 1421 usleep_range(10000, 20000);
1419 1422
1420 if (!(adapter->test_icr &mask)) { 1423 if (!(adapter->test_icr &mask)) {
@@ -1435,6 +1438,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
1435 ~mask & 0x00007FFF); 1438 ~mask & 0x00007FFF);
1436 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, 1439 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
1437 ~mask & 0x00007FFF); 1440 ~mask & 0x00007FFF);
1441 IXGBE_WRITE_FLUSH(&adapter->hw);
1438 usleep_range(10000, 20000); 1442 usleep_range(10000, 20000);
1439 1443
1440 if (adapter->test_icr) { 1444 if (adapter->test_icr) {
@@ -1446,6 +1450,7 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
1446 1450
1447 /* Disable all the interrupts */ 1451 /* Disable all the interrupts */
1448 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); 1452 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
1453 IXGBE_WRITE_FLUSH(&adapter->hw);
1449 usleep_range(10000, 20000); 1454 usleep_range(10000, 20000);
1450 1455
1451 /* Unhook test interrupt handler */ 1456 /* Unhook test interrupt handler */
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1be617545dc..e86297b3273 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -184,6 +184,7 @@ static inline void ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
184 vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); 184 vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
185 vmdctl &= ~IXGBE_VT_CTL_POOL_MASK; 185 vmdctl &= ~IXGBE_VT_CTL_POOL_MASK;
186 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl); 186 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
187 IXGBE_WRITE_FLUSH(hw);
187 188
188 /* take a breather then clean up driver data */ 189 /* take a breather then clean up driver data */
189 msleep(100); 190 msleep(100);
@@ -1005,7 +1006,7 @@ static int __ixgbe_notify_dca(struct device *dev, void *data)
1005 struct ixgbe_adapter *adapter = dev_get_drvdata(dev); 1006 struct ixgbe_adapter *adapter = dev_get_drvdata(dev);
1006 unsigned long event = *(unsigned long *)data; 1007 unsigned long event = *(unsigned long *)data;
1007 1008
1008 if (!(adapter->flags & IXGBE_FLAG_DCA_ENABLED)) 1009 if (!(adapter->flags & IXGBE_FLAG_DCA_CAPABLE))
1009 return 0; 1010 return 0;
1010 1011
1011 switch (event) { 1012 switch (event) {
diff --git a/drivers/net/ixgbe/ixgbe_phy.c b/drivers/net/ixgbe/ixgbe_phy.c
index 735f686c3b3..f7ca3511b9f 100644
--- a/drivers/net/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ixgbe/ixgbe_phy.c
@@ -1585,6 +1585,7 @@ static s32 ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
1585 *i2cctl |= IXGBE_I2C_CLK_OUT; 1585 *i2cctl |= IXGBE_I2C_CLK_OUT;
1586 1586
1587 IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); 1587 IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
1588 IXGBE_WRITE_FLUSH(hw);
1588 1589
1589 /* SCL rise time (1000ns) */ 1590 /* SCL rise time (1000ns) */
1590 udelay(IXGBE_I2C_T_RISE); 1591 udelay(IXGBE_I2C_T_RISE);
@@ -1605,6 +1606,7 @@ static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
1605 *i2cctl &= ~IXGBE_I2C_CLK_OUT; 1606 *i2cctl &= ~IXGBE_I2C_CLK_OUT;
1606 1607
1607 IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); 1608 IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
1609 IXGBE_WRITE_FLUSH(hw);
1608 1610
1609 /* SCL fall time (300ns) */ 1611 /* SCL fall time (300ns) */
1610 udelay(IXGBE_I2C_T_FALL); 1612 udelay(IXGBE_I2C_T_FALL);
@@ -1628,6 +1630,7 @@ static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
1628 *i2cctl &= ~IXGBE_I2C_DATA_OUT; 1630 *i2cctl &= ~IXGBE_I2C_DATA_OUT;
1629 1631
1630 IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); 1632 IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl);
1633 IXGBE_WRITE_FLUSH(hw);
1631 1634
1632 /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ 1635 /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
1633 udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA); 1636 udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA);
diff --git a/drivers/net/ixgbe/ixgbe_x540.c b/drivers/net/ixgbe/ixgbe_x540.c
index bec30ed91ad..2696c78e9f4 100644
--- a/drivers/net/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ixgbe/ixgbe_x540.c
@@ -162,6 +162,7 @@ mac_reset_top:
162 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); 162 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
163 ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; 163 ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
164 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); 164 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
165 IXGBE_WRITE_FLUSH(hw);
165 166
166 msleep(50); 167 msleep(50);
167 168
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 0fcdc25699d..dc4e305a108 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -322,6 +322,9 @@ static void macb_tx(struct macb *bp)
322 for (i = 0; i < TX_RING_SIZE; i++) 322 for (i = 0; i < TX_RING_SIZE; i++)
323 bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); 323 bp->tx_ring[i].ctrl = MACB_BIT(TX_USED);
324 324
325 /* Add wrap bit */
326 bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
327
325 /* free transmit buffer in upper layer*/ 328 /* free transmit buffer in upper layer*/
326 for (tail = bp->tx_tail; tail != head; tail = NEXT_TX(tail)) { 329 for (tail = bp->tx_tail; tail != head; tail = NEXT_TX(tail)) {
327 struct ring_info *rp = &bp->tx_skb[tail]; 330 struct ring_info *rp = &bp->tx_skb[tail];
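
The macb hunk restores the wrap marker after the TX-error path rewrites every descriptor: the controller walks the ring by address and relies on TX_WRAP on the final descriptor to jump back to the base, so reinitializing the ctrl fields without it lets DMA run past the end of the ring. Minimal sketch under illustrative names (CTRL_USED/CTRL_WRAP stand in for the MACB_BIT() values):

#define RING_SIZE	128
#define CTRL_USED	(1u << 31)	/* stand-in for MACB_BIT(TX_USED) */
#define CTRL_WRAP	(1u << 30)	/* stand-in for MACB_BIT(TX_WRAP) */

struct tx_desc { unsigned int addr, ctrl; };

static void ring_reset(struct tx_desc *ring)
{
	int i;

	for (i = 0; i < RING_SIZE; i++)
		ring[i].ctrl = CTRL_USED;	/* mark every slot free... */

	ring[RING_SIZE - 1].ctrl |= CTRL_WRAP;	/* ...but keep the wrap bit */
}
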
diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c
index 5e710917806..5ada5b46911 100644
--- a/drivers/net/mlx4/en_port.c
+++ b/drivers/net/mlx4/en_port.c
@@ -128,7 +128,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
128 memset(context, 0, sizeof *context); 128 memset(context, 0, sizeof *context);
129 129
130 context->base_qpn = cpu_to_be32(base_qpn); 130 context->base_qpn = cpu_to_be32(base_qpn);
131 context->n_mac = 0x7; 131 context->n_mac = 0x2;
132 context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT | 132 context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
133 base_qpn); 133 base_qpn);
134 context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT | 134 context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index c94b3426d35..f0ee35df4dd 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -1117,6 +1117,8 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
1117 info->port = port; 1117 info->port = port;
1118 mlx4_init_mac_table(dev, &info->mac_table); 1118 mlx4_init_mac_table(dev, &info->mac_table);
1119 mlx4_init_vlan_table(dev, &info->vlan_table); 1119 mlx4_init_vlan_table(dev, &info->vlan_table);
1120 info->base_qpn = dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
1121 (port - 1) * (1 << log_num_mac);
1120 1122
1121 sprintf(info->dev_name, "mlx4_port%d", port); 1123 sprintf(info->dev_name, "mlx4_port%d", port);
1122 info->port_attr.attr.name = info->dev_name; 1124 info->port_attr.attr.name = info->dev_name;
diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c
index 1f95afda684..609e0ec14ce 100644
--- a/drivers/net/mlx4/port.c
+++ b/drivers/net/mlx4/port.c
@@ -258,9 +258,12 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn)
258 if (validate_index(dev, table, index)) 258 if (validate_index(dev, table, index))
259 goto out; 259 goto out;
260 260
261 table->entries[index] = 0; 261 /* Check whether this address has reference count */
262 mlx4_set_port_mac_table(dev, port, table->entries); 262 if (!(--table->refs[index])) {
263 --table->total; 263 table->entries[index] = 0;
264 mlx4_set_port_mac_table(dev, port, table->entries);
265 --table->total;
266 }
264out: 267out:
265 mutex_unlock(&table->mutex); 268 mutex_unlock(&table->mutex);
266} 269}
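
The mlx4_unregister_mac() hunk converts an unconditional teardown into a reference-counted release: the table slot is cleared and pushed to hardware only when the last holder drops it. Reduced sketch of the pattern; the struct below is illustrative, not mlx4's real mac_table:

struct mac_table {
	unsigned long long entries[128];
	int refs[128];
	int total;
};

static void mac_table_put(struct mac_table *t, int index)
{
	if (--t->refs[index])
		return;			/* other holders remain; keep the entry */

	t->entries[index] = 0;		/* last reference: free the slot */
	t->total--;
}
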
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index cd6c2317e29..ed47585a686 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -9201,7 +9201,7 @@ static int __devinit niu_ldg_init(struct niu *np)
9201 9201
9202 first_chan = 0; 9202 first_chan = 0;
9203 for (i = 0; i < port; i++) 9203 for (i = 0; i < port; i++)
9204 first_chan += parent->rxchan_per_port[port]; 9204 first_chan += parent->rxchan_per_port[i];
9205 num_chan = parent->rxchan_per_port[port]; 9205 num_chan = parent->rxchan_per_port[port];
9206 9206
9207 for (i = first_chan; i < (first_chan + num_chan); i++) { 9207 for (i = first_chan; i < (first_chan + num_chan); i++) {
@@ -9217,7 +9217,7 @@ static int __devinit niu_ldg_init(struct niu *np)
9217 9217
9218 first_chan = 0; 9218 first_chan = 0;
9219 for (i = 0; i < port; i++) 9219 for (i = 0; i < port; i++)
9220 first_chan += parent->txchan_per_port[port]; 9220 first_chan += parent->txchan_per_port[i];
9221 num_chan = parent->txchan_per_port[port]; 9221 num_chan = parent->txchan_per_port[port];
9222 for (i = first_chan; i < (first_chan + num_chan); i++) { 9222 for (i = first_chan; i < (first_chan + num_chan); i++) {
9223 err = niu_ldg_assign_ldn(np, parent, 9223 err = niu_ldg_assign_ldn(np, parent,
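
The niu fix is a classic prefix-sum slip: both loops added rxchan_per_port[port] / txchan_per_port[port] (constant inside the loop) instead of indexing by the loop variable, so every port after the first computed the wrong first channel. The corrected shape in reduced form:

static int first_chan_for_port(const int *chan_per_port, int port)
{
	int i, first = 0;

	for (i = 0; i < port; i++)
		first += chan_per_port[i];	/* was chan_per_port[port] */

	return first;
}
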
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 7d9c650f395..02339b3352e 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -239,6 +239,7 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = {
239 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, 239 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 },
240 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, 240 { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 },
241 { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, RTL_CFG_0 }, 241 { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, RTL_CFG_0 },
242 { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4302), 0, 0, RTL_CFG_0 },
242 { PCI_DEVICE(PCI_VENDOR_ID_AT, 0xc107), 0, 0, RTL_CFG_0 }, 243 { PCI_DEVICE(PCI_VENDOR_ID_AT, 0xc107), 0, 0, RTL_CFG_0 },
243 { PCI_DEVICE(0x16ec, 0x0116), 0, 0, RTL_CFG_0 }, 244 { PCI_DEVICE(0x16ec, 0x0116), 0, 0, RTL_CFG_0 },
244 { PCI_VENDOR_ID_LINKSYS, 0x1032, 245 { PCI_VENDOR_ID_LINKSYS, 0x1032,
@@ -1091,6 +1092,21 @@ rtl_w1w0_eri(void __iomem *ioaddr, int addr, u32 mask, u32 p, u32 m, int type)
1091 rtl_eri_write(ioaddr, addr, mask, (val & ~m) | p, type); 1092 rtl_eri_write(ioaddr, addr, mask, (val & ~m) | p, type);
1092} 1093}
1093 1094
1095struct exgmac_reg {
1096 u16 addr;
1097 u16 mask;
1098 u32 val;
1099};
1100
1101static void rtl_write_exgmac_batch(void __iomem *ioaddr,
1102 const struct exgmac_reg *r, int len)
1103{
1104 while (len-- > 0) {
1105 rtl_eri_write(ioaddr, r->addr, r->mask, r->val, ERIAR_EXGMAC);
1106 r++;
1107 }
1108}
1109
1094static u8 rtl8168d_efuse_read(void __iomem *ioaddr, int reg_addr) 1110static u8 rtl8168d_efuse_read(void __iomem *ioaddr, int reg_addr)
1095{ 1111{
1096 u8 value = 0xff; 1112 u8 value = 0xff;
@@ -3116,6 +3132,18 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
3116 RTL_W32(MAC0, low); 3132 RTL_W32(MAC0, low);
3117 RTL_R32(MAC0); 3133 RTL_R32(MAC0);
3118 3134
3135 if (tp->mac_version == RTL_GIGA_MAC_VER_34) {
3136 const struct exgmac_reg e[] = {
3137 { .addr = 0xe0, ERIAR_MASK_1111, .val = low },
3138 { .addr = 0xe4, ERIAR_MASK_1111, .val = high },
3139 { .addr = 0xf0, ERIAR_MASK_1111, .val = low << 16 },
3140 { .addr = 0xf4, ERIAR_MASK_1111, .val = high << 16 |
3141 low >> 16 },
3142 };
3143
3144 rtl_write_exgmac_batch(ioaddr, e, ARRAY_SIZE(e));
3145 }
3146
3119 RTL_W8(Cfg9346, Cfg9346_Lock); 3147 RTL_W8(Cfg9346, Cfg9346_Lock);
3120 3148
3121 spin_unlock_irq(&tp->lock); 3149 spin_unlock_irq(&tp->lock);
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index 8ad7bfbaa3a..3c0f1312b39 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -1825,6 +1825,16 @@ static int sis190_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1825 generic_mii_ioctl(&tp->mii_if, if_mii(ifr), cmd, NULL); 1825 generic_mii_ioctl(&tp->mii_if, if_mii(ifr), cmd, NULL);
1826} 1826}
1827 1827
1828static int sis190_mac_addr(struct net_device *dev, void *p)
1829{
1830 int rc;
1831
1832 rc = eth_mac_addr(dev, p);
1833 if (!rc)
1834 sis190_init_rxfilter(dev);
1835 return rc;
1836}
1837
1828static const struct net_device_ops sis190_netdev_ops = { 1838static const struct net_device_ops sis190_netdev_ops = {
1829 .ndo_open = sis190_open, 1839 .ndo_open = sis190_open,
1830 .ndo_stop = sis190_close, 1840 .ndo_stop = sis190_close,
@@ -1833,7 +1843,7 @@ static const struct net_device_ops sis190_netdev_ops = {
1833 .ndo_tx_timeout = sis190_tx_timeout, 1843 .ndo_tx_timeout = sis190_tx_timeout,
1834 .ndo_set_multicast_list = sis190_set_rx_mode, 1844 .ndo_set_multicast_list = sis190_set_rx_mode,
1835 .ndo_change_mtu = eth_change_mtu, 1845 .ndo_change_mtu = eth_change_mtu,
1836 .ndo_set_mac_address = eth_mac_addr, 1846 .ndo_set_mac_address = sis190_mac_addr,
1837 .ndo_validate_addr = eth_validate_addr, 1847 .ndo_validate_addr = eth_validate_addr,
1838#ifdef CONFIG_NET_POLL_CONTROLLER 1848#ifdef CONFIG_NET_POLL_CONTROLLER
1839 .ndo_poll_controller = sis190_netpoll, 1849 .ndo_poll_controller = sis190_netpoll,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index fd622a66ebb..a03336e086d 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -53,7 +53,7 @@
53#include <linux/usb/usbnet.h> 53#include <linux/usb/usbnet.h>
54#include <linux/usb/cdc.h> 54#include <linux/usb/cdc.h>
55 55
56#define DRIVER_VERSION "01-June-2011" 56#define DRIVER_VERSION "04-Aug-2011"
57 57
58/* CDC NCM subclass 3.2.1 */ 58/* CDC NCM subclass 3.2.1 */
59#define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 59#define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10
@@ -163,35 +163,8 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
163 usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info)); 163 usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
164} 164}
165 165
166static int
167cdc_ncm_do_request(struct cdc_ncm_ctx *ctx, struct usb_cdc_notification *req,
168 void *data, u16 flags, u16 *actlen, u16 timeout)
169{
170 int err;
171
172 err = usb_control_msg(ctx->udev, (req->bmRequestType & USB_DIR_IN) ?
173 usb_rcvctrlpipe(ctx->udev, 0) :
174 usb_sndctrlpipe(ctx->udev, 0),
175 req->bNotificationType, req->bmRequestType,
176 req->wValue,
177 req->wIndex, data,
178 req->wLength, timeout);
179
180 if (err < 0) {
181 if (actlen)
182 *actlen = 0;
183 return err;
184 }
185
186 if (actlen)
187 *actlen = err;
188
189 return 0;
190}
191
192static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) 166static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
193{ 167{
194 struct usb_cdc_notification req;
195 u32 val; 168 u32 val;
196 u8 flags; 169 u8 flags;
197 u8 iface_no; 170 u8 iface_no;
@@ -200,14 +173,14 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
200 173
201 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; 174 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
202 175
203 req.bmRequestType = USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE; 176 err = usb_control_msg(ctx->udev,
204 req.bNotificationType = USB_CDC_GET_NTB_PARAMETERS; 177 usb_rcvctrlpipe(ctx->udev, 0),
205 req.wValue = 0; 178 USB_CDC_GET_NTB_PARAMETERS,
206 req.wIndex = cpu_to_le16(iface_no); 179 USB_TYPE_CLASS | USB_DIR_IN
207 req.wLength = cpu_to_le16(sizeof(ctx->ncm_parm)); 180 | USB_RECIP_INTERFACE,
208 181 0, iface_no, &ctx->ncm_parm,
209 err = cdc_ncm_do_request(ctx, &req, &ctx->ncm_parm, 0, NULL, 1000); 182 sizeof(ctx->ncm_parm), 10000);
210 if (err) { 183 if (err < 0) {
211 pr_debug("failed GET_NTB_PARAMETERS\n"); 184 pr_debug("failed GET_NTB_PARAMETERS\n");
212 return 1; 185 return 1;
213 } 186 }
@@ -253,31 +226,26 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
253 226
254 /* inform device about NTB input size changes */ 227 /* inform device about NTB input size changes */
255 if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { 228 if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
256 req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT |
257 USB_RECIP_INTERFACE;
258 req.bNotificationType = USB_CDC_SET_NTB_INPUT_SIZE;
259 req.wValue = 0;
260 req.wIndex = cpu_to_le16(iface_no);
261 229
262 if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) { 230 if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) {
263 struct usb_cdc_ncm_ndp_input_size ndp_in_sz; 231 struct usb_cdc_ncm_ndp_input_size ndp_in_sz;
264 232 err = usb_control_msg(ctx->udev,
265 req.wLength = 8; 233 usb_sndctrlpipe(ctx->udev, 0),
266 ndp_in_sz.dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); 234 USB_CDC_SET_NTB_INPUT_SIZE,
267 ndp_in_sz.wNtbInMaxDatagrams = 235 USB_TYPE_CLASS | USB_DIR_OUT
268 cpu_to_le16(CDC_NCM_DPT_DATAGRAMS_MAX); 236 | USB_RECIP_INTERFACE,
269 ndp_in_sz.wReserved = 0; 237 0, iface_no, &ndp_in_sz, 8, 1000);
270 err = cdc_ncm_do_request(ctx, &req, &ndp_in_sz, 0, NULL,
271 1000);
272 } else { 238 } else {
273 __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); 239 __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
274 240 err = usb_control_msg(ctx->udev,
275 req.wLength = 4; 241 usb_sndctrlpipe(ctx->udev, 0),
276 err = cdc_ncm_do_request(ctx, &req, &dwNtbInMaxSize, 0, 242 USB_CDC_SET_NTB_INPUT_SIZE,
277 NULL, 1000); 243 USB_TYPE_CLASS | USB_DIR_OUT
244 | USB_RECIP_INTERFACE,
245 0, iface_no, &dwNtbInMaxSize, 4, 1000);
278 } 246 }
279 247
280 if (err) 248 if (err < 0)
281 pr_debug("Setting NTB Input Size failed\n"); 249 pr_debug("Setting NTB Input Size failed\n");
282 } 250 }
283 251
@@ -332,29 +300,24 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
332 300
333 /* set CRC Mode */ 301 /* set CRC Mode */
334 if (flags & USB_CDC_NCM_NCAP_CRC_MODE) { 302 if (flags & USB_CDC_NCM_NCAP_CRC_MODE) {
335 req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | 303 err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0),
336 USB_RECIP_INTERFACE; 304 USB_CDC_SET_CRC_MODE,
337 req.bNotificationType = USB_CDC_SET_CRC_MODE; 305 USB_TYPE_CLASS | USB_DIR_OUT
338 req.wValue = cpu_to_le16(USB_CDC_NCM_CRC_NOT_APPENDED); 306 | USB_RECIP_INTERFACE,
339 req.wIndex = cpu_to_le16(iface_no); 307 USB_CDC_NCM_CRC_NOT_APPENDED,
340 req.wLength = 0; 308 iface_no, NULL, 0, 1000);
341 309 if (err < 0)
342 err = cdc_ncm_do_request(ctx, &req, NULL, 0, NULL, 1000);
343 if (err)
344 pr_debug("Setting CRC mode off failed\n"); 310 pr_debug("Setting CRC mode off failed\n");
345 } 311 }
346 312
347 /* set NTB format, if both formats are supported */ 313 /* set NTB format, if both formats are supported */
348 if (ntb_fmt_supported & USB_CDC_NCM_NTH32_SIGN) { 314 if (ntb_fmt_supported & USB_CDC_NCM_NTH32_SIGN) {
349 req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | 315 err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0),
350 USB_RECIP_INTERFACE; 316 USB_CDC_SET_NTB_FORMAT, USB_TYPE_CLASS
351 req.bNotificationType = USB_CDC_SET_NTB_FORMAT; 317 | USB_DIR_OUT | USB_RECIP_INTERFACE,
352 req.wValue = cpu_to_le16(USB_CDC_NCM_NTB16_FORMAT); 318 USB_CDC_NCM_NTB16_FORMAT,
353 req.wIndex = cpu_to_le16(iface_no); 319 iface_no, NULL, 0, 1000);
354 req.wLength = 0; 320 if (err < 0)
355
356 err = cdc_ncm_do_request(ctx, &req, NULL, 0, NULL, 1000);
357 if (err)
358 pr_debug("Setting NTB format to 16-bit failed\n"); 321 pr_debug("Setting NTB format to 16-bit failed\n");
359 } 322 }
360 323
@@ -364,17 +327,13 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
364 if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) { 327 if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) {
365 __le16 max_datagram_size; 328 __le16 max_datagram_size;
366 u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); 329 u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
367 330 err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0),
368 req.bmRequestType = USB_TYPE_CLASS | USB_DIR_IN | 331 USB_CDC_GET_MAX_DATAGRAM_SIZE,
369 USB_RECIP_INTERFACE; 332 USB_TYPE_CLASS | USB_DIR_IN
370 req.bNotificationType = USB_CDC_GET_MAX_DATAGRAM_SIZE; 333 | USB_RECIP_INTERFACE,
371 req.wValue = 0; 334 0, iface_no, &max_datagram_size,
372 req.wIndex = cpu_to_le16(iface_no); 335 2, 1000);
373 req.wLength = cpu_to_le16(2); 336 if (err < 0) {
374
375 err = cdc_ncm_do_request(ctx, &req, &max_datagram_size, 0, NULL,
376 1000);
377 if (err) {
378 pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n", 337 pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n",
379 CDC_NCM_MIN_DATAGRAM_SIZE); 338 CDC_NCM_MIN_DATAGRAM_SIZE);
380 } else { 339 } else {
@@ -395,17 +354,15 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
395 CDC_NCM_MIN_DATAGRAM_SIZE; 354 CDC_NCM_MIN_DATAGRAM_SIZE;
396 355
397 /* if value changed, update device */ 356 /* if value changed, update device */
398 req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | 357 err = usb_control_msg(ctx->udev,
399 USB_RECIP_INTERFACE; 358 usb_sndctrlpipe(ctx->udev, 0),
400 req.bNotificationType = USB_CDC_SET_MAX_DATAGRAM_SIZE; 359 USB_CDC_SET_MAX_DATAGRAM_SIZE,
401 req.wValue = 0; 360 USB_TYPE_CLASS | USB_DIR_OUT
402 req.wIndex = cpu_to_le16(iface_no); 361 | USB_RECIP_INTERFACE,
403 req.wLength = 2; 362 0,
404 max_datagram_size = cpu_to_le16(ctx->max_datagram_size); 363 iface_no, &max_datagram_size,
405 364 2, 1000);
406 err = cdc_ncm_do_request(ctx, &req, &max_datagram_size, 365 if (err < 0)
407 0, NULL, 1000);
408 if (err)
409 pr_debug("SET_MAX_DATAGRAM_SIZE failed\n"); 366 pr_debug("SET_MAX_DATAGRAM_SIZE failed\n");
410 } 367 }
411 368
@@ -671,7 +628,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb)
671 u32 rem; 628 u32 rem;
672 u32 offset; 629 u32 offset;
673 u32 last_offset; 630 u32 last_offset;
674 u16 n = 0; 631 u16 n = 0, index;
675 u8 ready2send = 0; 632 u8 ready2send = 0;
676 633
677 /* if there is a remaining skb, it gets priority */ 634 /* if there is a remaining skb, it gets priority */
@@ -859,8 +816,8 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb)
859 cpu_to_le16(sizeof(ctx->tx_ncm.nth16)); 816 cpu_to_le16(sizeof(ctx->tx_ncm.nth16));
860 ctx->tx_ncm.nth16.wSequence = cpu_to_le16(ctx->tx_seq); 817 ctx->tx_ncm.nth16.wSequence = cpu_to_le16(ctx->tx_seq);
861 ctx->tx_ncm.nth16.wBlockLength = cpu_to_le16(last_offset); 818 ctx->tx_ncm.nth16.wBlockLength = cpu_to_le16(last_offset);
862 ctx->tx_ncm.nth16.wNdpIndex = ALIGN(sizeof(struct usb_cdc_ncm_nth16), 819 index = ALIGN(sizeof(struct usb_cdc_ncm_nth16), ctx->tx_ndp_modulus);
863 ctx->tx_ndp_modulus); 820 ctx->tx_ncm.nth16.wNdpIndex = cpu_to_le16(index);
864 821
865 memcpy(skb_out->data, &(ctx->tx_ncm.nth16), sizeof(ctx->tx_ncm.nth16)); 822 memcpy(skb_out->data, &(ctx->tx_ncm.nth16), sizeof(ctx->tx_ncm.nth16));
866 ctx->tx_seq++; 823 ctx->tx_seq++;
@@ -873,12 +830,11 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb)
873 ctx->tx_ncm.ndp16.wLength = cpu_to_le16(rem); 830 ctx->tx_ncm.ndp16.wLength = cpu_to_le16(rem);
874 ctx->tx_ncm.ndp16.wNextNdpIndex = 0; /* reserved */ 831 ctx->tx_ncm.ndp16.wNextNdpIndex = 0; /* reserved */
875 832
876 memcpy(((u8 *)skb_out->data) + ctx->tx_ncm.nth16.wNdpIndex, 833 memcpy(((u8 *)skb_out->data) + index,
877 &(ctx->tx_ncm.ndp16), 834 &(ctx->tx_ncm.ndp16),
878 sizeof(ctx->tx_ncm.ndp16)); 835 sizeof(ctx->tx_ncm.ndp16));
879 836
880 memcpy(((u8 *)skb_out->data) + ctx->tx_ncm.nth16.wNdpIndex + 837 memcpy(((u8 *)skb_out->data) + index + sizeof(ctx->tx_ncm.ndp16),
881 sizeof(ctx->tx_ncm.ndp16),
882 &(ctx->tx_ncm.dpe16), 838 &(ctx->tx_ncm.dpe16),
883 (ctx->tx_curr_frame_num + 1) * 839 (ctx->tx_curr_frame_num + 1) *
884 sizeof(struct usb_cdc_ncm_dpe16)); 840 sizeof(struct usb_cdc_ncm_dpe16));
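
The cdc_ncm conversion drops the cdc_ncm_do_request() wrapper (which returned 0 on success) in favour of calling usb_control_msg() directly, and usb_control_msg() returns the number of bytes transferred on success or a negative errno on failure — hence every check becoming 'err < 0'. A hedged sketch of one GET-type class request; the buffer handling and 1000 ms timeout are illustrative:

#include <linux/usb.h>
#include <linux/usb/cdc.h>

static int ncm_get_parameters(struct usb_device *udev, u8 iface_no,
			      void *buf, u16 len)
{
	int ret;

	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
			      USB_CDC_GET_NTB_PARAMETERS,
			      USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
			      0, iface_no, buf, len, 1000);
	if (ret < 0)
		return ret;	/* negative errno */

	return 0;		/* 'ret' bytes were transferred */
}
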
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_hw.c b/drivers/net/wireless/ath/ath9k/ar9002_hw.c
index 9ff7c30573b..44d9d8d5649 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_hw.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_hw.c
@@ -309,11 +309,7 @@ static void ar9002_hw_configpcipowersave(struct ath_hw *ah,
309 u8 i; 309 u8 i;
310 u32 val; 310 u32 val;
311 311
312 if (ah->is_pciexpress != true) 312 if (ah->is_pciexpress != true || ah->aspm_enabled != true)
313 return;
314
315 /* Do not touch SerDes registers */
316 if (ah->config.pcie_powersave_enable == 2)
317 return; 313 return;
318 314
319 /* Nothing to do on restore for 11N */ 315 /* Nothing to do on restore for 11N */
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
index 8efdec247c0..ad2bb2bf4e8 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c
@@ -519,11 +519,7 @@ static void ar9003_hw_configpcipowersave(struct ath_hw *ah,
519 int restore, 519 int restore,
520 int power_off) 520 int power_off)
521{ 521{
522 if (ah->is_pciexpress != true) 522 if (ah->is_pciexpress != true || ah->aspm_enabled != true)
523 return;
524
525 /* Do not touch SerDes registers */
526 if (ah->config.pcie_powersave_enable == 2)
527 return; 523 return;
528 524
529 /* Nothing to do on restore for 11N */ 525 /* Nothing to do on restore for 11N */
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index 8006ce0c735..8dcefe74f4c 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -318,6 +318,14 @@ static void ath9k_hw_disablepcie(struct ath_hw *ah)
318 REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000); 318 REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000);
319} 319}
320 320
321static void ath9k_hw_aspm_init(struct ath_hw *ah)
322{
323 struct ath_common *common = ath9k_hw_common(ah);
324
325 if (common->bus_ops->aspm_init)
326 common->bus_ops->aspm_init(common);
327}
328
321/* This should work for all families including legacy */ 329/* This should work for all families including legacy */
322static bool ath9k_hw_chip_test(struct ath_hw *ah) 330static bool ath9k_hw_chip_test(struct ath_hw *ah)
323{ 331{
@@ -378,7 +386,6 @@ static void ath9k_hw_init_config(struct ath_hw *ah)
378 ah->config.additional_swba_backoff = 0; 386 ah->config.additional_swba_backoff = 0;
379 ah->config.ack_6mb = 0x0; 387 ah->config.ack_6mb = 0x0;
380 ah->config.cwm_ignore_extcca = 0; 388 ah->config.cwm_ignore_extcca = 0;
381 ah->config.pcie_powersave_enable = 0;
382 ah->config.pcie_clock_req = 0; 389 ah->config.pcie_clock_req = 0;
383 ah->config.pcie_waen = 0; 390 ah->config.pcie_waen = 0;
384 ah->config.analog_shiftreg = 1; 391 ah->config.analog_shiftreg = 1;
@@ -598,7 +605,7 @@ static int __ath9k_hw_init(struct ath_hw *ah)
598 605
599 606
600 if (ah->is_pciexpress) 607 if (ah->is_pciexpress)
601 ath9k_hw_configpcipowersave(ah, 0, 0); 608 ath9k_hw_aspm_init(ah);
602 else 609 else
603 ath9k_hw_disablepcie(ah); 610 ath9k_hw_disablepcie(ah);
604 611
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index 6acd0f975ae..c79889036ec 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -219,7 +219,6 @@ struct ath9k_ops_config {
219 int additional_swba_backoff; 219 int additional_swba_backoff;
220 int ack_6mb; 220 int ack_6mb;
221 u32 cwm_ignore_extcca; 221 u32 cwm_ignore_extcca;
222 u8 pcie_powersave_enable;
223 bool pcieSerDesWrite; 222 bool pcieSerDesWrite;
224 u8 pcie_clock_req; 223 u8 pcie_clock_req;
225 u32 pcie_waen; 224 u32 pcie_waen;
@@ -673,6 +672,7 @@ struct ath_hw {
673 672
674 bool sw_mgmt_crypto; 673 bool sw_mgmt_crypto;
675 bool is_pciexpress; 674 bool is_pciexpress;
675 bool aspm_enabled;
676 bool is_monitoring; 676 bool is_monitoring;
677 bool need_an_top2_fixup; 677 bool need_an_top2_fixup;
678 u16 tx_trig_level; 678 u16 tx_trig_level;
@@ -874,6 +874,7 @@ struct ath_bus_ops {
874 bool (*eeprom_read)(struct ath_common *common, u32 off, u16 *data); 874 bool (*eeprom_read)(struct ath_common *common, u32 off, u16 *data);
875 void (*bt_coex_prep)(struct ath_common *common); 875 void (*bt_coex_prep)(struct ath_common *common);
876 void (*extn_synch_en)(struct ath_common *common); 876 void (*extn_synch_en)(struct ath_common *common);
877 void (*aspm_init)(struct ath_common *common);
877}; 878};
878 879
879static inline struct ath_common *ath9k_hw_common(struct ath_hw *ah) 880static inline struct ath_common *ath9k_hw_common(struct ath_hw *ah)
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index ac5107172f9..aa0ff7e2c92 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -670,8 +670,10 @@ static void ath9k_init_band_txpower(struct ath_softc *sc, int band)
670static void ath9k_init_txpower_limits(struct ath_softc *sc) 670static void ath9k_init_txpower_limits(struct ath_softc *sc)
671{ 671{
672 struct ath_hw *ah = sc->sc_ah; 672 struct ath_hw *ah = sc->sc_ah;
673 struct ath_common *common = ath9k_hw_common(sc->sc_ah);
673 struct ath9k_channel *curchan = ah->curchan; 674 struct ath9k_channel *curchan = ah->curchan;
674 675
676 ah->txchainmask = common->tx_chainmask;
675 if (ah->caps.hw_caps & ATH9K_HW_CAP_2GHZ) 677 if (ah->caps.hw_caps & ATH9K_HW_CAP_2GHZ)
676 ath9k_init_band_txpower(sc, IEEE80211_BAND_2GHZ); 678 ath9k_init_band_txpower(sc, IEEE80211_BAND_2GHZ);
677 if (ah->caps.hw_caps & ATH9K_HW_CAP_5GHZ) 679 if (ah->caps.hw_caps & ATH9K_HW_CAP_5GHZ)
diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index 3bad0b2cf9a..be4ea132981 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/nl80211.h> 17#include <linux/nl80211.h>
18#include <linux/pci.h> 18#include <linux/pci.h>
19#include <linux/pci-aspm.h>
19#include <linux/ath9k_platform.h> 20#include <linux/ath9k_platform.h>
20#include "ath9k.h" 21#include "ath9k.h"
21 22
@@ -115,12 +116,38 @@ static void ath_pci_extn_synch_enable(struct ath_common *common)
115 pci_write_config_byte(pdev, sc->sc_ah->caps.pcie_lcr_offset, lnkctl); 116 pci_write_config_byte(pdev, sc->sc_ah->caps.pcie_lcr_offset, lnkctl);
116} 117}
117 118
119static void ath_pci_aspm_init(struct ath_common *common)
120{
121 struct ath_softc *sc = (struct ath_softc *) common->priv;
122 struct ath_hw *ah = sc->sc_ah;
123 struct pci_dev *pdev = to_pci_dev(sc->dev);
124 struct pci_dev *parent;
125 int pos;
126 u8 aspm;
127
128 if (!pci_is_pcie(pdev))
129 return;
130
131 parent = pdev->bus->self;
132 if (WARN_ON(!parent))
133 return;
134
135 pos = pci_pcie_cap(parent);
136 pci_read_config_byte(parent, pos + PCI_EXP_LNKCTL, &aspm);
137 if (aspm & (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)) {
138 ah->aspm_enabled = true;
139 /* Initialize PCIe PM and SERDES registers. */
140 ath9k_hw_configpcipowersave(ah, 0, 0);
141 }
142}
143
118static const struct ath_bus_ops ath_pci_bus_ops = { 144static const struct ath_bus_ops ath_pci_bus_ops = {
119 .ath_bus_type = ATH_PCI, 145 .ath_bus_type = ATH_PCI,
120 .read_cachesize = ath_pci_read_cachesize, 146 .read_cachesize = ath_pci_read_cachesize,
121 .eeprom_read = ath_pci_eeprom_read, 147 .eeprom_read = ath_pci_eeprom_read,
122 .bt_coex_prep = ath_pci_bt_coex_prep, 148 .bt_coex_prep = ath_pci_bt_coex_prep,
123 .extn_synch_en = ath_pci_extn_synch_enable, 149 .extn_synch_en = ath_pci_extn_synch_enable,
150 .aspm_init = ath_pci_aspm_init,
124}; 151};
125 152
126static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 153static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
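
ath_pci_aspm_init() above inspects the *upstream* end of the link: ASPM only takes effect when both link partners enable it, and the ASPM Control field occupies the low two bits of Link Control, which is why PCIE_LINK_STATE_L0S (0x1) and PCIE_LINK_STATE_L1 (0x2) double as the mask. A reduced sketch of the same test:

#include <linux/pci.h>

static bool parent_aspm_enabled(struct pci_dev *pdev)
{
	struct pci_dev *parent = pdev->bus->self;
	int pos;
	u8 lnkctl;

	if (!pci_is_pcie(pdev) || !parent)
		return false;

	pos = pci_pcie_cap(parent);
	if (!pos)
		return false;

	pci_read_config_byte(parent, pos + PCI_EXP_LNKCTL, &lnkctl);

	return lnkctl & (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
}
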
diff --git a/drivers/net/wireless/iwlegacy/iwl-3945.c b/drivers/net/wireless/iwlegacy/iwl-3945.c
index dab67a12d73..73fe3cdf796 100644
--- a/drivers/net/wireless/iwlegacy/iwl-3945.c
+++ b/drivers/net/wireless/iwlegacy/iwl-3945.c
@@ -1746,7 +1746,11 @@ int iwl3945_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *ctx)
1746 } 1746 }
1747 1747
1748 memcpy(active_rxon, staging_rxon, sizeof(*active_rxon)); 1748 memcpy(active_rxon, staging_rxon, sizeof(*active_rxon));
1749 1749 /*
1750 * We do not commit tx power settings while channel changing,
1751 * do it now if tx power changed.
1752 */
1753 iwl_legacy_set_tx_power(priv, priv->tx_power_next, false);
1750 return 0; 1754 return 0;
1751 } 1755 }
1752 1756
diff --git a/drivers/net/wireless/iwlegacy/iwl-4965.c b/drivers/net/wireless/iwlegacy/iwl-4965.c
index bd4b000733f..ecdc6e55742 100644
--- a/drivers/net/wireless/iwlegacy/iwl-4965.c
+++ b/drivers/net/wireless/iwlegacy/iwl-4965.c
@@ -1235,7 +1235,12 @@ static int iwl4965_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *c
1235 1235
1236 memcpy(active_rxon, &ctx->staging, sizeof(*active_rxon)); 1236 memcpy(active_rxon, &ctx->staging, sizeof(*active_rxon));
1237 iwl_legacy_print_rx_config_cmd(priv, ctx); 1237 iwl_legacy_print_rx_config_cmd(priv, ctx);
1238 goto set_tx_power; 1238 /*
1239 * We do not commit tx power settings while channel changing,
1240 * do it now if tx power changed.
1241 */
1242 iwl_legacy_set_tx_power(priv, priv->tx_power_next, false);
1243 return 0;
1239 } 1244 }
1240 1245
1241 /* If we are currently associated and the new config requires 1246 /* If we are currently associated and the new config requires
@@ -1315,7 +1320,6 @@ static int iwl4965_commit_rxon(struct iwl_priv *priv, struct iwl_rxon_context *c
1315 1320
1316 iwl4965_init_sensitivity(priv); 1321 iwl4965_init_sensitivity(priv);
1317 1322
1318set_tx_power:
1319 /* If we issue a new RXON command which required a tune then we must 1323 /* If we issue a new RXON command which required a tune then we must
1320 * send a new TXPOWER command or we won't be able to Tx any frames */ 1324 * send a new TXPOWER command or we won't be able to Tx any frames */
1321 ret = iwl_legacy_set_tx_power(priv, priv->tx_power_next, true); 1325 ret = iwl_legacy_set_tx_power(priv, priv->tx_power_next, true);
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 3eeb12ebe6e..c95cefd529d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -365,6 +365,7 @@ static struct iwl_base_params iwl5000_base_params = {
365 .chain_noise_scale = 1000, 365 .chain_noise_scale = 1000,
366 .wd_timeout = IWL_LONG_WD_TIMEOUT, 366 .wd_timeout = IWL_LONG_WD_TIMEOUT,
367 .max_event_log_size = 512, 367 .max_event_log_size = 512,
368 .no_idle_support = true,
368}; 369};
369static struct iwl_ht_params iwl5000_ht_params = { 370static struct iwl_ht_params iwl5000_ht_params = {
370 .ht_greenfield_support = true, 371 .ht_greenfield_support = true,
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index 3e6bb734dcb..02817a43855 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -135,6 +135,7 @@ struct iwl_mod_params {
135 * @temperature_kelvin: temperature report by uCode in kelvin 135 * @temperature_kelvin: temperature report by uCode in kelvin
136 * @max_event_log_size: size of event log buffer size for ucode event logging 136 * @max_event_log_size: size of event log buffer size for ucode event logging
137 * @shadow_reg_enable: HW shadhow register bit 137 * @shadow_reg_enable: HW shadhow register bit
138 * @no_idle_support: do not support idle mode
138 */ 139 */
139struct iwl_base_params { 140struct iwl_base_params {
140 int eeprom_size; 141 int eeprom_size;
@@ -156,6 +157,7 @@ struct iwl_base_params {
156 bool temperature_kelvin; 157 bool temperature_kelvin;
157 u32 max_event_log_size; 158 u32 max_event_log_size;
158 const bool shadow_reg_enable; 159 const bool shadow_reg_enable;
160 const bool no_idle_support;
159}; 161};
160/* 162/*
161 * @advanced_bt_coexist: support advanced bt coexist 163 * @advanced_bt_coexist: support advanced bt coexist
diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c
index fb7e436b40c..69d4ec467dc 100644
--- a/drivers/net/wireless/iwlwifi/iwl-pci.c
+++ b/drivers/net/wireless/iwlwifi/iwl-pci.c
@@ -134,6 +134,7 @@ static void iwl_pci_apm_config(struct iwl_bus *bus)
134static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data) 134static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data)
135{ 135{
136 bus->drv_data = drv_data; 136 bus->drv_data = drv_data;
137 pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data);
137} 138}
138 139
139static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[], 140static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[],
@@ -454,8 +455,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
454 pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); 455 pci_write_config_word(pdev, PCI_COMMAND, pci_cmd);
455 } 456 }
456 457
457 pci_set_drvdata(pdev, bus);
458
459 bus->dev = &pdev->dev; 458 bus->dev = &pdev->dev;
460 bus->irq = pdev->irq; 459 bus->irq = pdev->irq;
461 bus->ops = &pci_ops; 460 bus->ops = &pci_ops;
@@ -494,11 +493,12 @@ static void iwl_pci_down(struct iwl_bus *bus)
494 493
495static void __devexit iwl_pci_remove(struct pci_dev *pdev) 494static void __devexit iwl_pci_remove(struct pci_dev *pdev)
496{ 495{
497 struct iwl_bus *bus = pci_get_drvdata(pdev); 496 struct iwl_priv *priv = pci_get_drvdata(pdev);
497 void *bus_specific = priv->bus->bus_specific;
498 498
499 iwl_remove(bus->drv_data); 499 iwl_remove(priv);
500 500
501 iwl_pci_down(bus); 501 iwl_pci_down(bus_specific);
502} 502}
503 503
504#ifdef CONFIG_PM 504#ifdef CONFIG_PM
@@ -506,20 +506,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev)
506static int iwl_pci_suspend(struct device *device) 506static int iwl_pci_suspend(struct device *device)
507{ 507{
508 struct pci_dev *pdev = to_pci_dev(device); 508 struct pci_dev *pdev = to_pci_dev(device);
509 struct iwl_bus *bus = pci_get_drvdata(pdev); 509 struct iwl_priv *priv = pci_get_drvdata(pdev);
510 510
511 /* Before you put code here, think about WoWLAN. You cannot check here 511 /* Before you put code here, think about WoWLAN. You cannot check here
512 * whether WoWLAN is enabled or not, and your code will run even if 512 * whether WoWLAN is enabled or not, and your code will run even if
513 * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx. 513 * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx.
514 */ 514 */
515 515
516 return iwl_suspend(bus->drv_data); 516 return iwl_suspend(priv);
517} 517}
518 518
519static int iwl_pci_resume(struct device *device) 519static int iwl_pci_resume(struct device *device)
520{ 520{
521 struct pci_dev *pdev = to_pci_dev(device); 521 struct pci_dev *pdev = to_pci_dev(device);
522 struct iwl_bus *bus = pci_get_drvdata(pdev); 522 struct iwl_priv *priv = pci_get_drvdata(pdev);
523 523
524 /* Before you put code here, think about WoWLAN. You cannot check here 524 /* Before you put code here, think about WoWLAN. You cannot check here
525 * whether WoWLAN is enabled or not, and your code will run even if 525 * whether WoWLAN is enabled or not, and your code will run even if
@@ -532,7 +532,7 @@ static int iwl_pci_resume(struct device *device)
532 */ 532 */
533 pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); 533 pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00);
534 534
535 return iwl_resume(bus->drv_data); 535 return iwl_resume(priv);
536} 536}
537 537
538static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); 538static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume);
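The three hunks above flip the ownership of the PCI drvdata pointer: probe no longer stores the iwl_bus wrapper, iwl_pci_set_drv_data() stores the upper-layer iwl_priv as soon as it exists, and remove/suspend/resume recover bus state through priv. A minimal sketch of the resulting relationship, with the real structs trimmed to just the fields the patch touches:

	/* sketch only: stand-in definitions, not the full driver types */
	struct iwl_bus  { void *drv_data; void *bus_specific; };
	struct iwl_priv { struct iwl_bus *bus; };

	static void __devexit iwl_pci_remove(struct pci_dev *pdev)
	{
		struct iwl_priv *priv = pci_get_drvdata(pdev); /* priv, not bus */

		iwl_remove(priv);
		iwl_pci_down(priv->bus->bus_specific);	/* bus reached via priv */
	}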
diff --git a/drivers/net/wireless/iwlwifi/iwl-power.c b/drivers/net/wireless/iwlwifi/iwl-power.c
index 3ec619c6881..cd64df05f9e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-power.c
+++ b/drivers/net/wireless/iwlwifi/iwl-power.c
@@ -349,7 +349,8 @@ static void iwl_power_build_cmd(struct iwl_priv *priv,
349 349
350 if (priv->wowlan) 350 if (priv->wowlan)
351 iwl_static_sleep_cmd(priv, cmd, IWL_POWER_INDEX_5, dtimper); 351 iwl_static_sleep_cmd(priv, cmd, IWL_POWER_INDEX_5, dtimper);
352 else if (priv->hw->conf.flags & IEEE80211_CONF_IDLE) 352 else if (!priv->cfg->base_params->no_idle_support &&
353 priv->hw->conf.flags & IEEE80211_CONF_IDLE)
353 iwl_static_sleep_cmd(priv, cmd, IWL_POWER_INDEX_5, 20); 354 iwl_static_sleep_cmd(priv, cmd, IWL_POWER_INDEX_5, 20);
354 else if (iwl_tt_is_low_power_state(priv)) { 355 else if (iwl_tt_is_low_power_state(priv)) {
355 /* in thermal throttling low power state */ 356 /* in thermal throttling low power state */
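With the flag plumbed through, the sleep-command builder only picks the idle power index on hardware that supports idle mode. Reduced to the decision itself (pick_runtime_level() is a hypothetical stand-in for the remaining branches):

	if (priv->wowlan)
		level = IWL_POWER_INDEX_5;		/* WoWLAN active */
	else if (!priv->cfg->base_params->no_idle_support &&
		 (priv->hw->conf.flags & IEEE80211_CONF_IDLE))
		level = IWL_POWER_INDEX_5;		/* mac80211 idle */
	else
		level = pick_runtime_level(priv);	/* hypothetical */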
diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 84ab7d1acb6..ef67f6786a8 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -703,8 +703,7 @@ void rt2800_write_beacon(struct queue_entry *entry, struct txentry_desc *txdesc)
703 /* 703 /*
704 * Add space for the TXWI in front of the skb. 704 * Add space for the TXWI in front of the skb.
705 */ 705 */
706 skb_push(entry->skb, TXWI_DESC_SIZE); 706 memset(skb_push(entry->skb, TXWI_DESC_SIZE), 0, TXWI_DESC_SIZE);
707 memset(entry->skb, 0, TXWI_DESC_SIZE);
708 707
709 /* 708 /*
710 * Register descriptor details in skb frame descriptor. 709 * Register descriptor details in skb frame descriptor.
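This is more than a cosmetic merge of two calls. skb_push() moves skb->data back by TXWI_DESC_SIZE and returns the new data pointer, which is what must be zeroed; the old code passed entry->skb — a struct sk_buff pointer — to memset(), clearing the first TXWI_DESC_SIZE bytes of skb metadata instead of the reserved descriptor. The combined form targets the right memory:

	/* zero the freshly reserved TXWI header, not the sk_buff itself */
	memset(skb_push(entry->skb, TXWI_DESC_SIZE), 0, TXWI_DESC_SIZE);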
diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h
index 15cdc7e57fc..4cdf247a870 100644
--- a/drivers/net/wireless/rt2x00/rt2x00lib.h
+++ b/drivers/net/wireless/rt2x00/rt2x00lib.h
@@ -355,7 +355,8 @@ static inline enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *
355 return CIPHER_NONE; 355 return CIPHER_NONE;
356} 356}
357 357
358static inline void rt2x00crypto_create_tx_descriptor(struct queue_entry *entry, 358static inline void rt2x00crypto_create_tx_descriptor(struct rt2x00_dev *rt2x00dev,
359 struct sk_buff *skb,
359 struct txentry_desc *txdesc) 360 struct txentry_desc *txdesc)
360{ 361{
361} 362}
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 8efab398352..4ccf2380597 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -113,7 +113,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
113 * due to possible race conditions in mac80211. 113 * due to possible race conditions in mac80211.
114 */ 114 */
115 if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags)) 115 if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
116 goto exit_fail; 116 goto exit_free_skb;
117 117
118 /* 118 /*
119 * Use the ATIM queue if appropriate and present. 119 * Use the ATIM queue if appropriate and present.
@@ -127,7 +127,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
127 ERROR(rt2x00dev, 127 ERROR(rt2x00dev,
128 "Attempt to send packet over invalid queue %d.\n" 128 "Attempt to send packet over invalid queue %d.\n"
129 "Please file bug report to %s.\n", qid, DRV_PROJECT); 129 "Please file bug report to %s.\n", qid, DRV_PROJECT);
130 goto exit_fail; 130 goto exit_free_skb;
131 } 131 }
132 132
133 /* 133 /*
@@ -159,6 +159,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
159 159
160 exit_fail: 160 exit_fail:
161 rt2x00queue_pause_queue(queue); 161 rt2x00queue_pause_queue(queue);
162 exit_free_skb:
162 dev_kfree_skb_any(skb); 163 dev_kfree_skb_any(skb);
163} 164}
164EXPORT_SYMBOL_GPL(rt2x00mac_tx); 165EXPORT_SYMBOL_GPL(rt2x00mac_tx);
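The relabelled gotos follow the stacked-cleanup idiom: jump to the label that matches how much has been set up, then fall through the rest. Before a queue has been picked there is nothing to pause, so those early failures now bypass rt2x00queue_pause_queue(). A generic sketch (pick_queue() and enqueue() are hypothetical stand-ins):

	err = pick_queue(&queue);
	if (err)
		goto exit_free_skb;	/* no queue yet: nothing to pause */

	err = enqueue(queue, skb);
	if (err)
		goto exit_fail;		/* queue exists: pause it as well */

	return;

 exit_fail:
	rt2x00queue_pause_queue(queue);
 exit_free_skb:
	dev_kfree_skb_any(skb);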
diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
index 5efd5783348..56f12358389 100644
--- a/drivers/net/wireless/rtlwifi/pci.c
+++ b/drivers/net/wireless/rtlwifi/pci.c
@@ -1696,15 +1696,17 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev,
1696 pcipriv->ndis_adapter.devnumber = PCI_SLOT(pdev->devfn); 1696 pcipriv->ndis_adapter.devnumber = PCI_SLOT(pdev->devfn);
1697 pcipriv->ndis_adapter.funcnumber = PCI_FUNC(pdev->devfn); 1697 pcipriv->ndis_adapter.funcnumber = PCI_FUNC(pdev->devfn);
1698 1698
1699 /*find bridge info */ 1699 if (bridge_pdev) {
1700 pcipriv->ndis_adapter.pcibridge_vendorid = bridge_pdev->vendor; 1700 /*find bridge info if available */
1701 for (tmp = 0; tmp < PCI_BRIDGE_VENDOR_MAX; tmp++) { 1701 pcipriv->ndis_adapter.pcibridge_vendorid = bridge_pdev->vendor;
1702 if (bridge_pdev->vendor == pcibridge_vendors[tmp]) { 1702 for (tmp = 0; tmp < PCI_BRIDGE_VENDOR_MAX; tmp++) {
1703 pcipriv->ndis_adapter.pcibridge_vendor = tmp; 1703 if (bridge_pdev->vendor == pcibridge_vendors[tmp]) {
1704 RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, 1704 pcipriv->ndis_adapter.pcibridge_vendor = tmp;
1705 ("Pci Bridge Vendor is found index: %d\n", 1705 RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG,
1706 tmp)); 1706 ("Pci Bridge Vendor is found index:"
1707 break; 1707 " %d\n", tmp));
1708 break;
1709 }
1708 } 1710 }
1709 } 1711 }
1710 1712
diff --git a/drivers/of/address.c b/drivers/of/address.c
index da1f4b9605d..72c33fbe451 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -610,6 +610,6 @@ void __iomem *of_iomap(struct device_node *np, int index)
610 if (of_address_to_resource(np, index, &res)) 610 if (of_address_to_resource(np, index, &res))
611 return NULL; 611 return NULL;
612 612
613 return ioremap(res.start, 1 + res.end - res.start); 613 return ioremap(res.start, resource_size(&res));
614} 614}
615EXPORT_SYMBOL(of_iomap); 615EXPORT_SYMBOL(of_iomap);
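resource_size() is the helper form of the same arithmetic: resource ranges are inclusive, so the byte count is end - start + 1. A self-contained userspace check of the equivalence, with a local stand-in for the kernel struct:

	#include <assert.h>

	struct resource { unsigned long start, end; };

	/* mirrors the kernel helper: inclusive range, hence the +1 */
	static unsigned long resource_size(const struct resource *res)
	{
		return res->end - res->start + 1;
	}

	int main(void)
	{
		struct resource res = { .start = 0x1000, .end = 0x1fff };
		assert(resource_size(&res) == 0x1000);	/* 4 KiB window */
		return 0;
	}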
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 02ed36719de..3ff22e32b60 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -610,8 +610,9 @@ EXPORT_SYMBOL(of_find_node_by_phandle);
610 * 610 *
611 * The out_value is modified only if a valid u32 value can be decoded. 611 * The out_value is modified only if a valid u32 value can be decoded.
612 */ 612 */
613int of_property_read_u32_array(const struct device_node *np, char *propname, 613int of_property_read_u32_array(const struct device_node *np,
614 u32 *out_values, size_t sz) 614 const char *propname, u32 *out_values,
615 size_t sz)
615{ 616{
616 struct property *prop = of_find_property(np, propname, NULL); 617 struct property *prop = of_find_property(np, propname, NULL);
617 const __be32 *val; 618 const __be32 *val;
@@ -645,7 +646,7 @@ EXPORT_SYMBOL_GPL(of_property_read_u32_array);
645 * 646 *
646 * The out_string pointer is modified only if a valid string can be decoded. 647 * The out_string pointer is modified only if a valid string can be decoded.
647 */ 648 */
648int of_property_read_string(struct device_node *np, char *propname, 649int of_property_read_string(struct device_node *np, const char *propname,
649 const char **out_string) 650 const char **out_string)
650{ 651{
651 struct property *prop = of_find_property(np, propname, NULL); 652 struct property *prop = of_find_property(np, propname, NULL);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index a70fa89f76f..220285760b6 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val,
110} 110}
111 111
112 112
113static struct acpi_dock_ops acpiphp_dock_ops = { 113static const struct acpi_dock_ops acpiphp_dock_ops = {
114 .handler = handle_hotplug_event_func, 114 .handler = handle_hotplug_event_func,
115}; 115};
116 116
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 45e0191c35d..1e88d478532 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -769,4 +769,12 @@ config INTEL_OAKTRAIL
769 enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y 769 enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y
770 here; it will only load on supported platforms. 770 here; it will only load on supported platforms.
771 771
772config SAMSUNG_Q10
773 tristate "Samsung Q10 Extras"
774 depends on SERIO_I8042
775 select BACKLIGHT_CLASS_DEVICE
776 ---help---
777 This driver provides support for backlight control on Samsung Q10
778 and related laptops, including Dell Latitude X200.
779
772endif # X86_PLATFORM_DEVICES 780endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index afc1f832aa6..293a320d9fa 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -44,3 +44,4 @@ obj-$(CONFIG_SAMSUNG_LAPTOP) += samsung-laptop.o
44obj-$(CONFIG_MXM_WMI) += mxm-wmi.o 44obj-$(CONFIG_MXM_WMI) += mxm-wmi.o
45obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o 45obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o
46obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o 46obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o
47obj-$(CONFIG_SAMSUNG_Q10) += samsung-q10.o
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index e1c4938b301..af2bb20cb2f 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -99,6 +99,7 @@ enum acer_wmi_event_ids {
99static const struct key_entry acer_wmi_keymap[] = { 99static const struct key_entry acer_wmi_keymap[] = {
100 {KE_KEY, 0x01, {KEY_WLAN} }, /* WiFi */ 100 {KE_KEY, 0x01, {KEY_WLAN} }, /* WiFi */
101 {KE_KEY, 0x03, {KEY_WLAN} }, /* WiFi */ 101 {KE_KEY, 0x03, {KEY_WLAN} }, /* WiFi */
102 {KE_KEY, 0x04, {KEY_WLAN} }, /* WiFi */
102 {KE_KEY, 0x12, {KEY_BLUETOOTH} }, /* BT */ 103 {KE_KEY, 0x12, {KEY_BLUETOOTH} }, /* BT */
103 {KE_KEY, 0x21, {KEY_PROG1} }, /* Backup */ 104 {KE_KEY, 0x21, {KEY_PROG1} }, /* Backup */
104 {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */ 105 {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */
@@ -304,6 +305,10 @@ static struct quirk_entry quirk_fujitsu_amilo_li_1718 = {
304 .wireless = 2, 305 .wireless = 2,
305}; 306};
306 307
308static struct quirk_entry quirk_lenovo_ideapad_s205 = {
309 .wireless = 3,
310};
311
307/* The Aspire One has a dummy ACPI-WMI interface - disable it */ 312/* The Aspire One has a dummy ACPI-WMI interface - disable it */
308static struct dmi_system_id __devinitdata acer_blacklist[] = { 313static struct dmi_system_id __devinitdata acer_blacklist[] = {
309 { 314 {
@@ -450,6 +455,15 @@ static struct dmi_system_id acer_quirks[] = {
450 }, 455 },
451 .driver_data = &quirk_medion_md_98300, 456 .driver_data = &quirk_medion_md_98300,
452 }, 457 },
458 {
459 .callback = dmi_matched,
460 .ident = "Lenovo Ideapad S205",
461 .matches = {
462 DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
463 DMI_MATCH(DMI_PRODUCT_NAME, "10382LG"),
464 },
465 .driver_data = &quirk_lenovo_ideapad_s205,
466 },
453 {} 467 {}
454}; 468};
455 469
@@ -542,6 +556,12 @@ struct wmi_interface *iface)
542 return AE_ERROR; 556 return AE_ERROR;
543 *value = result & 0x1; 557 *value = result & 0x1;
544 return AE_OK; 558 return AE_OK;
559 case 3:
560 err = ec_read(0x78, &result);
561 if (err)
562 return AE_ERROR;
563 *value = result & 0x1;
564 return AE_OK;
545 default: 565 default:
546 err = ec_read(0xA, &result); 566 err = ec_read(0xA, &result);
547 if (err) 567 if (err)
@@ -1266,8 +1286,13 @@ static void acer_rfkill_update(struct work_struct *ignored)
1266 acpi_status status; 1286 acpi_status status;
1267 1287
1268 status = get_u32(&state, ACER_CAP_WIRELESS); 1288 status = get_u32(&state, ACER_CAP_WIRELESS);
1269 if (ACPI_SUCCESS(status)) 1289 if (ACPI_SUCCESS(status)) {
1270 rfkill_set_sw_state(wireless_rfkill, !state); 1290 if (quirks->wireless == 3) {
1291 rfkill_set_hw_state(wireless_rfkill, !state);
1292 } else {
1293 rfkill_set_sw_state(wireless_rfkill, !state);
1294 }
1295 }
1271 1296
1272 if (has_cap(ACER_CAP_BLUETOOTH)) { 1297 if (has_cap(ACER_CAP_BLUETOOTH)) {
1273 status = get_u32(&state, ACER_CAP_BLUETOOTH); 1298 status = get_u32(&state, ACER_CAP_BLUETOOTH);
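The wireless == 3 quirk matters because rfkill treats the two setters differently: sw state is a software block that userspace can clear again, while hw state models a physical switch that software cannot override. On the Ideapad S205 the EC bit appears to reflect such a switch, so reporting it as sw state would invite futile unblock attempts. The core of the change:

	if (quirks->wireless == 3)	/* EC bit mirrors a physical switch */
		rfkill_set_hw_state(wireless_rfkill, !state);
	else				/* firmware-managed soft block */
		rfkill_set_sw_state(wireless_rfkill, !state);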
@@ -1400,6 +1425,9 @@ static ssize_t show_bool_threeg(struct device *dev,
1400{ 1425{
1401 u32 result; \ 1426 u32 result; \
1402 acpi_status status; 1427 acpi_status status;
1428
1429 pr_info("This threeg sysfs will be removed in 2012"
1430 " - used by: %s\n", current->comm);
1403 if (wmi_has_guid(WMID_GUID3)) 1431 if (wmi_has_guid(WMID_GUID3))
1404 status = wmid3_get_device_status(&result, 1432 status = wmid3_get_device_status(&result,
1405 ACER_WMID3_GDS_THREEG); 1433 ACER_WMID3_GDS_THREEG);
@@ -1415,8 +1443,10 @@ static ssize_t set_bool_threeg(struct device *dev,
1415{ 1443{
1416 u32 tmp = simple_strtoul(buf, NULL, 10); 1444 u32 tmp = simple_strtoul(buf, NULL, 10);
1417 acpi_status status = set_u32(tmp, ACER_CAP_THREEG); 1445 acpi_status status = set_u32(tmp, ACER_CAP_THREEG);
1418 if (ACPI_FAILURE(status)) 1446 pr_info("This threeg sysfs will be removed in 2012"
1419 return -EINVAL; 1447 " - used by: %s\n", current->comm);
1448 if (ACPI_FAILURE(status))
1449 return -EINVAL;
1420 return count; 1450 return count;
1421} 1451}
1422static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg, 1452static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg,
@@ -1425,6 +1455,8 @@ static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg,
1425static ssize_t show_interface(struct device *dev, struct device_attribute *attr, 1455static ssize_t show_interface(struct device *dev, struct device_attribute *attr,
1426 char *buf) 1456 char *buf)
1427{ 1457{
1458 pr_info("This interface sysfs will be removed in 2012"
1459 " - used by: %s\n", current->comm);
1428 switch (interface->type) { 1460 switch (interface->type) {
1429 case ACER_AMW0: 1461 case ACER_AMW0:
1430 return sprintf(buf, "AMW0\n"); 1462 return sprintf(buf, "AMW0\n");
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index fca3489218b..760c6d7624f 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -182,6 +182,7 @@ static const struct bios_settings_t bios_tbl[] = {
182 {"Acer", "Aspire 1810T", "v1.3308", 0x55, 0x58, {0x9e, 0x00} }, 182 {"Acer", "Aspire 1810T", "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
183 {"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00} }, 183 {"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
184 {"Acer", "Aspire 1810T", "v1.3310", 0x55, 0x58, {0x9e, 0x00} }, 184 {"Acer", "Aspire 1810T", "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
185 {"Acer", "Aspire 1810TZ", "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
185 /* Acer 531 */ 186 /* Acer 531 */
186 {"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00} }, 187 {"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00} },
187 /* Gateway */ 188 /* Gateway */
@@ -703,15 +704,15 @@ MODULE_LICENSE("GPL");
703MODULE_AUTHOR("Peter Feuerer"); 704MODULE_AUTHOR("Peter Feuerer");
704MODULE_DESCRIPTION("Aspire One temperature and fan driver"); 705MODULE_DESCRIPTION("Aspire One temperature and fan driver");
705MODULE_ALIAS("dmi:*:*Acer*:pnAOA*:"); 706MODULE_ALIAS("dmi:*:*Acer*:pnAOA*:");
706MODULE_ALIAS("dmi:*:*Acer*:pnAspire 1410*:"); 707MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1410*:");
707MODULE_ALIAS("dmi:*:*Acer*:pnAspire 1810*:"); 708MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1810*:");
708MODULE_ALIAS("dmi:*:*Acer*:pnAO531*:"); 709MODULE_ALIAS("dmi:*:*Acer*:pnAO531*:");
709MODULE_ALIAS("dmi:*:*Gateway*:pnAOA*:"); 710MODULE_ALIAS("dmi:*:*Gateway*:pnAOA*:");
710MODULE_ALIAS("dmi:*:*Gateway*:pnLT31*:"); 711MODULE_ALIAS("dmi:*:*Gateway*:pnLT31*:");
711MODULE_ALIAS("dmi:*:*Packard Bell*:pnAOA*:"); 712MODULE_ALIAS("dmi:*:*Packard*Bell*:pnAOA*:");
712MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOA*:"); 713MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOA*:");
713MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOTMU*:"); 714MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMU*:");
714MODULE_ALIAS("dmi:*:*Packard Bell*:pnDOTMA*:"); 715MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMA*:");
715 716
716module_init(acerhdf_init); 717module_init(acerhdf_init);
717module_exit(acerhdf_exit); 718module_exit(acerhdf_exit);
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index d65df92e2ac..fa6d7ec68b2 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -70,11 +70,10 @@ MODULE_LICENSE("GPL");
70 * WAPF defines the behavior of the Fn+Fx wlan key 70 * WAPF defines the behavior of the Fn+Fx wlan key
71 * The significance of values is yet to be found, but 71 * The significance of values is yet to be found, but
72 * most of the time: 72 * most of the time:
73 * 0x0 will do nothing 73 * Bit | Bluetooth | WLAN
74 * 0x1 will allow to control the device with Fn+Fx key. 74 * 0 | Hardware | Hardware
75 * 0x4 will send an ACPI event (0x88) while pressing the Fn+Fx key 75 * 1 | Hardware | Software
76 * 0x5 like 0x1 or 0x4 76 * 4 | Software | Software
77 * So, if something doesn't work as you want, just try other values =)
78 */ 77 */
79static uint wapf = 1; 78static uint wapf = 1;
80module_param(wapf, uint, 0444); 79module_param(wapf, uint, 0444);
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 0580d99b079..b0859d4183e 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -38,6 +38,24 @@ MODULE_LICENSE("GPL");
38 38
39MODULE_ALIAS("wmi:"ASUS_NB_WMI_EVENT_GUID); 39MODULE_ALIAS("wmi:"ASUS_NB_WMI_EVENT_GUID);
40 40
41/*
42 * WAPF defines the behavior of the Fn+Fx wlan key
43 * The significance of values is yet to be found, but
44 * most of the time:
45 * Bit | Bluetooth | WLAN
46 * 0 | Hardware | Hardware
47 * 1 | Hardware | Software
48 * 4 | Software | Software
49 */
50static uint wapf;
51module_param(wapf, uint, 0444);
52MODULE_PARM_DESC(wapf, "WAPF value");
53
54static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver)
55{
56 driver->wapf = wapf;
57}
58
41static const struct key_entry asus_nb_wmi_keymap[] = { 59static const struct key_entry asus_nb_wmi_keymap[] = {
42 { KE_KEY, 0x30, { KEY_VOLUMEUP } }, 60 { KE_KEY, 0x30, { KEY_VOLUMEUP } },
43 { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, 61 { KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
@@ -53,16 +71,16 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
53 { KE_KEY, 0x51, { KEY_WWW } }, 71 { KE_KEY, 0x51, { KEY_WWW } },
54 { KE_KEY, 0x55, { KEY_CALC } }, 72 { KE_KEY, 0x55, { KEY_CALC } },
55 { KE_KEY, 0x5C, { KEY_F15 } }, /* Power Gear key */ 73 { KE_KEY, 0x5C, { KEY_F15 } }, /* Power Gear key */
56 { KE_KEY, 0x5D, { KEY_WLAN } }, 74 { KE_KEY, 0x5D, { KEY_WLAN } }, /* Wireless console Toggle */
57 { KE_KEY, 0x5E, { KEY_WLAN } }, 75 { KE_KEY, 0x5E, { KEY_WLAN } }, /* Wireless console Enable */
58 { KE_KEY, 0x5F, { KEY_WLAN } }, 76 { KE_KEY, 0x5F, { KEY_WLAN } }, /* Wireless console Disable */
59 { KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } }, 77 { KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } },
60 { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, 78 { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } },
61 { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, 79 { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } },
62 { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, 80 { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } },
63 { KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, 81 { KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } },
64 { KE_KEY, 0x7E, { KEY_BLUETOOTH } },
65 { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, 82 { KE_KEY, 0x7D, { KEY_BLUETOOTH } },
83 { KE_KEY, 0x7E, { KEY_BLUETOOTH } },
66 { KE_KEY, 0x82, { KEY_CAMERA } }, 84 { KE_KEY, 0x82, { KEY_CAMERA } },
67 { KE_KEY, 0x88, { KEY_RFKILL } }, 85 { KE_KEY, 0x88, { KEY_RFKILL } },
68 { KE_KEY, 0x8A, { KEY_PROG1 } }, 86 { KE_KEY, 0x8A, { KEY_PROG1 } },
@@ -81,6 +99,7 @@ static struct asus_wmi_driver asus_nb_wmi_driver = {
81 .keymap = asus_nb_wmi_keymap, 99 .keymap = asus_nb_wmi_keymap,
82 .input_name = "Asus WMI hotkeys", 100 .input_name = "Asus WMI hotkeys",
83 .input_phys = ASUS_NB_WMI_FILE "/input0", 101 .input_phys = ASUS_NB_WMI_FILE "/input0",
102 .quirks = asus_nb_wmi_quirks,
84}; 103};
85 104
86 105
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 65b66aa44c7..95cba9ebf6c 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -44,6 +44,7 @@
44#include <linux/debugfs.h> 44#include <linux/debugfs.h>
45#include <linux/seq_file.h> 45#include <linux/seq_file.h>
46#include <linux/platform_device.h> 46#include <linux/platform_device.h>
47#include <linux/thermal.h>
47#include <acpi/acpi_bus.h> 48#include <acpi/acpi_bus.h>
48#include <acpi/acpi_drivers.h> 49#include <acpi/acpi_drivers.h>
49 50
@@ -66,6 +67,8 @@ MODULE_LICENSE("GPL");
66#define NOTIFY_BRNUP_MAX 0x1f 67#define NOTIFY_BRNUP_MAX 0x1f
67#define NOTIFY_BRNDOWN_MIN 0x20 68#define NOTIFY_BRNDOWN_MIN 0x20
68#define NOTIFY_BRNDOWN_MAX 0x2e 69#define NOTIFY_BRNDOWN_MAX 0x2e
70#define NOTIFY_KBD_BRTUP 0xc4
71#define NOTIFY_KBD_BRTDWN 0xc5
69 72
70/* WMI Methods */ 73/* WMI Methods */
71#define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */ 74#define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */
@@ -93,6 +96,7 @@ MODULE_LICENSE("GPL");
93/* Wireless */ 96/* Wireless */
94#define ASUS_WMI_DEVID_HW_SWITCH 0x00010001 97#define ASUS_WMI_DEVID_HW_SWITCH 0x00010001
95#define ASUS_WMI_DEVID_WIRELESS_LED 0x00010002 98#define ASUS_WMI_DEVID_WIRELESS_LED 0x00010002
99#define ASUS_WMI_DEVID_CWAP 0x00010003
96#define ASUS_WMI_DEVID_WLAN 0x00010011 100#define ASUS_WMI_DEVID_WLAN 0x00010011
97#define ASUS_WMI_DEVID_BLUETOOTH 0x00010013 101#define ASUS_WMI_DEVID_BLUETOOTH 0x00010013
98#define ASUS_WMI_DEVID_GPS 0x00010015 102#define ASUS_WMI_DEVID_GPS 0x00010015
@@ -102,6 +106,12 @@ MODULE_LICENSE("GPL");
102 106
103/* Leds */ 107/* Leds */
104/* 0x000200XX and 0x000400XX */ 108/* 0x000200XX and 0x000400XX */
109#define ASUS_WMI_DEVID_LED1 0x00020011
110#define ASUS_WMI_DEVID_LED2 0x00020012
111#define ASUS_WMI_DEVID_LED3 0x00020013
112#define ASUS_WMI_DEVID_LED4 0x00020014
113#define ASUS_WMI_DEVID_LED5 0x00020015
114#define ASUS_WMI_DEVID_LED6 0x00020016
105 115
106/* Backlight and Brightness */ 116/* Backlight and Brightness */
107#define ASUS_WMI_DEVID_BACKLIGHT 0x00050011 117#define ASUS_WMI_DEVID_BACKLIGHT 0x00050011
@@ -174,13 +184,18 @@ struct asus_wmi {
174 184
175 struct led_classdev tpd_led; 185 struct led_classdev tpd_led;
176 int tpd_led_wk; 186 int tpd_led_wk;
187 struct led_classdev kbd_led;
188 int kbd_led_wk;
177 struct workqueue_struct *led_workqueue; 189 struct workqueue_struct *led_workqueue;
178 struct work_struct tpd_led_work; 190 struct work_struct tpd_led_work;
191 struct work_struct kbd_led_work;
179 192
180 struct asus_rfkill wlan; 193 struct asus_rfkill wlan;
181 struct asus_rfkill bluetooth; 194 struct asus_rfkill bluetooth;
182 struct asus_rfkill wimax; 195 struct asus_rfkill wimax;
183 struct asus_rfkill wwan3g; 196 struct asus_rfkill wwan3g;
197 struct asus_rfkill gps;
198 struct asus_rfkill uwb;
184 199
185 struct hotplug_slot *hotplug_slot; 200 struct hotplug_slot *hotplug_slot;
186 struct mutex hotplug_lock; 201 struct mutex hotplug_lock;
@@ -205,6 +220,7 @@ static int asus_wmi_input_init(struct asus_wmi *asus)
205 asus->inputdev->phys = asus->driver->input_phys; 220 asus->inputdev->phys = asus->driver->input_phys;
206 asus->inputdev->id.bustype = BUS_HOST; 221 asus->inputdev->id.bustype = BUS_HOST;
207 asus->inputdev->dev.parent = &asus->platform_device->dev; 222 asus->inputdev->dev.parent = &asus->platform_device->dev;
223 set_bit(EV_REP, asus->inputdev->evbit);
208 224
209 err = sparse_keymap_setup(asus->inputdev, asus->driver->keymap, NULL); 225 err = sparse_keymap_setup(asus->inputdev, asus->driver->keymap, NULL);
210 if (err) 226 if (err)
@@ -359,30 +375,80 @@ static enum led_brightness tpd_led_get(struct led_classdev *led_cdev)
359 return read_tpd_led_state(asus); 375 return read_tpd_led_state(asus);
360} 376}
361 377
362static int asus_wmi_led_init(struct asus_wmi *asus) 378static void kbd_led_update(struct work_struct *work)
363{ 379{
364 int rv; 380 int ctrl_param = 0;
381 struct asus_wmi *asus;
365 382
366 if (read_tpd_led_state(asus) < 0) 383 asus = container_of(work, struct asus_wmi, kbd_led_work);
367 return 0;
368 384
369 asus->led_workqueue = create_singlethread_workqueue("led_workqueue"); 385 /*
370 if (!asus->led_workqueue) 386 * bits 0-2: level
371 return -ENOMEM; 387 * bit 7: light on/off
372 INIT_WORK(&asus->tpd_led_work, tpd_led_update); 388 */
389 if (asus->kbd_led_wk > 0)
390 ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F);
373 391
374 asus->tpd_led.name = "asus::touchpad"; 392 asus_wmi_set_devstate(ASUS_WMI_DEVID_KBD_BACKLIGHT, ctrl_param, NULL);
375 asus->tpd_led.brightness_set = tpd_led_set; 393}
376 asus->tpd_led.brightness_get = tpd_led_get;
377 asus->tpd_led.max_brightness = 1;
378 394
379 rv = led_classdev_register(&asus->platform_device->dev, &asus->tpd_led); 395static int kbd_led_read(struct asus_wmi *asus, int *level, int *env)
380 if (rv) { 396{
381 destroy_workqueue(asus->led_workqueue); 397 int retval;
382 return rv; 398
399 /*
400 * bits 0-2: level
401 * bit 7: light on/off
402 * bit 8-10: environment (0: dark, 1: normal, 2: light)
403 * bit 17: status unknown
404 */
405 retval = asus_wmi_get_devstate_bits(asus, ASUS_WMI_DEVID_KBD_BACKLIGHT,
406 0xFFFF);
407
408 /* Unknown status is considered as off */
409 if (retval == 0x8000)
410 retval = 0;
411
412 if (retval >= 0) {
413 if (level)
414 *level = retval & 0x80 ? retval & 0x7F : 0;
415 if (env)
416 *env = (retval >> 8) & 0x7F;
417 retval = 0;
383 } 418 }
384 419
385 return 0; 420 return retval;
421}
422
423static void kbd_led_set(struct led_classdev *led_cdev,
424 enum led_brightness value)
425{
426 struct asus_wmi *asus;
427
428 asus = container_of(led_cdev, struct asus_wmi, kbd_led);
429
430 if (value > asus->kbd_led.max_brightness)
431 value = asus->kbd_led.max_brightness;
432 else if (value < 0)
433 value = 0;
434
435 asus->kbd_led_wk = value;
436 queue_work(asus->led_workqueue, &asus->kbd_led_work);
437}
438
439static enum led_brightness kbd_led_get(struct led_classdev *led_cdev)
440{
441 struct asus_wmi *asus;
442 int retval, value;
443
444 asus = container_of(led_cdev, struct asus_wmi, kbd_led);
445
446 retval = kbd_led_read(asus, &value, NULL);
447
448 if (retval < 0)
449 return retval;
450
451 return value;
386} 452}
387 453
388static void asus_wmi_led_exit(struct asus_wmi *asus) 454static void asus_wmi_led_exit(struct asus_wmi *asus)
@@ -393,6 +459,48 @@ static void asus_wmi_led_exit(struct asus_wmi *asus)
393 destroy_workqueue(asus->led_workqueue); 459 destroy_workqueue(asus->led_workqueue);
394} 460}
395 461
462static int asus_wmi_led_init(struct asus_wmi *asus)
463{
464 int rv = 0;
465
466 asus->led_workqueue = create_singlethread_workqueue("led_workqueue");
467 if (!asus->led_workqueue)
468 return -ENOMEM;
469
470 if (read_tpd_led_state(asus) >= 0) {
471 INIT_WORK(&asus->tpd_led_work, tpd_led_update);
472
473 asus->tpd_led.name = "asus::touchpad";
474 asus->tpd_led.brightness_set = tpd_led_set;
475 asus->tpd_led.brightness_get = tpd_led_get;
476 asus->tpd_led.max_brightness = 1;
477
478 rv = led_classdev_register(&asus->platform_device->dev,
479 &asus->tpd_led);
480 if (rv)
481 goto error;
482 }
483
484 if (kbd_led_read(asus, NULL, NULL) >= 0) {
485 INIT_WORK(&asus->kbd_led_work, kbd_led_update);
486
487 asus->kbd_led.name = "asus::kbd_backlight";
488 asus->kbd_led.brightness_set = kbd_led_set;
489 asus->kbd_led.brightness_get = kbd_led_get;
490 asus->kbd_led.max_brightness = 3;
491
492 rv = led_classdev_register(&asus->platform_device->dev,
493 &asus->kbd_led);
494 }
495
496error:
497 if (rv)
498 asus_wmi_led_exit(asus);
499
500 return rv;
501}
502
503
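Per the comments in the patch, the keyboard-backlight state word packs the level into bits 0-2 with bit 7 gating the light (bits 8-10 report the ambient-light environment on reads). A standalone round-trip of that encoding, mirroring kbd_led_update() and kbd_led_read():

	#include <assert.h>

	/* bit layout taken from the patch comments; otherwise undocumented */
	static int encode_level(int level)
	{
		return level > 0 ? 0x80 | (level & 0x7F) : 0;
	}

	static int decode_level(int state)
	{
		return (state & 0x80) ? state & 0x7F : 0;
	}

	int main(void)
	{
		assert(encode_level(3) == 0x83);
		assert(decode_level(0x83) == 3);
		assert(decode_level(0x03) == 0);	/* light off => level 0 */
		return 0;
	}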
396/* 504/*
397 * PCI hotplug (for wlan rfkill) 505 * PCI hotplug (for wlan rfkill)
398 */ 506 */
@@ -729,6 +837,16 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
729 rfkill_destroy(asus->wwan3g.rfkill); 837 rfkill_destroy(asus->wwan3g.rfkill);
730 asus->wwan3g.rfkill = NULL; 838 asus->wwan3g.rfkill = NULL;
731 } 839 }
840 if (asus->gps.rfkill) {
841 rfkill_unregister(asus->gps.rfkill);
842 rfkill_destroy(asus->gps.rfkill);
843 asus->gps.rfkill = NULL;
844 }
845 if (asus->uwb.rfkill) {
846 rfkill_unregister(asus->uwb.rfkill);
847 rfkill_destroy(asus->uwb.rfkill);
848 asus->uwb.rfkill = NULL;
849 }
732} 850}
733 851
734static int asus_wmi_rfkill_init(struct asus_wmi *asus) 852static int asus_wmi_rfkill_init(struct asus_wmi *asus)
@@ -763,6 +881,18 @@ static int asus_wmi_rfkill_init(struct asus_wmi *asus)
763 if (result && result != -ENODEV) 881 if (result && result != -ENODEV)
764 goto exit; 882 goto exit;
765 883
884 result = asus_new_rfkill(asus, &asus->gps, "asus-gps",
885 RFKILL_TYPE_GPS, ASUS_WMI_DEVID_GPS);
886
887 if (result && result != -ENODEV)
888 goto exit;
889
890 result = asus_new_rfkill(asus, &asus->uwb, "asus-uwb",
891 RFKILL_TYPE_UWB, ASUS_WMI_DEVID_UWB);
892
893 if (result && result != -ENODEV)
894 goto exit;
895
766 if (!asus->driver->hotplug_wireless) 896 if (!asus->driver->hotplug_wireless)
767 goto exit; 897 goto exit;
768 898
@@ -797,8 +927,8 @@ exit:
797 * Hwmon device 927 * Hwmon device
798 */ 928 */
799static ssize_t asus_hwmon_pwm1(struct device *dev, 929static ssize_t asus_hwmon_pwm1(struct device *dev,
800 struct device_attribute *attr, 930 struct device_attribute *attr,
801 char *buf) 931 char *buf)
802{ 932{
803 struct asus_wmi *asus = dev_get_drvdata(dev); 933 struct asus_wmi *asus = dev_get_drvdata(dev);
804 u32 value; 934 u32 value;
@@ -809,7 +939,7 @@ static ssize_t asus_hwmon_pwm1(struct device *dev,
809 if (err < 0) 939 if (err < 0)
810 return err; 940 return err;
811 941
812 value |= 0xFF; 942 value &= 0xFF;
813 943
814 if (value == 1) /* Low Speed */ 944 if (value == 1) /* Low Speed */
815 value = 85; 945 value = 85;
@@ -825,7 +955,26 @@ static ssize_t asus_hwmon_pwm1(struct device *dev,
825 return sprintf(buf, "%d\n", value); 955 return sprintf(buf, "%d\n", value);
826} 956}
827 957
958static ssize_t asus_hwmon_temp1(struct device *dev,
959 struct device_attribute *attr,
960 char *buf)
961{
962 struct asus_wmi *asus = dev_get_drvdata(dev);
963 u32 value;
964 int err;
965
966 err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_THERMAL_CTRL, &value);
967
968 if (err < 0)
969 return err;
970
971 value = KELVIN_TO_CELSIUS((value & 0xFFFF)) * 1000;
972
973 return sprintf(buf, "%d\n", value);
974}
975
828static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO, asus_hwmon_pwm1, NULL, 0); 976static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO, asus_hwmon_pwm1, NULL, 0);
977static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, asus_hwmon_temp1, NULL, 0);
829 978
830static ssize_t 979static ssize_t
831show_name(struct device *dev, struct device_attribute *attr, char *buf) 980show_name(struct device *dev, struct device_attribute *attr, char *buf)
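hwmon expects temp*_input in millidegrees Celsius, hence the trailing * 1000. KELVIN_TO_CELSIUS() comes from linux/thermal.h, where it converts tenths of a kelvin (the ACPI convention) with rounding; a userspace restatement of that arithmetic, assuming the WMI register uses the same deci-kelvin units:

	#include <stdio.h>

	/* mirrors linux/thermal.h's rounding; input in 0.1 K units */
	static long deci_kelvin_to_celsius(long t)
	{
		return (t - 2732 >= 0) ? (t - 2732 + 5) / 10
				       : (t - 2732 - 5) / 10;
	}

	int main(void)
	{
		long raw = 3131 & 0xFFFF;	/* 313.1 K */
		printf("%ld\n", deci_kelvin_to_celsius(raw) * 1000); /* 40000 */
		return 0;
	}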
@@ -836,12 +985,13 @@ static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, 0);
836 985
837static struct attribute *hwmon_attributes[] = { 986static struct attribute *hwmon_attributes[] = {
838 &sensor_dev_attr_pwm1.dev_attr.attr, 987 &sensor_dev_attr_pwm1.dev_attr.attr,
988 &sensor_dev_attr_temp1_input.dev_attr.attr,
839 &sensor_dev_attr_name.dev_attr.attr, 989 &sensor_dev_attr_name.dev_attr.attr,
840 NULL 990 NULL
841}; 991};
842 992
843static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj, 993static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
844 struct attribute *attr, int idx) 994 struct attribute *attr, int idx)
845{ 995{
846 struct device *dev = container_of(kobj, struct device, kobj); 996 struct device *dev = container_of(kobj, struct device, kobj);
847 struct platform_device *pdev = to_platform_device(dev->parent); 997 struct platform_device *pdev = to_platform_device(dev->parent);
@@ -852,6 +1002,8 @@ static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
852 1002
853 if (attr == &sensor_dev_attr_pwm1.dev_attr.attr) 1003 if (attr == &sensor_dev_attr_pwm1.dev_attr.attr)
854 dev_id = ASUS_WMI_DEVID_FAN_CTRL; 1004 dev_id = ASUS_WMI_DEVID_FAN_CTRL;
1005 else if (attr == &sensor_dev_attr_temp1_input.dev_attr.attr)
1006 dev_id = ASUS_WMI_DEVID_THERMAL_CTRL;
855 1007
856 if (dev_id != -1) { 1008 if (dev_id != -1) {
857 int err = asus_wmi_get_devstate(asus, dev_id, &value); 1009 int err = asus_wmi_get_devstate(asus, dev_id, &value);
@@ -869,9 +1021,13 @@ static mode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
869 * - reserved bits are non-zero 1021
870 * - sfun and presence bit are not set 1022 * - sfun and presence bit are not set
871 */ 1023 */
872 if (value != ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000 1024 if (value == ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000
873 || (!asus->sfun && !(value & ASUS_WMI_DSTS_PRESENCE_BIT))) 1025 || (!asus->sfun && !(value & ASUS_WMI_DSTS_PRESENCE_BIT)))
874 ok = false; 1026 ok = false;
1027 } else if (dev_id == ASUS_WMI_DEVID_THERMAL_CTRL) {
1028 /* If value is zero, something is clearly wrong */
1029 if (value == 0)
1030 ok = false;
875 } 1031 }
876 1032
877 return ok ? attr->mode : 0; 1033 return ok ? attr->mode : 0;
@@ -904,6 +1060,7 @@ static int asus_wmi_hwmon_init(struct asus_wmi *asus)
904 pr_err("Could not register asus hwmon device\n"); 1060 pr_err("Could not register asus hwmon device\n");
905 return PTR_ERR(hwmon); 1061 return PTR_ERR(hwmon);
906 } 1062 }
1063 dev_set_drvdata(hwmon, asus);
907 asus->hwmon_device = hwmon; 1064 asus->hwmon_device = hwmon;
908 result = sysfs_create_group(&hwmon->kobj, &hwmon_attribute_group); 1065 result = sysfs_create_group(&hwmon->kobj, &hwmon_attribute_group);
909 if (result) 1066 if (result)
@@ -1060,6 +1217,8 @@ static void asus_wmi_notify(u32 value, void *context)
1060 acpi_status status; 1217 acpi_status status;
1061 int code; 1218 int code;
1062 int orig_code; 1219 int orig_code;
1220 unsigned int key_value = 1;
1221 bool autorelease = 1;
1063 1222
1064 status = wmi_get_event_data(value, &response); 1223 status = wmi_get_event_data(value, &response);
1065 if (status != AE_OK) { 1224 if (status != AE_OK) {
@@ -1075,6 +1234,13 @@ static void asus_wmi_notify(u32 value, void *context)
1075 code = obj->integer.value; 1234 code = obj->integer.value;
1076 orig_code = code; 1235 orig_code = code;
1077 1236
1237 if (asus->driver->key_filter) {
1238 asus->driver->key_filter(asus->driver, &code, &key_value,
1239 &autorelease);
1240 if (code == ASUS_WMI_KEY_IGNORE)
1241 goto exit;
1242 }
1243
1078 if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) 1244 if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX)
1079 code = NOTIFY_BRNUP_MIN; 1245 code = NOTIFY_BRNUP_MIN;
1080 else if (code >= NOTIFY_BRNDOWN_MIN && 1246 else if (code >= NOTIFY_BRNDOWN_MIN &&
@@ -1084,7 +1250,8 @@ static void asus_wmi_notify(u32 value, void *context)
1084 if (code == NOTIFY_BRNUP_MIN || code == NOTIFY_BRNDOWN_MIN) { 1250 if (code == NOTIFY_BRNUP_MIN || code == NOTIFY_BRNDOWN_MIN) {
1085 if (!acpi_video_backlight_support()) 1251 if (!acpi_video_backlight_support())
1086 asus_wmi_backlight_notify(asus, orig_code); 1252 asus_wmi_backlight_notify(asus, orig_code);
1087 } else if (!sparse_keymap_report_event(asus->inputdev, code, 1, true)) 1253 } else if (!sparse_keymap_report_event(asus->inputdev, code,
1254 key_value, autorelease))
1088 pr_info("Unknown key %x pressed\n", code); 1255 pr_info("Unknown key %x pressed\n", code);
1089 1256
1090exit: 1257exit:
@@ -1164,14 +1331,18 @@ ASUS_WMI_CREATE_DEVICE_ATTR(cardr, 0644, ASUS_WMI_DEVID_CARDREADER);
1164static ssize_t store_cpufv(struct device *dev, struct device_attribute *attr, 1331static ssize_t store_cpufv(struct device *dev, struct device_attribute *attr,
1165 const char *buf, size_t count) 1332 const char *buf, size_t count)
1166{ 1333{
1167 int value; 1334 int value, rv;
1168 1335
1169 if (!count || sscanf(buf, "%i", &value) != 1) 1336 if (!count || sscanf(buf, "%i", &value) != 1)
1170 return -EINVAL; 1337 return -EINVAL;
1171 if (value < 0 || value > 2) 1338 if (value < 0 || value > 2)
1172 return -EINVAL; 1339 return -EINVAL;
1173 1340
1174 return asus_wmi_evaluate_method(ASUS_WMI_METHODID_CFVS, value, 0, NULL); 1341 rv = asus_wmi_evaluate_method(ASUS_WMI_METHODID_CFVS, value, 0, NULL);
1342 if (rv < 0)
1343 return rv;
1344
1345 return count;
1175} 1346}
1176 1347
1177static DEVICE_ATTR(cpufv, S_IRUGO | S_IWUSR, NULL, store_cpufv); 1348static DEVICE_ATTR(cpufv, S_IRUGO | S_IWUSR, NULL, store_cpufv);
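The return-value change fixes the sysfs contract: a store callback must return the number of bytes consumed or a negative errno. asus_wmi_evaluate_method() returns 0 on success, and passing that through made every write() report zero bytes written, so well-behaved callers retried the write forever. The fixed shape:

	rv = asus_wmi_evaluate_method(ASUS_WMI_METHODID_CFVS, value, 0, NULL);
	if (rv < 0)
		return rv;	/* propagate the errno */

	return count;		/* success: the whole buffer was consumed */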
@@ -1234,7 +1405,7 @@ static int asus_wmi_platform_init(struct asus_wmi *asus)
1234 1405
1235 /* We don't know yet what to do with this version... */ 1406 /* We don't know yet what to do with this version... */
1236 if (!asus_wmi_evaluate_method(ASUS_WMI_METHODID_SPEC, 0, 0x9, &rv)) { 1407 if (!asus_wmi_evaluate_method(ASUS_WMI_METHODID_SPEC, 0, 0x9, &rv)) {
1237 pr_info("BIOS WMI version: %d.%d", rv >> 8, rv & 0xFF); 1408 pr_info("BIOS WMI version: %d.%d", rv >> 16, rv & 0xFF);
1238 asus->spec = rv; 1409 asus->spec = rv;
1239 } 1410 }
1240 1411
@@ -1266,6 +1437,12 @@ static int asus_wmi_platform_init(struct asus_wmi *asus)
1266 return -ENODEV; 1437 return -ENODEV;
1267 } 1438 }
1268 1439
 1440 /* CWAP allows defining the behavior of the Fn+F2 key;
 1441 * this method doesn't seem to be present on Eee PCs */
1442 if (asus->driver->wapf >= 0)
1443 asus_wmi_set_devstate(ASUS_WMI_DEVID_CWAP,
1444 asus->driver->wapf, NULL);
1445
1269 return asus_wmi_sysfs_init(asus->platform_device); 1446 return asus_wmi_sysfs_init(asus->platform_device);
1270} 1447}
1271 1448
@@ -1568,6 +1745,14 @@ static int asus_hotk_restore(struct device *device)
1568 bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_WWAN3G); 1745 bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_WWAN3G);
1569 rfkill_set_sw_state(asus->wwan3g.rfkill, bl); 1746 rfkill_set_sw_state(asus->wwan3g.rfkill, bl);
1570 } 1747 }
1748 if (asus->gps.rfkill) {
1749 bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_GPS);
1750 rfkill_set_sw_state(asus->gps.rfkill, bl);
1751 }
1752 if (asus->uwb.rfkill) {
1753 bl = !asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_UWB);
1754 rfkill_set_sw_state(asus->uwb.rfkill, bl);
1755 }
1571 1756
1572 return 0; 1757 return 0;
1573} 1758}
@@ -1604,7 +1789,7 @@ static int asus_wmi_probe(struct platform_device *pdev)
1604 1789
1605static bool used; 1790static bool used;
1606 1791
1607int asus_wmi_register_driver(struct asus_wmi_driver *driver) 1792int __init_or_module asus_wmi_register_driver(struct asus_wmi_driver *driver)
1608{ 1793{
1609 struct platform_driver *platform_driver; 1794 struct platform_driver *platform_driver;
1610 struct platform_device *platform_device; 1795 struct platform_device *platform_device;
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index c044522c876..8147c10161c 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -29,12 +29,15 @@
29 29
30#include <linux/platform_device.h> 30#include <linux/platform_device.h>
31 31
32#define ASUS_WMI_KEY_IGNORE (-1)
33
32struct module; 34struct module;
33struct key_entry; 35struct key_entry;
34struct asus_wmi; 36struct asus_wmi;
35 37
36struct asus_wmi_driver { 38struct asus_wmi_driver {
37 bool hotplug_wireless; 39 bool hotplug_wireless;
40 int wapf;
38 41
39 const char *name; 42 const char *name;
40 struct module *owner; 43 struct module *owner;
@@ -44,6 +47,10 @@ struct asus_wmi_driver {
44 const struct key_entry *keymap; 47 const struct key_entry *keymap;
45 const char *input_name; 48 const char *input_name;
46 const char *input_phys; 49 const char *input_phys;
50 /* Returns new code, value, and autorelease values in arguments.
51 * Return ASUS_WMI_KEY_IGNORE in code if event should be ignored. */
52 void (*key_filter) (struct asus_wmi_driver *driver, int *code,
53 unsigned int *value, bool *autorelease);
47 54
48 int (*probe) (struct platform_device *device); 55 int (*probe) (struct platform_device *device);
49 void (*quirks) (struct asus_wmi_driver *driver); 56 void (*quirks) (struct asus_wmi_driver *driver);
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index e39ab1d3ed8..f31fa4efa72 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -612,7 +612,6 @@ static int __init dell_init(void)
612 if (!bufferpage) 612 if (!bufferpage)
613 goto fail_buffer; 613 goto fail_buffer;
614 buffer = page_address(bufferpage); 614 buffer = page_address(bufferpage);
615 mutex_init(&buffer_mutex);
616 615
617 ret = dell_setup_rfkill(); 616 ret = dell_setup_rfkill();
618 617
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index ce790827e19..fa9a2171cc1 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -54,6 +54,8 @@ MODULE_ALIAS("wmi:"DELL_EVENT_GUID);
54 */ 54 */
55 55
56static const struct key_entry dell_wmi_legacy_keymap[] __initconst = { 56static const struct key_entry dell_wmi_legacy_keymap[] __initconst = {
57 { KE_IGNORE, 0x003a, { KEY_CAPSLOCK } },
58
57 { KE_KEY, 0xe045, { KEY_PROG1 } }, 59 { KE_KEY, 0xe045, { KEY_PROG1 } },
58 { KE_KEY, 0xe009, { KEY_EJECTCD } }, 60 { KE_KEY, 0xe009, { KEY_EJECTCD } },
59 61
@@ -85,6 +87,11 @@ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = {
85 { KE_IGNORE, 0xe013, { KEY_RESERVED } }, 87 { KE_IGNORE, 0xe013, { KEY_RESERVED } },
86 88
87 { KE_IGNORE, 0xe020, { KEY_MUTE } }, 89 { KE_IGNORE, 0xe020, { KEY_MUTE } },
90
91 /* Shortcut and audio panel keys */
92 { KE_IGNORE, 0xe025, { KEY_RESERVED } },
93 { KE_IGNORE, 0xe026, { KEY_RESERVED } },
94
88 { KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } }, 95 { KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } },
89 { KE_IGNORE, 0xe030, { KEY_VOLUMEUP } }, 96 { KE_IGNORE, 0xe030, { KEY_VOLUMEUP } },
90 { KE_IGNORE, 0xe033, { KEY_KBDILLUMUP } }, 97 { KE_IGNORE, 0xe033, { KEY_KBDILLUMUP } },
@@ -92,6 +99,9 @@ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = {
92 { KE_IGNORE, 0xe03a, { KEY_CAPSLOCK } }, 99 { KE_IGNORE, 0xe03a, { KEY_CAPSLOCK } },
93 { KE_IGNORE, 0xe045, { KEY_NUMLOCK } }, 100 { KE_IGNORE, 0xe045, { KEY_NUMLOCK } },
94 { KE_IGNORE, 0xe046, { KEY_SCROLLLOCK } }, 101 { KE_IGNORE, 0xe046, { KEY_SCROLLLOCK } },
102 { KE_IGNORE, 0xe0f7, { KEY_MUTE } },
103 { KE_IGNORE, 0xe0f8, { KEY_VOLUMEDOWN } },
104 { KE_IGNORE, 0xe0f9, { KEY_VOLUMEUP } },
95 { KE_END, 0 } 105 { KE_END, 0 }
96}; 106};
97 107
diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 4aa867a9b88..9f6e64302b4 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c
@@ -56,6 +56,11 @@ MODULE_PARM_DESC(hotplug_wireless,
56 "If your laptop needs that, please report to " 56 "If your laptop needs that, please report to "
57 "acpi4asus-user@lists.sourceforge.net."); 57 "acpi4asus-user@lists.sourceforge.net.");
58 58
59/* Values for T101MT "Home" key */
60#define HOME_PRESS 0xe4
61#define HOME_HOLD 0xea
62#define HOME_RELEASE 0xe5
63
59static const struct key_entry eeepc_wmi_keymap[] = { 64static const struct key_entry eeepc_wmi_keymap[] = {
60 /* Sleep already handled via generic ACPI code */ 65 /* Sleep already handled via generic ACPI code */
61 { KE_KEY, 0x30, { KEY_VOLUMEUP } }, 66 { KE_KEY, 0x30, { KEY_VOLUMEUP } },
@@ -71,6 +76,7 @@ static const struct key_entry eeepc_wmi_keymap[] = {
71 { KE_KEY, 0xcc, { KEY_SWITCHVIDEOMODE } }, 76 { KE_KEY, 0xcc, { KEY_SWITCHVIDEOMODE } },
72 { KE_KEY, 0xe0, { KEY_PROG1 } }, /* Task Manager */ 77 { KE_KEY, 0xe0, { KEY_PROG1 } }, /* Task Manager */
73 { KE_KEY, 0xe1, { KEY_F14 } }, /* Change Resolution */ 78 { KE_KEY, 0xe1, { KEY_F14 } }, /* Change Resolution */
79 { KE_KEY, HOME_PRESS, { KEY_CONFIG } }, /* Home/Express gate key */
74 { KE_KEY, 0xe8, { KEY_SCREENLOCK } }, 80 { KE_KEY, 0xe8, { KEY_SCREENLOCK } },
75 { KE_KEY, 0xe9, { KEY_BRIGHTNESS_ZERO } }, 81 { KE_KEY, 0xe9, { KEY_BRIGHTNESS_ZERO } },
76 { KE_KEY, 0xeb, { KEY_CAMERA_ZOOMOUT } }, 82 { KE_KEY, 0xeb, { KEY_CAMERA_ZOOMOUT } },
@@ -81,6 +87,25 @@ static const struct key_entry eeepc_wmi_keymap[] = {
81 { KE_END, 0}, 87 { KE_END, 0},
82}; 88};
83 89
90static void eeepc_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code,
91 unsigned int *value, bool *autorelease)
92{
93 switch (*code) {
94 case HOME_PRESS:
95 *value = 1;
96 *autorelease = 0;
97 break;
98 case HOME_HOLD:
99 *code = ASUS_WMI_KEY_IGNORE;
100 break;
101 case HOME_RELEASE:
102 *code = HOME_PRESS;
103 *value = 0;
104 *autorelease = 0;
105 break;
106 }
107}
108
84static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level, 109static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level,
85 void *context, void **retval) 110 void *context, void **retval)
86{ 111{
@@ -141,6 +166,7 @@ static void eeepc_dmi_check(struct asus_wmi_driver *driver)
141static void eeepc_wmi_quirks(struct asus_wmi_driver *driver) 166static void eeepc_wmi_quirks(struct asus_wmi_driver *driver)
142{ 167{
143 driver->hotplug_wireless = hotplug_wireless; 168 driver->hotplug_wireless = hotplug_wireless;
169 driver->wapf = -1;
144 eeepc_dmi_check(driver); 170 eeepc_dmi_check(driver);
145} 171}
146 172
@@ -151,6 +177,7 @@ static struct asus_wmi_driver asus_wmi_driver = {
151 .keymap = eeepc_wmi_keymap, 177 .keymap = eeepc_wmi_keymap,
152 .input_name = "Eee PC WMI hotkeys", 178 .input_name = "Eee PC WMI hotkeys",
153 .input_phys = EEEPC_WMI_FILE "/input0", 179 .input_phys = EEEPC_WMI_FILE "/input0",
180 .key_filter = eeepc_wmi_key_filter,
154 .probe = eeepc_wmi_probe, 181 .probe = eeepc_wmi_probe,
155 .quirks = eeepc_wmi_quirks, 182 .quirks = eeepc_wmi_quirks,
156}; 183};
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index bfdda33feb2..0c595410e78 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -32,13 +32,22 @@
32#include <linux/platform_device.h> 32#include <linux/platform_device.h>
33#include <linux/input.h> 33#include <linux/input.h>
34#include <linux/input/sparse-keymap.h> 34#include <linux/input/sparse-keymap.h>
35#include <linux/backlight.h>
36#include <linux/fb.h>
35 37
36#define IDEAPAD_RFKILL_DEV_NUM (3) 38#define IDEAPAD_RFKILL_DEV_NUM (3)
37 39
40#define CFG_BT_BIT (16)
41#define CFG_3G_BIT (17)
42#define CFG_WIFI_BIT (18)
43#define CFG_CAMERA_BIT (19)
44
38struct ideapad_private { 45struct ideapad_private {
39 struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; 46 struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM];
40 struct platform_device *platform_device; 47 struct platform_device *platform_device;
41 struct input_dev *inputdev; 48 struct input_dev *inputdev;
49 struct backlight_device *blightdev;
50 unsigned long cfg;
42}; 51};
43 52
44static acpi_handle ideapad_handle; 53static acpi_handle ideapad_handle;
@@ -155,7 +164,7 @@ static int write_ec_cmd(acpi_handle handle, int cmd, unsigned long data)
155} 164}
156 165
157/* 166/*
158 * camera power 167 * sysfs
159 */ 168 */
160static ssize_t show_ideapad_cam(struct device *dev, 169static ssize_t show_ideapad_cam(struct device *dev,
161 struct device_attribute *attr, 170 struct device_attribute *attr,
@@ -186,6 +195,44 @@ static ssize_t store_ideapad_cam(struct device *dev,
186 195
187static DEVICE_ATTR(camera_power, 0644, show_ideapad_cam, store_ideapad_cam); 196static DEVICE_ATTR(camera_power, 0644, show_ideapad_cam, store_ideapad_cam);
188 197
198static ssize_t show_ideapad_cfg(struct device *dev,
199 struct device_attribute *attr,
200 char *buf)
201{
202 struct ideapad_private *priv = dev_get_drvdata(dev);
203
204 return sprintf(buf, "0x%.8lX\n", priv->cfg);
205}
206
207static DEVICE_ATTR(cfg, 0444, show_ideapad_cfg, NULL);
208
209static struct attribute *ideapad_attributes[] = {
210 &dev_attr_camera_power.attr,
211 &dev_attr_cfg.attr,
212 NULL
213};
214
215static mode_t ideapad_is_visible(struct kobject *kobj,
216 struct attribute *attr,
217 int idx)
218{
219 struct device *dev = container_of(kobj, struct device, kobj);
220 struct ideapad_private *priv = dev_get_drvdata(dev);
221 bool supported;
222
223 if (attr == &dev_attr_camera_power.attr)
224 supported = test_bit(CFG_CAMERA_BIT, &(priv->cfg));
225 else
226 supported = true;
227
228 return supported ? attr->mode : 0;
229}
230
231static struct attribute_group ideapad_attribute_group = {
232 .is_visible = ideapad_is_visible,
233 .attrs = ideapad_attributes
234};
235
189/* 236/*
190 * Rfkill 237 * Rfkill
191 */ 238 */
@@ -197,9 +244,9 @@ struct ideapad_rfk_data {
197}; 244};
198 245
199const struct ideapad_rfk_data ideapad_rfk_data[] = { 246const struct ideapad_rfk_data ideapad_rfk_data[] = {
200 { "ideapad_wlan", 18, 0x15, RFKILL_TYPE_WLAN }, 247 { "ideapad_wlan", CFG_WIFI_BIT, 0x15, RFKILL_TYPE_WLAN },
201 { "ideapad_bluetooth", 16, 0x17, RFKILL_TYPE_BLUETOOTH }, 248 { "ideapad_bluetooth", CFG_BT_BIT, 0x17, RFKILL_TYPE_BLUETOOTH },
202 { "ideapad_3g", 17, 0x20, RFKILL_TYPE_WWAN }, 249 { "ideapad_3g", CFG_3G_BIT, 0x20, RFKILL_TYPE_WWAN },
203}; 250};
204 251
205static int ideapad_rfk_set(void *data, bool blocked) 252static int ideapad_rfk_set(void *data, bool blocked)
@@ -265,8 +312,7 @@ static int __devinit ideapad_register_rfkill(struct acpi_device *adevice,
265 return 0; 312 return 0;
266} 313}
267 314
268static void __devexit ideapad_unregister_rfkill(struct acpi_device *adevice, 315static void ideapad_unregister_rfkill(struct acpi_device *adevice, int dev)
269 int dev)
270{ 316{
271 struct ideapad_private *priv = dev_get_drvdata(&adevice->dev); 317 struct ideapad_private *priv = dev_get_drvdata(&adevice->dev);
272 318
@@ -280,15 +326,6 @@ static void __devexit ideapad_unregister_rfkill(struct acpi_device *adevice,
280/* 326/*
281 * Platform device 327 * Platform device
282 */ 328 */
283static struct attribute *ideapad_attributes[] = {
284 &dev_attr_camera_power.attr,
285 NULL
286};
287
288static struct attribute_group ideapad_attribute_group = {
289 .attrs = ideapad_attributes
290};
291
292static int __devinit ideapad_platform_init(struct ideapad_private *priv) 329static int __devinit ideapad_platform_init(struct ideapad_private *priv)
293{ 330{
294 int result; 331 int result;
@@ -369,7 +406,7 @@ err_free_dev:
369 return error; 406 return error;
370} 407}
371 408
372static void __devexit ideapad_input_exit(struct ideapad_private *priv) 409static void ideapad_input_exit(struct ideapad_private *priv)
373{ 410{
374 sparse_keymap_free(priv->inputdev); 411 sparse_keymap_free(priv->inputdev);
375 input_unregister_device(priv->inputdev); 412 input_unregister_device(priv->inputdev);
@@ -383,6 +420,98 @@ static void ideapad_input_report(struct ideapad_private *priv,
383} 420}
384 421
385/* 422/*
423 * backlight
424 */
425static int ideapad_backlight_get_brightness(struct backlight_device *blightdev)
426{
427 unsigned long now;
428
429 if (read_ec_data(ideapad_handle, 0x12, &now))
430 return -EIO;
431 return now;
432}
433
434static int ideapad_backlight_update_status(struct backlight_device *blightdev)
435{
436 if (write_ec_cmd(ideapad_handle, 0x13, blightdev->props.brightness))
437 return -EIO;
438 if (write_ec_cmd(ideapad_handle, 0x33,
439 blightdev->props.power == FB_BLANK_POWERDOWN ? 0 : 1))
440 return -EIO;
441
442 return 0;
443}
444
445static const struct backlight_ops ideapad_backlight_ops = {
446 .get_brightness = ideapad_backlight_get_brightness,
447 .update_status = ideapad_backlight_update_status,
448};
449
450static int ideapad_backlight_init(struct ideapad_private *priv)
451{
452 struct backlight_device *blightdev;
453 struct backlight_properties props;
454 unsigned long max, now, power;
455
456 if (read_ec_data(ideapad_handle, 0x11, &max))
457 return -EIO;
458 if (read_ec_data(ideapad_handle, 0x12, &now))
459 return -EIO;
460 if (read_ec_data(ideapad_handle, 0x18, &power))
461 return -EIO;
462
463 memset(&props, 0, sizeof(struct backlight_properties));
464 props.max_brightness = max;
465 props.type = BACKLIGHT_PLATFORM;
466 blightdev = backlight_device_register("ideapad",
467 &priv->platform_device->dev,
468 priv,
469 &ideapad_backlight_ops,
470 &props);
471 if (IS_ERR(blightdev)) {
472 pr_err("Could not register backlight device\n");
473 return PTR_ERR(blightdev);
474 }
475
476 priv->blightdev = blightdev;
477 blightdev->props.brightness = now;
478 blightdev->props.power = power ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN;
479 backlight_update_status(blightdev);
480
481 return 0;
482}
483
484static void ideapad_backlight_exit(struct ideapad_private *priv)
485{
486 if (priv->blightdev)
487 backlight_device_unregister(priv->blightdev);
488 priv->blightdev = NULL;
489}
490
491static void ideapad_backlight_notify_power(struct ideapad_private *priv)
492{
493 unsigned long power;
494 struct backlight_device *blightdev = priv->blightdev;
495
496 if (read_ec_data(ideapad_handle, 0x18, &power))
497 return;
498 blightdev->props.power = power ? FB_BLANK_UNBLANK : FB_BLANK_POWERDOWN;
499}
500
501static void ideapad_backlight_notify_brightness(struct ideapad_private *priv)
502{
503 unsigned long now;
504
 505 /* the acpi video driver controls brightness; just consume the event */
506 if (priv->blightdev == NULL) {
507 read_ec_data(ideapad_handle, 0x12, &now);
508 return;
509 }
510
511 backlight_force_update(priv->blightdev, BACKLIGHT_UPDATE_HOTKEY);
512}
513
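For reference, the embedded-controller registers the new backlight code relies on, as used in this patch (descriptive labels only; no official register map is published):

	/* read  0x11  maximum brightness
	 * read  0x12  current brightness
	 * read  0x18  panel power state (non-zero = on)
	 * write 0x13  set brightness
	 * write 0x33  panel power: 1 = on, 0 = off */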
514/*
386 * module init/exit 515 * module init/exit
387 */ 516 */
388static const struct acpi_device_id ideapad_device_ids[] = { 517static const struct acpi_device_id ideapad_device_ids[] = {
@@ -393,10 +522,11 @@ MODULE_DEVICE_TABLE(acpi, ideapad_device_ids);
393 522
394static int __devinit ideapad_acpi_add(struct acpi_device *adevice) 523static int __devinit ideapad_acpi_add(struct acpi_device *adevice)
395{ 524{
396 int ret, i, cfg; 525 int ret, i;
526 unsigned long cfg;
397 struct ideapad_private *priv; 527 struct ideapad_private *priv;
398 528
399 if (read_method_int(adevice->handle, "_CFG", &cfg)) 529 if (read_method_int(adevice->handle, "_CFG", (int *)&cfg))
400 return -ENODEV; 530 return -ENODEV;
401 531
402 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 532 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -404,6 +534,7 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice)
404 return -ENOMEM; 534 return -ENOMEM;
405 dev_set_drvdata(&adevice->dev, priv); 535 dev_set_drvdata(&adevice->dev, priv);
406 ideapad_handle = adevice->handle; 536 ideapad_handle = adevice->handle;
537 priv->cfg = cfg;
407 538
408 ret = ideapad_platform_init(priv); 539 ret = ideapad_platform_init(priv);
409 if (ret) 540 if (ret)
@@ -414,15 +545,25 @@ static int __devinit ideapad_acpi_add(struct acpi_device *adevice)
414 goto input_failed; 545 goto input_failed;
415 546
416 for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) { 547 for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) {
417 if (test_bit(ideapad_rfk_data[i].cfgbit, (unsigned long *)&cfg)) 548 if (test_bit(ideapad_rfk_data[i].cfgbit, &cfg))
418 ideapad_register_rfkill(adevice, i); 549 ideapad_register_rfkill(adevice, i);
419 else 550 else
420 priv->rfk[i] = NULL; 551 priv->rfk[i] = NULL;
421 } 552 }
422 ideapad_sync_rfk_state(adevice); 553 ideapad_sync_rfk_state(adevice);
423 554
555 if (!acpi_video_backlight_support()) {
556 ret = ideapad_backlight_init(priv);
557 if (ret && ret != -ENODEV)
558 goto backlight_failed;
559 }
560
424 return 0; 561 return 0;
425 562
563backlight_failed:
564 for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++)
565 ideapad_unregister_rfkill(adevice, i);
566 ideapad_input_exit(priv);
426input_failed: 567input_failed:
427 ideapad_platform_exit(priv); 568 ideapad_platform_exit(priv);
428platform_failed: 569platform_failed:
@@ -435,6 +576,7 @@ static int __devexit ideapad_acpi_remove(struct acpi_device *adevice, int type)
435 struct ideapad_private *priv = dev_get_drvdata(&adevice->dev); 576 struct ideapad_private *priv = dev_get_drvdata(&adevice->dev);
436 int i; 577 int i;
437 578
579 ideapad_backlight_exit(priv);
438 for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) 580 for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++)
439 ideapad_unregister_rfkill(adevice, i); 581 ideapad_unregister_rfkill(adevice, i);
440 ideapad_input_exit(priv); 582 ideapad_input_exit(priv);
@@ -459,12 +601,19 @@ static void ideapad_acpi_notify(struct acpi_device *adevice, u32 event)
459 vpc1 = (vpc2 << 8) | vpc1; 601 vpc1 = (vpc2 << 8) | vpc1;
460 for (vpc_bit = 0; vpc_bit < 16; vpc_bit++) { 602 for (vpc_bit = 0; vpc_bit < 16; vpc_bit++) {
461 if (test_bit(vpc_bit, &vpc1)) { 603 if (test_bit(vpc_bit, &vpc1)) {
462 if (vpc_bit == 9) 604 switch (vpc_bit) {
605 case 9:
463 ideapad_sync_rfk_state(adevice); 606 ideapad_sync_rfk_state(adevice);
464 else if (vpc_bit == 4) 607 break;
465 read_ec_data(handle, 0x12, &vpc2); 608 case 4:
466 else 609 ideapad_backlight_notify_brightness(priv);
610 break;
611 case 2:
612 ideapad_backlight_notify_power(priv);
613 break;
614 default:
467 ideapad_input_report(priv, vpc_bit); 615 ideapad_input_report(priv, vpc_bit);
616 }
468 } 617 }
469 } 618 }
470} 619}
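
The notify handler above replaces an if/else chain with a switch over the set bits of the VPC event word, routing bits 9, 4 and 2 to dedicated handlers and everything else to the hotkey report. A minimal userspace sketch of that dispatch pattern (handler bodies and the sample event word are illustrative, not the driver's):

/* Walk the set bits of a 16-bit event word and route each one,
 * mirroring the switch added in ideapad_acpi_notify() above. */
#include <stdio.h>

static void sync_rfkill(void)       { puts("sync rfkill state"); }
static void notify_brightness(void) { puts("brightness changed"); }
static void notify_power(void)      { puts("backlight power changed"); }
static void report_key(int bit)     { printf("report hotkey %d\n", bit); }

static void dispatch_vpc(unsigned long vpc)
{
	int bit;

	for (bit = 0; bit < 16; bit++) {
		if (!(vpc & (1UL << bit)))
			continue;
		switch (bit) {
		case 9:
			sync_rfkill();
			break;
		case 4:
			notify_brightness();
			break;
		case 2:
			notify_power();
			break;
		default:
			report_key(bit);
		}
	}
}

int main(void)
{
	dispatch_vpc((1UL << 9) | (1UL << 2) | (1UL << 5));
	return 0;
}
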
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index 5ffe7c39814..809a3ae943c 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -403,7 +403,7 @@ static void ips_cpu_raise(struct ips_driver *ips)
403 403
404 thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8); 404 thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
405 405
406 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN; 406 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
407 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 407 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
408 408
409 turbo_override &= ~TURBO_TDP_MASK; 409 turbo_override &= ~TURBO_TDP_MASK;
@@ -438,7 +438,7 @@ static void ips_cpu_lower(struct ips_driver *ips)
438 438
439 thm_writew(THM_MPCPC, (new_limit * 10) / 8); 439 thm_writew(THM_MPCPC, (new_limit * 10) / 8);
440 440
441 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN; 441 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN;
442 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override); 442 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
443 443
444 turbo_override &= ~TURBO_TDP_MASK; 444 turbo_override &= ~TURBO_TDP_MASK;
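
Both hunks above fix the same copy-and-paste slip: OR-ing TURBO_TDC_OVR_EN with itself left TURBO_TDP_OVR_EN clear, so the TDP override was never actually enabled. A small standalone sketch of why the duplicate is a silent no-op (bit positions here are made up; the real layout lives in the TURBO_POWER_CURRENT_LIMIT MSR):

#include <assert.h>
#include <stdint.h>

#define TDC_OVR_EN (1ULL << 31)	/* illustrative positions only */
#define TDP_OVR_EN (1ULL << 30)

int main(void)
{
	uint64_t buggy = 0, fixed = 0;

	buggy |= TDC_OVR_EN | TDC_OVR_EN;	/* duplicate: TDP bit never set */
	fixed |= TDC_OVR_EN | TDP_OVR_EN;	/* the corrected pair */

	assert(!(buggy & TDP_OVR_EN));
	assert(fixed & TDP_OVR_EN);
	return 0;
}
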
diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c
index 809adea4965..abddc83e9fd 100644
--- a/drivers/platform/x86/intel_menlow.c
+++ b/drivers/platform/x86/intel_menlow.c
@@ -477,6 +477,8 @@ static acpi_status intel_menlow_register_sensor(acpi_handle handle, u32 lvl,
477 return AE_ERROR; 477 return AE_ERROR;
478 } 478 }
479 479
480 return AE_OK;
481
480 aux1_not_found: 482 aux1_not_found:
481 if (status == AE_NOT_FOUND) 483 if (status == AE_NOT_FOUND)
482 return AE_OK; 484 return AE_OK;
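
The added `return AE_OK;` matters because, without it, the success path fell straight through into the `aux1_not_found:` handling below the label. A toy sketch of that control flow, with hypothetical names standing in for the ACPI calls:

#include <stdio.h>

static int register_sensor(int simulate_failure)
{
	int status = simulate_failure ? -1 : 0;

	if (status)
		goto aux1_not_found;

	return 0;	/* the added early return: success stops here */

aux1_not_found:
	printf("error handling runs (status=%d)\n", status);
	return status;
}

int main(void)
{
	printf("ok path  -> %d\n", register_sensor(0));
	printf("err path -> %d\n", register_sensor(1));
	return 0;
}
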
diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c
index 3a578323122..ccd7b1f8351 100644
--- a/drivers/platform/x86/intel_mid_thermal.c
+++ b/drivers/platform/x86/intel_mid_thermal.c
@@ -493,20 +493,30 @@ static int mid_thermal_probe(struct platform_device *pdev)
493 493
494 /* Register each sensor with the generic thermal framework*/ 494 /* Register each sensor with the generic thermal framework*/
495 for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { 495 for (i = 0; i < MSIC_THERMAL_SENSORS; i++) {
496 struct thermal_device_info *td_info = initialize_sensor(i);
497
498 if (!td_info) {
499 ret = -ENOMEM;
500 goto err;
501 }
496 pinfo->tzd[i] = thermal_zone_device_register(name[i], 502 pinfo->tzd[i] = thermal_zone_device_register(name[i],
497 0, initialize_sensor(i), &tzd_ops, 0, 0, 0, 0); 503 0, td_info, &tzd_ops, 0, 0, 0, 0);
498 if (IS_ERR(pinfo->tzd[i])) 504 if (IS_ERR(pinfo->tzd[i])) {
499 goto reg_fail; 505 kfree(td_info);
506 ret = PTR_ERR(pinfo->tzd[i]);
507 goto err;
508 }
500 } 509 }
501 510
502 pinfo->pdev = pdev; 511 pinfo->pdev = pdev;
503 platform_set_drvdata(pdev, pinfo); 512 platform_set_drvdata(pdev, pinfo);
504 return 0; 513 return 0;
505 514
506reg_fail: 515err:
507 ret = PTR_ERR(pinfo->tzd[i]); 516 while (--i >= 0) {
508 while (--i >= 0) 517 kfree(pinfo->tzd[i]->devdata);
509 thermal_zone_device_unregister(pinfo->tzd[i]); 518 thermal_zone_device_unregister(pinfo->tzd[i]);
519 }
510 configure_adc(0); 520 configure_adc(0);
511 kfree(pinfo); 521 kfree(pinfo);
512 return ret; 522 return ret;
@@ -524,8 +534,10 @@ static int mid_thermal_remove(struct platform_device *pdev)
524 int i; 534 int i;
525 struct platform_info *pinfo = platform_get_drvdata(pdev); 535 struct platform_info *pinfo = platform_get_drvdata(pdev);
526 536
527 for (i = 0; i < MSIC_THERMAL_SENSORS; i++) 537 for (i = 0; i < MSIC_THERMAL_SENSORS; i++) {
538 kfree(pinfo->tzd[i]->devdata);
528 thermal_zone_device_unregister(pinfo->tzd[i]); 539 thermal_zone_device_unregister(pinfo->tzd[i]);
540 }
529 541
530 kfree(pinfo); 542 kfree(pinfo);
531 platform_set_drvdata(pdev, NULL); 543 platform_set_drvdata(pdev, NULL);
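
The probe rework above checks each initialize_sensor() allocation, records PTR_ERR() when registration fails, and unwinds only the zones that were actually registered, freeing their devdata; remove gains the matching kfree(). A self-contained sketch of that register-then-unwind shape (register_zone()/unregister_zone() are stand-ins for the thermal-zone API):

#include <stdio.h>
#include <stdlib.h>

#define NSENSORS 3

static void *register_zone(int i)    { return i == 2 ? NULL : malloc(16); }
static void unregister_zone(void *z) { free(z); }

int main(void)
{
	void *zone[NSENSORS];
	int i;

	for (i = 0; i < NSENSORS; i++) {
		zone[i] = register_zone(i);
		if (!zone[i])
			goto err;
	}
	puts("all sensors registered");
	return 0;

err:
	fprintf(stderr, "sensor %d failed; unwinding\n", i);
	while (--i >= 0)	/* release only what succeeded */
		unregister_zone(zone[i]);
	return 1;
}
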
diff --git a/drivers/platform/x86/intel_rar_register.c b/drivers/platform/x86/intel_rar_register.c
index bde47e9080c..c8a6aed4527 100644
--- a/drivers/platform/x86/intel_rar_register.c
+++ b/drivers/platform/x86/intel_rar_register.c
@@ -637,15 +637,13 @@ end_function:
637 return error; 637 return error;
638} 638}
639 639
640const struct pci_device_id rar_pci_id_tbl[] = { 640static DEFINE_PCI_DEVICE_TABLE(rar_pci_id_tbl) = {
641 { PCI_VDEVICE(INTEL, 0x4110) }, 641 { PCI_VDEVICE(INTEL, 0x4110) },
642 { 0 } 642 { 0 }
643}; 643};
644 644
645MODULE_DEVICE_TABLE(pci, rar_pci_id_tbl); 645MODULE_DEVICE_TABLE(pci, rar_pci_id_tbl);
646 646
647const struct pci_device_id *my_id_table = rar_pci_id_tbl;
648
649/* field for registering driver to PCI device */ 647/* field for registering driver to PCI device */
650static struct pci_driver rar_pci_driver = { 648static struct pci_driver rar_pci_driver = {
651 .name = "rar_register_driver", 649 .name = "rar_register_driver",
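
DEFINE_PCI_DEVICE_TABLE makes the ID table static and const (the kernel's version also places it in a __devinitconst section), which is why the exported `my_id_table` alias could be dropped as dead code; intel_scu_ipc.c below gets the same conversion. A userspace approximation of the expansion, with the section attribute and PCI_VDEVICE's vendor-ID paste simplified away:

#include <stdio.h>

struct pci_device_id { unsigned int vendor, device; };

/* simplified: the real PCI_VDEVICE() pastes PCI_VENDOR_ID_<vend> */
#define PCI_VDEVICE(vend, dev)	{ 0x8086, (dev) }
/* simplified: the kernel's macro also tags the table __devinitconst */
#define DEFINE_PCI_DEVICE_TABLE(tbl) \
	static const struct pci_device_id tbl[]

DEFINE_PCI_DEVICE_TABLE(rar_pci_id_tbl) = {
	PCI_VDEVICE(INTEL, 0x4110),
	{ 0 }
};

int main(void)
{
	printf("%04x:%04x\n",
	       rar_pci_id_tbl[0].vendor, rar_pci_id_tbl[0].device);
	return 0;
}
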
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 940accbe28d..c86665369a2 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -725,7 +725,7 @@ static void ipc_remove(struct pci_dev *pdev)
725 intel_scu_devices_destroy(); 725 intel_scu_devices_destroy();
726} 726}
727 727
728static const struct pci_device_id pci_ids[] = { 728static DEFINE_PCI_DEVICE_TABLE(pci_ids) = {
729 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080e)}, 729 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080e)},
730 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x082a)}, 730 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x082a)},
731 { 0,} 731 { 0,}
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index 3ff629df9f0..f204643c505 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -538,6 +538,15 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = {
538 }, 538 },
539 .callback = dmi_check_cb 539 .callback = dmi_check_cb
540 }, 540 },
541 {
542 .ident = "MSI U270",
543 .matches = {
544 DMI_MATCH(DMI_SYS_VENDOR,
545 "Micro-Star International Co., Ltd."),
546 DMI_MATCH(DMI_PRODUCT_NAME, "U270 series"),
547 },
548 .callback = dmi_check_cb
549 },
541 { } 550 { }
542}; 551};
543 552
@@ -996,3 +1005,4 @@ MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N034:*");
996MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N051:*"); 1005MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N051:*");
997MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N014:*"); 1006MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N014:*");
998MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnCR620:*"); 1007MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnCR620:*");
1008MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnU270series:*");
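
For the new U270 entry to fire, every DMI_MATCH field must match the corresponding firmware string; the kernel compares with strstr(), so the listed text only has to appear as a substring. A simplified userspace model of that matching (two fields only, names illustrative):

#include <stdio.h>
#include <string.h>

struct dmi_match { const char *vendor, *product; };

static int dmi_entry_matches(const struct dmi_match *m,
			     const char *sys_vendor, const char *product)
{
	return strstr(sys_vendor, m->vendor) && strstr(product, m->product);
}

int main(void)
{
	const struct dmi_match u270 = {
		.vendor  = "Micro-Star International Co., Ltd.",
		.product = "U270 series",
	};

	/* substring match: a longer firmware string still qualifies */
	printf("match: %d\n",
	       dmi_entry_matches(&u270,
				 "Micro-Star International Co., Ltd.",
				 "U270 series (CR650/CX650)"));
	return 0;
}
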
diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index c832e3356cd..6f40bf202dc 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c
@@ -272,6 +272,7 @@ static int __init msi_wmi_init(void)
272err_free_backlight: 272err_free_backlight:
273 backlight_device_unregister(backlight); 273 backlight_device_unregister(backlight);
274err_free_input: 274err_free_input:
275 sparse_keymap_free(msi_wmi_input_dev);
275 input_unregister_device(msi_wmi_input_dev); 276 input_unregister_device(msi_wmi_input_dev);
276err_uninstall_notifier: 277err_uninstall_notifier:
277 wmi_remove_notify_handler(MSIWMI_EVENT_GUID); 278 wmi_remove_notify_handler(MSIWMI_EVENT_GUID);
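
The added sparse_keymap_free() plugs a leak on the init error path: the keymap allocated by sparse_keymap_setup() is not released by input_unregister_device() alone, so teardown has to mirror setup. A toy model of that pairing, with stand-in resource functions rather than the input API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct input_dev { int *keymap; };

static int setup_keymap(struct input_dev *d)
{
	d->keymap = calloc(64, sizeof(*d->keymap));
	return d->keymap ? 0 : -ENOMEM;
}

static void free_keymap(struct input_dev *d)      { free(d->keymap); }
static void unregister_input(struct input_dev *d) { (void)d; }

int main(void)
{
	struct input_dev dev;

	if (setup_keymap(&dev))
		return 1;

	/* ... a later init step fails ... */
	free_keymap(&dev);	/* the call the error path was missing */
	unregister_input(&dev);
	puts("unwound without leaking the keymap");
	return 0;
}
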
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index d347116d150..35916301104 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -521,6 +521,16 @@ static struct dmi_system_id __initdata samsung_dmi_table[] = {
521 .callback = dmi_check_cb, 521 .callback = dmi_check_cb,
522 }, 522 },
523 { 523 {
524 .ident = "N510",
525 .matches = {
526 DMI_MATCH(DMI_SYS_VENDOR,
527 "SAMSUNG ELECTRONICS CO., LTD."),
528 DMI_MATCH(DMI_PRODUCT_NAME, "N510"),
529 DMI_MATCH(DMI_BOARD_NAME, "N510"),
530 },
531 .callback = dmi_check_cb,
532 },
533 {
524 .ident = "X125", 534 .ident = "X125",
525 .matches = { 535 .matches = {
526 DMI_MATCH(DMI_SYS_VENDOR, 536 DMI_MATCH(DMI_SYS_VENDOR,
@@ -601,6 +611,16 @@ static struct dmi_system_id __initdata samsung_dmi_table[] = {
601 .callback = dmi_check_cb, 611 .callback = dmi_check_cb,
602 }, 612 },
603 { 613 {
614 .ident = "N150/N210/N220",
615 .matches = {
616 DMI_MATCH(DMI_SYS_VENDOR,
617 "SAMSUNG ELECTRONICS CO., LTD."),
618 DMI_MATCH(DMI_PRODUCT_NAME, "N150/N210/N220"),
619 DMI_MATCH(DMI_BOARD_NAME, "N150/N210/N220"),
620 },
621 .callback = dmi_check_cb,
622 },
623 {
604 .ident = "N150/N210/N220/N230", 624 .ident = "N150/N210/N220/N230",
605 .matches = { 625 .matches = {
606 DMI_MATCH(DMI_SYS_VENDOR, 626 DMI_MATCH(DMI_SYS_VENDOR,
diff --git a/drivers/platform/x86/samsung-q10.c b/drivers/platform/x86/samsung-q10.c
new file mode 100644
index 00000000000..1e54ae74274
--- /dev/null
+++ b/drivers/platform/x86/samsung-q10.c
@@ -0,0 +1,196 @@
1/*
2 * Driver for Samsung Q10 and related laptops: controls the backlight
3 *
4 * Copyright (c) 2011 Frederick van der Wyck <fvanderwyck@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/platform_device.h>
16#include <linux/backlight.h>
17#include <linux/i8042.h>
18#include <linux/dmi.h>
19
20#define SAMSUNGQ10_BL_MAX_INTENSITY 255
21#define SAMSUNGQ10_BL_DEFAULT_INTENSITY 185
22
23#define SAMSUNGQ10_BL_8042_CMD 0xbe
24#define SAMSUNGQ10_BL_8042_DATA { 0x89, 0x91 }
25
26static int samsungq10_bl_brightness;
27
28static bool force;
29module_param(force, bool, 0);
30MODULE_PARM_DESC(force,
31 "Disable the DMI check and force the driver to be loaded");
32
33static int samsungq10_bl_set_intensity(struct backlight_device *bd)
34{
35
36 int brightness = bd->props.brightness;
37 unsigned char c[3] = SAMSUNGQ10_BL_8042_DATA;
38
39 c[2] = (unsigned char)brightness;
40 i8042_lock_chip();
41 i8042_command(c, (0x30 << 8) | SAMSUNGQ10_BL_8042_CMD);
42 i8042_unlock_chip();
43 samsungq10_bl_brightness = brightness;
44
45 return 0;
46}
47
48static int samsungq10_bl_get_intensity(struct backlight_device *bd)
49{
50 return samsungq10_bl_brightness;
51}
52
53static const struct backlight_ops samsungq10_bl_ops = {
54 .get_brightness = samsungq10_bl_get_intensity,
55 .update_status = samsungq10_bl_set_intensity,
56};
57
58#ifdef CONFIG_PM_SLEEP
59static int samsungq10_suspend(struct device *dev)
60{
61 return 0;
62}
63
64static int samsungq10_resume(struct device *dev)
65{
66
67 struct backlight_device *bd = dev_get_drvdata(dev);
68
69 samsungq10_bl_set_intensity(bd);
70 return 0;
71}
72#else
73#define samsungq10_suspend NULL
74#define samsungq10_resume NULL
75#endif
76
77static SIMPLE_DEV_PM_OPS(samsungq10_pm_ops,
78 samsungq10_suspend, samsungq10_resume);
79
80static int __devinit samsungq10_probe(struct platform_device *pdev)
81{
82
83 struct backlight_properties props;
84 struct backlight_device *bd;
85
86 memset(&props, 0, sizeof(struct backlight_properties));
87 props.type = BACKLIGHT_PLATFORM;
88 props.max_brightness = SAMSUNGQ10_BL_MAX_INTENSITY;
89 bd = backlight_device_register("samsung", &pdev->dev, NULL,
90 &samsungq10_bl_ops, &props);
91 if (IS_ERR(bd))
92 return PTR_ERR(bd);
93
94 platform_set_drvdata(pdev, bd);
95
96 bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY;
97 samsungq10_bl_set_intensity(bd);
98
99 return 0;
100}
101
102static int __devexit samsungq10_remove(struct platform_device *pdev)
103{
104
105 struct backlight_device *bd = platform_get_drvdata(pdev);
106
107 bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY;
108 samsungq10_bl_set_intensity(bd);
109
110 backlight_device_unregister(bd);
111
112 return 0;
113}
114
115static struct platform_driver samsungq10_driver = {
116 .driver = {
117 .name = KBUILD_MODNAME,
118 .owner = THIS_MODULE,
119 .pm = &samsungq10_pm_ops,
120 },
121 .probe = samsungq10_probe,
122 .remove = __devexit_p(samsungq10_remove),
123};
124
125static struct platform_device *samsungq10_device;
126
127static int __init dmi_check_callback(const struct dmi_system_id *id)
128{
129 printk(KERN_INFO KBUILD_MODNAME ": found model '%s'\n", id->ident);
130 return 1;
131}
132
133static struct dmi_system_id __initdata samsungq10_dmi_table[] = {
134 {
135 .ident = "Samsung Q10",
136 .matches = {
137 DMI_MATCH(DMI_SYS_VENDOR, "Samsung"),
138 DMI_MATCH(DMI_PRODUCT_NAME, "SQ10"),
139 },
140 .callback = dmi_check_callback,
141 },
142 {
143 .ident = "Samsung Q20",
144 .matches = {
145 DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG Electronics"),
146 DMI_MATCH(DMI_PRODUCT_NAME, "SENS Q20"),
147 },
148 .callback = dmi_check_callback,
149 },
150 {
151 .ident = "Samsung Q25",
152 .matches = {
153 DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG Electronics"),
154 DMI_MATCH(DMI_PRODUCT_NAME, "NQ25"),
155 },
156 .callback = dmi_check_callback,
157 },
158 {
159 .ident = "Dell Latitude X200",
160 .matches = {
161 DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
162 DMI_MATCH(DMI_PRODUCT_NAME, "X200"),
163 },
164 .callback = dmi_check_callback,
165 },
166 { },
167};
168MODULE_DEVICE_TABLE(dmi, samsungq10_dmi_table);
169
170static int __init samsungq10_init(void)
171{
172 if (!force && !dmi_check_system(samsungq10_dmi_table))
173 return -ENODEV;
174
175 samsungq10_device = platform_create_bundle(&samsungq10_driver,
176 samsungq10_probe,
177 NULL, 0, NULL, 0);
178
179 if (IS_ERR(samsungq10_device))
180 return PTR_ERR(samsungq10_device);
181
182 return 0;
183}
184
185static void __exit samsungq10_exit(void)
186{
187 platform_device_unregister(samsungq10_device);
188 platform_driver_unregister(&samsungq10_driver);
189}
190
191module_init(samsungq10_init);
192module_exit(samsungq10_exit);
193
194MODULE_AUTHOR("Frederick van der Wyck <fvanderwyck@gmail.com>");
195MODULE_DESCRIPTION("Samsung Q10 Driver");
196MODULE_LICENSE("GPL");
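
The interesting detail in this new driver is how samsungq10_bl_set_intensity() builds its i8042 command word. As I read the i8042_command() encoding, the low byte is the controller command and the next two nibbles give the number of reply bytes to read and parameter bytes to write, so `(0x30 << 8) | 0xbe` means "command 0xbe, write the three data bytes, read nothing". A standalone decoder of that word under the same assumption:

#include <stdio.h>

#define SAMSUNGQ10_BL_8042_CMD	0xbe

int main(void)
{
	unsigned int cmd = (0x30 << 8) | SAMSUNGQ10_BL_8042_CMD;

	printf("controller command:  0x%02x\n", cmd & 0xff);
	printf("param bytes written: %u\n", (cmd >> 12) & 0xf);
	printf("reply bytes read:    %u\n", (cmd >> 8) & 0xf);
	return 0;
}
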
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 26c5b117df2..7bd829f247e 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -3186,8 +3186,17 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
3186 KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */ 3186 KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */
3187 3187
3188 /* (assignments unknown, please report if found) */ 3188 /* (assignments unknown, please report if found) */
3189 KEY_UNKNOWN, KEY_UNKNOWN,
3190
3191 /*
3192 * The mic mute button only sends 0x1a. It does not
3193 * automatically mute the mic or change the mute light.
3194 */
3195 KEY_MICMUTE, /* 0x1a: Mic mute (since ?400 or so) */
3196
3197 /* (assignments unknown, please report if found) */
3189 KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, 3198 KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
3190 KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, 3199 KEY_UNKNOWN,
3191 }, 3200 },
3192 }; 3201 };
3193 3202
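
The hunk above grows the hotkey scan-to-keycode table so slot 0x1a reports a mic-mute key instead of KEY_UNKNOWN; per the comment, the firmware only sends the event, it does not mute anything itself. A toy lookup along the same lines (248 matches my reading of input.h's KEY_MICMUTE, but treat it as a placeholder):

#include <stdio.h>

enum { KEY_UNKNOWN = 0, KEY_MICMUTE = 248 };

static const int hotkey_map[0x20] = {
	[0x1a] = KEY_MICMUTE,	/* all other slots default to KEY_UNKNOWN */
};

int main(void)
{
	int scancode;

	for (scancode = 0x18; scancode < 0x1c; scancode++)
		printf("scancode 0x%02x -> keycode %d\n",
		       scancode, hotkey_map[scancode]);
	return 0;
}
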
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index d3e38790906..d8e6a429e8b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -20,6 +20,7 @@
20#include <linux/debugfs.h> 20#include <linux/debugfs.h>
21#include <linux/device.h> 21#include <linux/device.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/async.h>
23#include <linux/err.h> 24#include <linux/err.h>
24#include <linux/mutex.h> 25#include <linux/mutex.h>
25#include <linux/suspend.h> 26#include <linux/suspend.h>
@@ -33,6 +34,8 @@
33 34
34#include "dummy.h" 35#include "dummy.h"
35 36
37#define rdev_crit(rdev, fmt, ...) \
38 pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
36#define rdev_err(rdev, fmt, ...) \ 39#define rdev_err(rdev, fmt, ...) \
37 pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__) 40 pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
38#define rdev_warn(rdev, fmt, ...) \ 41#define rdev_warn(rdev, fmt, ...) \
@@ -78,11 +81,13 @@ struct regulator {
78 char *supply_name; 81 char *supply_name;
79 struct device_attribute dev_attr; 82 struct device_attribute dev_attr;
80 struct regulator_dev *rdev; 83 struct regulator_dev *rdev;
84#ifdef CONFIG_DEBUG_FS
85 struct dentry *debugfs;
86#endif
81}; 87};
82 88
83static int _regulator_is_enabled(struct regulator_dev *rdev); 89static int _regulator_is_enabled(struct regulator_dev *rdev);
84static int _regulator_disable(struct regulator_dev *rdev, 90static int _regulator_disable(struct regulator_dev *rdev);
85 struct regulator_dev **supply_rdev_ptr);
86static int _regulator_get_voltage(struct regulator_dev *rdev); 91static int _regulator_get_voltage(struct regulator_dev *rdev);
87static int _regulator_get_current_limit(struct regulator_dev *rdev); 92static int _regulator_get_current_limit(struct regulator_dev *rdev);
88static unsigned int _regulator_get_mode(struct regulator_dev *rdev); 93static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
@@ -90,6 +95,9 @@ static void _notifier_call_chain(struct regulator_dev *rdev,
90 unsigned long event, void *data); 95 unsigned long event, void *data);
91static int _regulator_do_set_voltage(struct regulator_dev *rdev, 96static int _regulator_do_set_voltage(struct regulator_dev *rdev,
92 int min_uV, int max_uV); 97 int min_uV, int max_uV);
98static struct regulator *create_regulator(struct regulator_dev *rdev,
99 struct device *dev,
100 const char *supply_name);
93 101
94static const char *rdev_get_name(struct regulator_dev *rdev) 102static const char *rdev_get_name(struct regulator_dev *rdev)
95{ 103{
@@ -143,8 +151,11 @@ static int regulator_check_voltage(struct regulator_dev *rdev,
143 if (*min_uV < rdev->constraints->min_uV) 151 if (*min_uV < rdev->constraints->min_uV)
144 *min_uV = rdev->constraints->min_uV; 152 *min_uV = rdev->constraints->min_uV;
145 153
146 if (*min_uV > *max_uV) 154 if (*min_uV > *max_uV) {
155 rdev_err(rdev, "unsupportable voltage range: %d-%duV\n",
156 *min_uV, *max_uV);
147 return -EINVAL; 157 return -EINVAL;
158 }
148 159
149 return 0; 160 return 0;
150} 161}
@@ -197,8 +208,11 @@ static int regulator_check_current_limit(struct regulator_dev *rdev,
197 if (*min_uA < rdev->constraints->min_uA) 208 if (*min_uA < rdev->constraints->min_uA)
198 *min_uA = rdev->constraints->min_uA; 209 *min_uA = rdev->constraints->min_uA;
199 210
200 if (*min_uA > *max_uA) 211 if (*min_uA > *max_uA) {
212 rdev_err(rdev, "unsupportable current range: %d-%duA\n",
213 *min_uA, *max_uA);
201 return -EINVAL; 214 return -EINVAL;
215 }
202 216
203 return 0; 217 return 0;
204} 218}
@@ -213,6 +227,7 @@ static int regulator_mode_constrain(struct regulator_dev *rdev, int *mode)
213 case REGULATOR_MODE_STANDBY: 227 case REGULATOR_MODE_STANDBY:
214 break; 228 break;
215 default: 229 default:
230 rdev_err(rdev, "invalid mode %x specified\n", *mode);
216 return -EINVAL; 231 return -EINVAL;
217 } 232 }
218 233
@@ -779,7 +794,6 @@ static int machine_constraints_voltage(struct regulator_dev *rdev,
779 if (ret < 0) { 794 if (ret < 0) {
780 rdev_err(rdev, "failed to apply %duV constraint\n", 795 rdev_err(rdev, "failed to apply %duV constraint\n",
781 rdev->constraints->min_uV); 796 rdev->constraints->min_uV);
782 rdev->constraints = NULL;
783 return ret; 797 return ret;
784 } 798 }
785 } 799 }
@@ -882,7 +896,6 @@ static int set_machine_constraints(struct regulator_dev *rdev,
882 ret = suspend_prepare(rdev, rdev->constraints->initial_state); 896 ret = suspend_prepare(rdev, rdev->constraints->initial_state);
883 if (ret < 0) { 897 if (ret < 0) {
884 rdev_err(rdev, "failed to set suspend state\n"); 898 rdev_err(rdev, "failed to set suspend state\n");
885 rdev->constraints = NULL;
886 goto out; 899 goto out;
887 } 900 }
888 } 901 }
@@ -909,13 +922,15 @@ static int set_machine_constraints(struct regulator_dev *rdev,
909 ret = ops->enable(rdev); 922 ret = ops->enable(rdev);
910 if (ret < 0) { 923 if (ret < 0) {
911 rdev_err(rdev, "failed to enable\n"); 924 rdev_err(rdev, "failed to enable\n");
912 rdev->constraints = NULL;
913 goto out; 925 goto out;
914 } 926 }
915 } 927 }
916 928
917 print_constraints(rdev); 929 print_constraints(rdev);
930 return 0;
918out: 931out:
932 kfree(rdev->constraints);
933 rdev->constraints = NULL;
919 return ret; 934 return ret;
920} 935}
921 936
@@ -929,21 +944,20 @@ out:
929 * core if it's child is enabled. 944 * core if it's child is enabled.
930 */ 945 */
931static int set_supply(struct regulator_dev *rdev, 946static int set_supply(struct regulator_dev *rdev,
932 struct regulator_dev *supply_rdev) 947 struct regulator_dev *supply_rdev)
933{ 948{
934 int err; 949 int err;
935 950
936 err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj, 951 rdev_info(rdev, "supplied by %s\n", rdev_get_name(supply_rdev));
937 "supply"); 952
938 if (err) { 953 rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY");
939 rdev_err(rdev, "could not add device link %s err %d\n", 954 if (IS_ERR(rdev->supply)) {
940 supply_rdev->dev.kobj.name, err); 955 err = PTR_ERR(rdev->supply);
941 goto out; 956 rdev->supply = NULL;
957 return err;
942 } 958 }
943 rdev->supply = supply_rdev; 959
944 list_add(&rdev->slist, &supply_rdev->supply_list); 960 return 0;
945out:
946 return err;
947} 961}
948 962
949/** 963/**
@@ -1032,7 +1046,7 @@ static void unset_regulator_supplies(struct regulator_dev *rdev)
1032 } 1046 }
1033} 1047}
1034 1048
1035#define REG_STR_SIZE 32 1049#define REG_STR_SIZE 64
1036 1050
1037static struct regulator *create_regulator(struct regulator_dev *rdev, 1051static struct regulator *create_regulator(struct regulator_dev *rdev,
1038 struct device *dev, 1052 struct device *dev,
@@ -1052,8 +1066,9 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,
1052 1066
1053 if (dev) { 1067 if (dev) {
1054 /* create a 'requested_microamps_name' sysfs entry */ 1068 /* create a 'requested_microamps_name' sysfs entry */
1055 size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s", 1069 size = scnprintf(buf, REG_STR_SIZE,
1056 supply_name); 1070 "microamps_requested_%s-%s",
1071 dev_name(dev), supply_name);
1057 if (size >= REG_STR_SIZE) 1072 if (size >= REG_STR_SIZE)
1058 goto overflow_err; 1073 goto overflow_err;
1059 1074
@@ -1088,7 +1103,28 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,
1088 dev->kobj.name, err); 1103 dev->kobj.name, err);
1089 goto link_name_err; 1104 goto link_name_err;
1090 } 1105 }
1106 } else {
1107 regulator->supply_name = kstrdup(supply_name, GFP_KERNEL);
1108 if (regulator->supply_name == NULL)
1109 goto attr_err;
1110 }
1111
1112#ifdef CONFIG_DEBUG_FS
1113 regulator->debugfs = debugfs_create_dir(regulator->supply_name,
1114 rdev->debugfs);
1115 if (IS_ERR_OR_NULL(regulator->debugfs)) {
1116 rdev_warn(rdev, "Failed to create debugfs directory\n");
1117 regulator->debugfs = NULL;
1118 } else {
1119 debugfs_create_u32("uA_load", 0444, regulator->debugfs,
1120 &regulator->uA_load);
1121 debugfs_create_u32("min_uV", 0444, regulator->debugfs,
1122 &regulator->min_uV);
1123 debugfs_create_u32("max_uV", 0444, regulator->debugfs,
1124 &regulator->max_uV);
1091 } 1125 }
1126#endif
1127
1092 mutex_unlock(&rdev->mutex); 1128 mutex_unlock(&rdev->mutex);
1093 return regulator; 1129 return regulator;
1094link_name_err: 1130link_name_err:
@@ -1267,13 +1303,17 @@ void regulator_put(struct regulator *regulator)
1267 mutex_lock(&regulator_list_mutex); 1303 mutex_lock(&regulator_list_mutex);
1268 rdev = regulator->rdev; 1304 rdev = regulator->rdev;
1269 1305
1306#ifdef CONFIG_DEBUG_FS
1307 debugfs_remove_recursive(regulator->debugfs);
1308#endif
1309
1270 /* remove any sysfs entries */ 1310 /* remove any sysfs entries */
1271 if (regulator->dev) { 1311 if (regulator->dev) {
1272 sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); 1312 sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name);
1273 kfree(regulator->supply_name);
1274 device_remove_file(regulator->dev, &regulator->dev_attr); 1313 device_remove_file(regulator->dev, &regulator->dev_attr);
1275 kfree(regulator->dev_attr.attr.name); 1314 kfree(regulator->dev_attr.attr.name);
1276 } 1315 }
1316 kfree(regulator->supply_name);
1277 list_del(&regulator->list); 1317 list_del(&regulator->list);
1278 kfree(regulator); 1318 kfree(regulator);
1279 1319
@@ -1301,19 +1341,6 @@ static int _regulator_enable(struct regulator_dev *rdev)
1301{ 1341{
1302 int ret, delay; 1342 int ret, delay;
1303 1343
1304 if (rdev->use_count == 0) {
1305 /* do we need to enable the supply regulator first */
1306 if (rdev->supply) {
1307 mutex_lock(&rdev->supply->mutex);
1308 ret = _regulator_enable(rdev->supply);
1309 mutex_unlock(&rdev->supply->mutex);
1310 if (ret < 0) {
1311 rdev_err(rdev, "failed to enable: %d\n", ret);
1312 return ret;
1313 }
1314 }
1315 }
1316
1317 /* check voltage and requested load before enabling */ 1344 /* check voltage and requested load before enabling */
1318 if (rdev->constraints && 1345 if (rdev->constraints &&
1319 (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) 1346 (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS))
@@ -1388,19 +1415,27 @@ int regulator_enable(struct regulator *regulator)
1388 struct regulator_dev *rdev = regulator->rdev; 1415 struct regulator_dev *rdev = regulator->rdev;
1389 int ret = 0; 1416 int ret = 0;
1390 1417
1418 if (rdev->supply) {
1419 ret = regulator_enable(rdev->supply);
1420 if (ret != 0)
1421 return ret;
1422 }
1423
1391 mutex_lock(&rdev->mutex); 1424 mutex_lock(&rdev->mutex);
1392 ret = _regulator_enable(rdev); 1425 ret = _regulator_enable(rdev);
1393 mutex_unlock(&rdev->mutex); 1426 mutex_unlock(&rdev->mutex);
1427
1428 if (ret != 0)
1429 regulator_disable(rdev->supply);
1430
1394 return ret; 1431 return ret;
1395} 1432}
1396EXPORT_SYMBOL_GPL(regulator_enable); 1433EXPORT_SYMBOL_GPL(regulator_enable);
1397 1434
1398/* locks held by regulator_disable() */ 1435/* locks held by regulator_disable() */
1399static int _regulator_disable(struct regulator_dev *rdev, 1436static int _regulator_disable(struct regulator_dev *rdev)
1400 struct regulator_dev **supply_rdev_ptr)
1401{ 1437{
1402 int ret = 0; 1438 int ret = 0;
1403 *supply_rdev_ptr = NULL;
1404 1439
1405 if (WARN(rdev->use_count <= 0, 1440 if (WARN(rdev->use_count <= 0,
1406 "unbalanced disables for %s\n", rdev_get_name(rdev))) 1441 "unbalanced disables for %s\n", rdev_get_name(rdev)))
@@ -1427,9 +1462,6 @@ static int _regulator_disable(struct regulator_dev *rdev,
1427 NULL); 1462 NULL);
1428 } 1463 }
1429 1464
1430 /* decrease our supplies ref count and disable if required */
1431 *supply_rdev_ptr = rdev->supply;
1432
1433 rdev->use_count = 0; 1465 rdev->use_count = 0;
1434 } else if (rdev->use_count > 1) { 1466 } else if (rdev->use_count > 1) {
1435 1467
@@ -1440,6 +1472,7 @@ static int _regulator_disable(struct regulator_dev *rdev,
1440 1472
1441 rdev->use_count--; 1473 rdev->use_count--;
1442 } 1474 }
1475
1443 return ret; 1476 return ret;
1444} 1477}
1445 1478
@@ -1458,29 +1491,21 @@ static int _regulator_disable(struct regulator_dev *rdev,
1458int regulator_disable(struct regulator *regulator) 1491int regulator_disable(struct regulator *regulator)
1459{ 1492{
1460 struct regulator_dev *rdev = regulator->rdev; 1493 struct regulator_dev *rdev = regulator->rdev;
1461 struct regulator_dev *supply_rdev = NULL;
1462 int ret = 0; 1494 int ret = 0;
1463 1495
1464 mutex_lock(&rdev->mutex); 1496 mutex_lock(&rdev->mutex);
1465 ret = _regulator_disable(rdev, &supply_rdev); 1497 ret = _regulator_disable(rdev);
1466 mutex_unlock(&rdev->mutex); 1498 mutex_unlock(&rdev->mutex);
1467 1499
1468 /* decrease our supplies ref count and disable if required */ 1500 if (ret == 0 && rdev->supply)
1469 while (supply_rdev != NULL) { 1501 regulator_disable(rdev->supply);
1470 rdev = supply_rdev;
1471
1472 mutex_lock(&rdev->mutex);
1473 _regulator_disable(rdev, &supply_rdev);
1474 mutex_unlock(&rdev->mutex);
1475 }
1476 1502
1477 return ret; 1503 return ret;
1478} 1504}
1479EXPORT_SYMBOL_GPL(regulator_disable); 1505EXPORT_SYMBOL_GPL(regulator_disable);
1480 1506
1481/* locks held by regulator_force_disable() */ 1507/* locks held by regulator_force_disable() */
1482static int _regulator_force_disable(struct regulator_dev *rdev, 1508static int _regulator_force_disable(struct regulator_dev *rdev)
1483 struct regulator_dev **supply_rdev_ptr)
1484{ 1509{
1485 int ret = 0; 1510 int ret = 0;
1486 1511
@@ -1497,10 +1522,6 @@ static int _regulator_force_disable(struct regulator_dev *rdev,
1497 REGULATOR_EVENT_DISABLE, NULL); 1522 REGULATOR_EVENT_DISABLE, NULL);
1498 } 1523 }
1499 1524
1500 /* decrease our supplies ref count and disable if required */
1501 *supply_rdev_ptr = rdev->supply;
1502
1503 rdev->use_count = 0;
1504 return ret; 1525 return ret;
1505} 1526}
1506 1527
@@ -1516,16 +1537,16 @@ static int _regulator_force_disable(struct regulator_dev *rdev,
1516int regulator_force_disable(struct regulator *regulator) 1537int regulator_force_disable(struct regulator *regulator)
1517{ 1538{
1518 struct regulator_dev *rdev = regulator->rdev; 1539 struct regulator_dev *rdev = regulator->rdev;
1519 struct regulator_dev *supply_rdev = NULL;
1520 int ret; 1540 int ret;
1521 1541
1522 mutex_lock(&rdev->mutex); 1542 mutex_lock(&rdev->mutex);
1523 regulator->uA_load = 0; 1543 regulator->uA_load = 0;
1524 ret = _regulator_force_disable(rdev, &supply_rdev); 1544 ret = _regulator_force_disable(regulator->rdev);
1525 mutex_unlock(&rdev->mutex); 1545 mutex_unlock(&rdev->mutex);
1526 1546
1527 if (supply_rdev) 1547 if (rdev->supply)
1528 regulator_disable(get_device_regulator(rdev_get_dev(supply_rdev))); 1548 while (rdev->open_count--)
1549 regulator_disable(rdev->supply);
1529 1550
1530 return ret; 1551 return ret;
1531} 1552}
@@ -2136,7 +2157,7 @@ int regulator_set_optimum_mode(struct regulator *regulator, int uA_load)
2136 /* get input voltage */ 2157 /* get input voltage */
2137 input_uV = 0; 2158 input_uV = 0;
2138 if (rdev->supply) 2159 if (rdev->supply)
2139 input_uV = _regulator_get_voltage(rdev->supply); 2160 input_uV = regulator_get_voltage(rdev->supply);
2140 if (input_uV <= 0) 2161 if (input_uV <= 0)
2141 input_uV = rdev->constraints->input_uV; 2162 input_uV = rdev->constraints->input_uV;
2142 if (input_uV <= 0) { 2163 if (input_uV <= 0) {
@@ -2206,17 +2227,8 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier);
2206static void _notifier_call_chain(struct regulator_dev *rdev, 2227static void _notifier_call_chain(struct regulator_dev *rdev,
2207 unsigned long event, void *data) 2228 unsigned long event, void *data)
2208{ 2229{
2209 struct regulator_dev *_rdev;
2210
2211 /* call rdev chain first */ 2230 /* call rdev chain first */
2212 blocking_notifier_call_chain(&rdev->notifier, event, NULL); 2231 blocking_notifier_call_chain(&rdev->notifier, event, NULL);
2213
2214 /* now notify regulator we supply */
2215 list_for_each_entry(_rdev, &rdev->supply_list, slist) {
2216 mutex_lock(&_rdev->mutex);
2217 _notifier_call_chain(_rdev, event, data);
2218 mutex_unlock(&_rdev->mutex);
2219 }
2220} 2232}
2221 2233
2222/** 2234/**
@@ -2264,6 +2276,13 @@ err:
2264} 2276}
2265EXPORT_SYMBOL_GPL(regulator_bulk_get); 2277EXPORT_SYMBOL_GPL(regulator_bulk_get);
2266 2278
2279static void regulator_bulk_enable_async(void *data, async_cookie_t cookie)
2280{
2281 struct regulator_bulk_data *bulk = data;
2282
2283 bulk->ret = regulator_enable(bulk->consumer);
2284}
2285
2267/** 2286/**
2268 * regulator_bulk_enable - enable multiple regulator consumers 2287 * regulator_bulk_enable - enable multiple regulator consumers
2269 * 2288 *
@@ -2279,21 +2298,33 @@ EXPORT_SYMBOL_GPL(regulator_bulk_get);
2279int regulator_bulk_enable(int num_consumers, 2298int regulator_bulk_enable(int num_consumers,
2280 struct regulator_bulk_data *consumers) 2299 struct regulator_bulk_data *consumers)
2281{ 2300{
2301 LIST_HEAD(async_domain);
2282 int i; 2302 int i;
2283 int ret; 2303 int ret = 0;
2304
2305 for (i = 0; i < num_consumers; i++)
2306 async_schedule_domain(regulator_bulk_enable_async,
2307 &consumers[i], &async_domain);
2308
2309 async_synchronize_full_domain(&async_domain);
2284 2310
2311 /* If any consumer failed we need to unwind any that succeeded */
2285 for (i = 0; i < num_consumers; i++) { 2312 for (i = 0; i < num_consumers; i++) {
2286 ret = regulator_enable(consumers[i].consumer); 2313 if (consumers[i].ret != 0) {
2287 if (ret != 0) 2314 ret = consumers[i].ret;
2288 goto err; 2315 goto err;
2316 }
2289 } 2317 }
2290 2318
2291 return 0; 2319 return 0;
2292 2320
2293err: 2321err:
2294 pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret); 2322 for (i = 0; i < num_consumers; i++)
2295 for (--i; i >= 0; --i) 2323 if (consumers[i].ret == 0)
2296 regulator_disable(consumers[i].consumer); 2324 regulator_disable(consumers[i].consumer);
2325 else
2326 pr_err("Failed to enable %s: %d\n",
2327 consumers[i].supply, consumers[i].ret);
2297 2328
2298 return ret; 2329 return ret;
2299} 2330}
@@ -2589,9 +2620,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
2589 rdev->owner = regulator_desc->owner; 2620 rdev->owner = regulator_desc->owner;
2590 rdev->desc = regulator_desc; 2621 rdev->desc = regulator_desc;
2591 INIT_LIST_HEAD(&rdev->consumer_list); 2622 INIT_LIST_HEAD(&rdev->consumer_list);
2592 INIT_LIST_HEAD(&rdev->supply_list);
2593 INIT_LIST_HEAD(&rdev->list); 2623 INIT_LIST_HEAD(&rdev->list);
2594 INIT_LIST_HEAD(&rdev->slist);
2595 BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier); 2624 BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier);
2596 2625
2597 /* preform any regulator specific init */ 2626 /* preform any regulator specific init */
@@ -2672,6 +2701,7 @@ unset_supplies:
2672 unset_regulator_supplies(rdev); 2701 unset_regulator_supplies(rdev);
2673 2702
2674scrub: 2703scrub:
2704 kfree(rdev->constraints);
2675 device_unregister(&rdev->dev); 2705 device_unregister(&rdev->dev);
2676 /* device core frees rdev */ 2706 /* device core frees rdev */
2677 rdev = ERR_PTR(ret); 2707 rdev = ERR_PTR(ret);
@@ -2703,7 +2733,7 @@ void regulator_unregister(struct regulator_dev *rdev)
2703 unset_regulator_supplies(rdev); 2733 unset_regulator_supplies(rdev);
2704 list_del(&rdev->list); 2734 list_del(&rdev->list);
2705 if (rdev->supply) 2735 if (rdev->supply)
2706 sysfs_remove_link(&rdev->dev.kobj, "supply"); 2736 regulator_put(rdev->supply);
2707 device_unregister(&rdev->dev); 2737 device_unregister(&rdev->dev);
2708 kfree(rdev->constraints); 2738 kfree(rdev->constraints);
2709 mutex_unlock(&regulator_list_mutex); 2739 mutex_unlock(&regulator_list_mutex);
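
The largest behavioural change in this file is regulator_bulk_enable(): instead of enabling consumers one at a time, it now schedules every enable in an async domain, synchronizes, then scans the per-consumer `ret` fields, unwinding any that succeeded if one failed. A userspace sketch of that fan-out/join/unwind shape, with pthreads standing in for async_schedule_domain()/async_synchronize_full_domain() (build with -pthread; the failure is simulated):

#include <pthread.h>
#include <stdio.h>

struct bulk { const char *supply; int ret; };

static void *enable_async(void *data)
{
	struct bulk *b = data;

	b->ret = (b->supply[0] == 'x') ? -22 : 0;	/* simulate -EINVAL */
	return NULL;
}

int main(void)
{
	struct bulk consumers[] = { { "vcc" }, { "xfail" }, { "vio" } };
	enum { N = 3 };
	pthread_t tid[N];
	int i, ret = 0;

	for (i = 0; i < N; i++)		/* fan out the enables */
		pthread_create(&tid[i], NULL, enable_async, &consumers[i]);
	for (i = 0; i < N; i++)		/* wait for all of them */
		pthread_join(tid[i], NULL);

	for (i = 0; i < N; i++)
		if (consumers[i].ret) {
			ret = consumers[i].ret;
			break;
		}

	if (ret)			/* unwind only the ones that succeeded */
		for (i = 0; i < N; i++)
			if (consumers[i].ret == 0)
				printf("re-disabling %s\n", consumers[i].supply);

	printf("bulk enable -> %d\n", ret);
	return ret ? 1 : 0;
}
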
diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c
index c7410bde7b5..f6ef6694ab9 100644
--- a/drivers/regulator/dummy.c
+++ b/drivers/regulator/dummy.c
@@ -36,6 +36,29 @@ static struct regulator_desc dummy_desc = {
36 .ops = &dummy_ops, 36 .ops = &dummy_ops,
37}; 37};
38 38
39static int __devinit dummy_regulator_probe(struct platform_device *pdev)
40{
41 int ret;
42
43 dummy_regulator_rdev = regulator_register(&dummy_desc, NULL,
44 &dummy_initdata, NULL);
45 if (IS_ERR(dummy_regulator_rdev)) {
46 ret = PTR_ERR(dummy_regulator_rdev);
47 pr_err("Failed to register regulator: %d\n", ret);
48 return ret;
49 }
50
51 return 0;
52}
53
54static struct platform_driver dummy_regulator_driver = {
55 .probe = dummy_regulator_probe,
56 .driver = {
57 .name = "reg-dummy",
58 .owner = THIS_MODULE,
59 },
60};
61
39static struct platform_device *dummy_pdev; 62static struct platform_device *dummy_pdev;
40 63
41void __init regulator_dummy_init(void) 64void __init regulator_dummy_init(void)
@@ -55,12 +78,9 @@ void __init regulator_dummy_init(void)
55 return; 78 return;
56 } 79 }
57 80
58 dummy_regulator_rdev = regulator_register(&dummy_desc, NULL, 81 ret = platform_driver_register(&dummy_regulator_driver);
59 &dummy_initdata, NULL); 82 if (ret != 0) {
60 if (IS_ERR(dummy_regulator_rdev)) { 83 pr_err("Failed to register dummy regulator driver: %d\n", ret);
61 ret = PTR_ERR(dummy_regulator_rdev);
62 pr_err("Failed to register regulator: %d\n", ret);
63 platform_device_unregister(dummy_pdev); 84 platform_device_unregister(dummy_pdev);
64 return;
65 } 85 }
66} 86}
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index 55dd4e6650d..66d2d60b436 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -49,7 +49,6 @@
49#define TPS65911_REG_LDO7 11 49#define TPS65911_REG_LDO7 11
50#define TPS65911_REG_LDO8 12 50#define TPS65911_REG_LDO8 12
51 51
52#define TPS65910_NUM_REGULATOR 13
53#define TPS65910_SUPPLY_STATE_ENABLED 0x1 52#define TPS65910_SUPPLY_STATE_ENABLED 0x1
54 53
55/* supported VIO voltages in milivolts */ 54/* supported VIO voltages in milivolts */
@@ -264,11 +263,12 @@ static struct tps_info tps65911_regs[] = {
264}; 263};
265 264
266struct tps65910_reg { 265struct tps65910_reg {
267 struct regulator_desc desc[TPS65910_NUM_REGULATOR]; 266 struct regulator_desc *desc;
268 struct tps65910 *mfd; 267 struct tps65910 *mfd;
269 struct regulator_dev *rdev[TPS65910_NUM_REGULATOR]; 268 struct regulator_dev **rdev;
270 struct tps_info *info[TPS65910_NUM_REGULATOR]; 269 struct tps_info **info;
271 struct mutex mutex; 270 struct mutex mutex;
271 int num_regulators;
272 int mode; 272 int mode;
273 int (*get_ctrl_reg)(int); 273 int (*get_ctrl_reg)(int);
274}; 274};
@@ -759,8 +759,13 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev,
759 mult = (selector / VDD1_2_NUM_VOLTS) + 1; 759 mult = (selector / VDD1_2_NUM_VOLTS) + 1;
760 volt = VDD1_2_MIN_VOLT + 760 volt = VDD1_2_MIN_VOLT +
761 (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET; 761 (selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET;
762 break;
762 case TPS65911_REG_VDDCTRL: 763 case TPS65911_REG_VDDCTRL:
763 volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET); 764 volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET);
765 break;
766 default:
767 BUG();
768 return -EINVAL;
764 } 769 }
765 770
766 return volt * 100 * mult; 771 return volt * 100 * mult;
@@ -897,16 +902,42 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
897 switch(tps65910_chip_id(tps65910)) { 902 switch(tps65910_chip_id(tps65910)) {
898 case TPS65910: 903 case TPS65910:
899 pmic->get_ctrl_reg = &tps65910_get_ctrl_register; 904 pmic->get_ctrl_reg = &tps65910_get_ctrl_register;
905 pmic->num_regulators = ARRAY_SIZE(tps65910_regs);
900 info = tps65910_regs; 906 info = tps65910_regs;
907 break;
901 case TPS65911: 908 case TPS65911:
902 pmic->get_ctrl_reg = &tps65911_get_ctrl_register; 909 pmic->get_ctrl_reg = &tps65911_get_ctrl_register;
910 pmic->num_regulators = ARRAY_SIZE(tps65911_regs);
903 info = tps65911_regs; 911 info = tps65911_regs;
912 break;
904 default: 913 default:
905 pr_err("Invalid tps chip version\n"); 914 pr_err("Invalid tps chip version\n");
915 kfree(pmic);
906 return -ENODEV; 916 return -ENODEV;
907 } 917 }
908 918
909 for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) { 919 pmic->desc = kcalloc(pmic->num_regulators,
920 sizeof(struct regulator_desc), GFP_KERNEL);
921 if (!pmic->desc) {
922 err = -ENOMEM;
923 goto err_free_pmic;
924 }
925
926 pmic->info = kcalloc(pmic->num_regulators,
927 sizeof(struct tps_info *), GFP_KERNEL);
928 if (!pmic->info) {
929 err = -ENOMEM;
930 goto err_free_desc;
931 }
932
933 pmic->rdev = kcalloc(pmic->num_regulators,
934 sizeof(struct regulator_dev *), GFP_KERNEL);
935 if (!pmic->rdev) {
936 err = -ENOMEM;
937 goto err_free_info;
938 }
939
940 for (i = 0; i < pmic->num_regulators; i++, info++, reg_data++) {
910 /* Register the regulators */ 941 /* Register the regulators */
911 pmic->info[i] = info; 942 pmic->info[i] = info;
912 943
@@ -938,7 +969,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
938 "failed to register %s regulator\n", 969 "failed to register %s regulator\n",
939 pdev->name); 970 pdev->name);
940 err = PTR_ERR(rdev); 971 err = PTR_ERR(rdev);
941 goto err; 972 goto err_unregister_regulator;
942 } 973 }
943 974
944 /* Save regulator for cleanup */ 975 /* Save regulator for cleanup */
@@ -946,23 +977,31 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
946 } 977 }
947 return 0; 978 return 0;
948 979
949err: 980err_unregister_regulator:
950 while (--i >= 0) 981 while (--i >= 0)
951 regulator_unregister(pmic->rdev[i]); 982 regulator_unregister(pmic->rdev[i]);
952 983 kfree(pmic->rdev);
984err_free_info:
985 kfree(pmic->info);
986err_free_desc:
987 kfree(pmic->desc);
988err_free_pmic:
953 kfree(pmic); 989 kfree(pmic);
954 return err; 990 return err;
955} 991}
956 992
957static int __devexit tps65910_remove(struct platform_device *pdev) 993static int __devexit tps65910_remove(struct platform_device *pdev)
958{ 994{
959 struct tps65910_reg *tps65910_reg = platform_get_drvdata(pdev); 995 struct tps65910_reg *pmic = platform_get_drvdata(pdev);
960 int i; 996 int i;
961 997
962 for (i = 0; i < TPS65910_NUM_REGULATOR; i++) 998 for (i = 0; i < pmic->num_regulators; i++)
963 regulator_unregister(tps65910_reg->rdev[i]); 999 regulator_unregister(pmic->rdev[i]);
964 1000
965 kfree(tps65910_reg); 1001 kfree(pmic->rdev);
1002 kfree(pmic->info);
1003 kfree(pmic->desc);
1004 kfree(pmic);
966 return 0; 1005 return 0;
967} 1006}
968 1007
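
The probe now sizes everything from the per-chip table via ARRAY_SIZE() and allocates desc/info/rdev with kcalloc() instead of fixing them at TPS65910_NUM_REGULATOR, so the TPS65911's different regulator count no longer overruns or wastes the arrays. A compact userspace sketch of that pattern (toy tables, calloc standing in for kcalloc):

#include <stdio.h>
#include <stdlib.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct tps_info { const char *name; };

static struct tps_info tps65910_regs[] = { {"VIO"}, {"VDD1"}, {"VDD2"} };
static struct tps_info tps65911_regs[] = {
	{"VIO"}, {"VDD1"}, {"VDD2"}, {"LDO1"},
};

int main(int argc, char **argv)
{
	int is_65911 = argc > 1;	/* pretend chip-version detection */
	struct tps_info *info = is_65911 ? tps65911_regs : tps65910_regs;
	size_t n = is_65911 ? ARRAY_SIZE(tps65911_regs)
			    : ARRAY_SIZE(tps65910_regs);
	struct tps_info **slots = calloc(n, sizeof(*slots));	/* ~kcalloc */
	size_t i;

	(void)argv;
	if (!slots)
		return 1;
	for (i = 0; i < n; i++) {
		slots[i] = &info[i];
		printf("registering %s\n", slots[i]->name);
	}
	free(slots);
	return 0;
}
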
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 87fe0f75a56..ee8747f4fa0 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -835,8 +835,8 @@ static struct regulator_ops twlsmps_ops = {
835 remap_conf) \ 835 remap_conf) \
836 TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ 836 TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
837 remap_conf, TWL4030, twl4030fixed_ops) 837 remap_conf, TWL4030, twl4030fixed_ops)
838#define TWL6030_FIXED_LDO(label, offset, mVolts, num, turnon_delay) \ 838#define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \
839 TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \ 839 TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \
840 0x0, TWL6030, twl6030fixed_ops) 840 0x0, TWL6030, twl6030fixed_ops)
841 841
842#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \ 842#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \
@@ -856,24 +856,22 @@ static struct regulator_ops twlsmps_ops = {
856 }, \ 856 }, \
857 } 857 }
858 858
859#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ 859#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \
860 .base = offset, \ 860 .base = offset, \
861 .id = num, \
862 .min_mV = min_mVolts, \ 861 .min_mV = min_mVolts, \
863 .max_mV = max_mVolts, \ 862 .max_mV = max_mVolts, \
864 .desc = { \ 863 .desc = { \
865 .name = #label, \ 864 .name = #label, \
866 .id = TWL6030_REG_##label, \ 865 .id = TWL6030_REG_##label, \
867 .n_voltages = (max_mVolts - min_mVolts)/100, \ 866 .n_voltages = (max_mVolts - min_mVolts)/100 + 1, \
868 .ops = &twl6030ldo_ops, \ 867 .ops = &twl6030ldo_ops, \
869 .type = REGULATOR_VOLTAGE, \ 868 .type = REGULATOR_VOLTAGE, \
870 .owner = THIS_MODULE, \ 869 .owner = THIS_MODULE, \
871 }, \ 870 }, \
872 } 871 }
873 872
874#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \ 873#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \
875 .base = offset, \ 874 .base = offset, \
876 .id = num, \
877 .min_mV = min_mVolts, \ 875 .min_mV = min_mVolts, \
878 .max_mV = max_mVolts, \ 876 .max_mV = max_mVolts, \
879 .desc = { \ 877 .desc = { \
@@ -903,9 +901,8 @@ static struct regulator_ops twlsmps_ops = {
903 }, \ 901 }, \
904 } 902 }
905 903
906#define TWL6030_FIXED_RESOURCE(label, offset, num, turnon_delay) { \ 904#define TWL6030_FIXED_RESOURCE(label, offset, turnon_delay) { \
907 .base = offset, \ 905 .base = offset, \
908 .id = num, \
909 .delay = turnon_delay, \ 906 .delay = turnon_delay, \
910 .desc = { \ 907 .desc = { \
911 .name = #label, \ 908 .name = #label, \
@@ -916,9 +913,8 @@ static struct regulator_ops twlsmps_ops = {
916 }, \ 913 }, \
917 } 914 }
918 915
919#define TWL6025_ADJUSTABLE_SMPS(label, offset, num) { \ 916#define TWL6025_ADJUSTABLE_SMPS(label, offset) { \
920 .base = offset, \ 917 .base = offset, \
921 .id = num, \
922 .min_mV = 600, \ 918 .min_mV = 600, \
923 .max_mV = 2100, \ 919 .max_mV = 2100, \
924 .desc = { \ 920 .desc = { \
@@ -961,32 +957,32 @@ static struct twlreg_info twl_regs[] = {
961 /* 6030 REG with base as PMC Slave Misc : 0x0030 */ 957 /* 6030 REG with base as PMC Slave Misc : 0x0030 */
962 /* Turnon-delay and remap configuration values for 6030 are not 958 /* Turnon-delay and remap configuration values for 6030 are not
963 verified since the specification is not public */ 959 verified since the specification is not public */
964 TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300, 1), 960 TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300),
965 TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300, 2), 961 TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300),
966 TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300, 3), 962 TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300),
967 TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300, 4), 963 TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300),
968 TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300, 5), 964 TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300),
969 TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300, 7), 965 TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300),
970 TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0), 966 TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0),
971 TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0), 967 TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0),
972 TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0), 968 TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0),
973 TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0), 969 TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0),
974 TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 48, 0), 970 TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0),
975 971
976 /* 6025 are renamed compared to 6030 versions */ 972 /* 6025 are renamed compared to 6030 versions */
977 TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300, 1), 973 TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300),
978 TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300, 2), 974 TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300),
979 TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300, 3), 975 TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300),
980 TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300, 4), 976 TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300),
981 TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300, 5), 977 TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300),
982 TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300, 7), 978 TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300),
983 TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300, 16), 979 TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300),
984 TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300, 17), 980 TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300),
985 TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300, 18), 981 TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300),
986 982
987 TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34, 1), 983 TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34),
988 TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10, 2), 984 TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10),
989 TWL6025_ADJUSTABLE_SMPS(VIO, 0x16, 3), 985 TWL6025_ADJUSTABLE_SMPS(VIO, 0x16),
990}; 986};
991 987
992static u8 twl_get_smps_offset(void) 988static u8 twl_get_smps_offset(void)
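
The quiet fix in the TWL6030_ADJUSTABLE_LDO macro is `.n_voltages = (max_mVolts - min_mVolts)/100 + 1`: an inclusive range stepped every 100 mV has one more entry than the difference divided by the step, so the old formula left the top voltage unselectable. A short check of the fence-post arithmetic:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	int min_mV = 1000, max_mV = 3300, step = 100;
	int old_n = (max_mV - min_mV) / step;		/* 23: tops out at 3200 */
	int new_n = (max_mV - min_mV) / step + 1;	/* 24: 1000..3300 inclusive */

	assert(min_mV + (old_n - 1) * step == 3200);
	assert(min_mV + (new_n - 1) * step == 3300);
	printf("old n_voltages=%d, fixed n_voltages=%d\n", old_n, new_n);
	return 0;
}
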
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index a0982e80985..bd3531d8b2a 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -267,23 +267,6 @@ static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev,
267 return vsel; 267 return vsel;
268} 268}
269 269
270static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev,
271 int min_uV, int max_uV)
272{
273 u16 vsel;
274
275 if (max_uV < 600000 || max_uV > 1800000)
276 return -EINVAL;
277
278 vsel = ((max_uV - 600000) / 12500) + 8;
279
280 if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV ||
281 wm831x_buckv_list_voltage(rdev, vsel) < max_uV)
282 return -EINVAL;
283
284 return vsel;
285}
286
287static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state) 270static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state)
288{ 271{
289 struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev); 272 struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
@@ -338,28 +321,23 @@ static int wm831x_buckv_set_voltage(struct regulator_dev *rdev,
338 if (ret < 0) 321 if (ret < 0)
339 return ret; 322 return ret;
340 323
341 /* Set the high voltage as the DVS voltage. This is optimised 324 /*
342 * for CPUfreq usage, most processors will keep the maximum 325 * If this VSEL is higher than the last one we've seen then
343 * voltage constant and lower the minimum with the frequency. */ 326 * remember it as the DVS VSEL. This is optimised for CPUfreq
344 vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV); 327 * usage where we want to get to the highest voltage very
345 if (vsel < 0) { 328 * quickly.
346 /* This should never happen - at worst the same vsel 329 */
347 * should be chosen */ 330 if (vsel > dcdc->dvs_vsel) {
348 WARN_ON(vsel < 0); 331 ret = wm831x_set_bits(wm831x, dvs_reg,
349 return 0; 332 WM831X_DC1_DVS_VSEL_MASK,
333 dcdc->dvs_vsel);
334 if (ret == 0)
335 dcdc->dvs_vsel = vsel;
336 else
337 dev_warn(wm831x->dev,
338 "Failed to set DCDC DVS VSEL: %d\n", ret);
350 } 339 }
351 340
352 /* Don't bother if it's the same VSEL we're already using */
353 if (vsel == dcdc->on_vsel)
354 return 0;
355
356 ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel);
357 if (ret == 0)
358 dcdc->dvs_vsel = vsel;
359 else
360 dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n",
361 ret);
362
363 return 0; 341 return 0;
364} 342}
365 343
@@ -456,27 +434,6 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc,
456 if (!pdata || !pdata->dvs_gpio) 434 if (!pdata || !pdata->dvs_gpio)
457 return; 435 return;
458 436
459 switch (pdata->dvs_control_src) {
460 case 1:
461 ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
462 break;
463 case 2:
464 ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT;
465 break;
466 default:
467 dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n",
468 pdata->dvs_control_src, dcdc->name);
469 return;
470 }
471
472 ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL,
473 WM831X_DC1_DVS_SRC_MASK, ctrl);
474 if (ret < 0) {
475 dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n",
476 dcdc->name, ret);
477 return;
478 }
479
480 ret = gpio_request(pdata->dvs_gpio, "DCDC DVS"); 437 ret = gpio_request(pdata->dvs_gpio, "DCDC DVS");
481 if (ret < 0) { 438 if (ret < 0) {
482 dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n", 439 dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n",
@@ -498,17 +455,57 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc,
498 } 455 }
499 456
500 dcdc->dvs_gpio = pdata->dvs_gpio; 457 dcdc->dvs_gpio = pdata->dvs_gpio;
458
459 switch (pdata->dvs_control_src) {
460 case 1:
461 ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
462 break;
463 case 2:
464 ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT;
465 break;
466 default:
467 dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n",
468 pdata->dvs_control_src, dcdc->name);
469 return;
470 }
471
472 /* If DVS_VSEL is set to the minimum value then raise it to ON_VSEL
473 * to make bootstrapping a bit smoother.
474 */
475 if (!dcdc->dvs_vsel) {
476 ret = wm831x_set_bits(wm831x,
477 dcdc->base + WM831X_DCDC_DVS_CONTROL,
478 WM831X_DC1_DVS_VSEL_MASK, dcdc->on_vsel);
479 if (ret == 0)
480 dcdc->dvs_vsel = dcdc->on_vsel;
481 else
482 dev_warn(wm831x->dev, "Failed to set DVS_VSEL: %d\n",
483 ret);
484 }
485
486 ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL,
487 WM831X_DC1_DVS_SRC_MASK, ctrl);
488 if (ret < 0) {
489 dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n",
490 dcdc->name, ret);
491 }
501} 492}
502 493
503static __devinit int wm831x_buckv_probe(struct platform_device *pdev) 494static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
504{ 495{
505 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); 496 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
506 struct wm831x_pdata *pdata = wm831x->dev->platform_data; 497 struct wm831x_pdata *pdata = wm831x->dev->platform_data;
507 int id = pdev->id % ARRAY_SIZE(pdata->dcdc); 498 int id;
508 struct wm831x_dcdc *dcdc; 499 struct wm831x_dcdc *dcdc;
509 struct resource *res; 500 struct resource *res;
510 int ret, irq; 501 int ret, irq;
511 502
503 if (pdata && pdata->wm831x_num)
504 id = (pdata->wm831x_num * 10) + 1;
505 else
506 id = 0;
507 id = pdev->id - id;
508
512 dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); 509 dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1);
513 510
514 if (pdata == NULL || pdata->dcdc[id] == NULL) 511 if (pdata == NULL || pdata->dcdc[id] == NULL)
@@ -545,7 +542,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
545 } 542 }
546 dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK; 543 dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK;
547 544
548 ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG); 545 ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL);
549 if (ret < 0) { 546 if (ret < 0) {
550 dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret); 547 dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret);
551 goto err; 548 goto err;
@@ -709,11 +706,17 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev)
709{ 706{
710 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); 707 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
711 struct wm831x_pdata *pdata = wm831x->dev->platform_data; 708 struct wm831x_pdata *pdata = wm831x->dev->platform_data;
712 int id = pdev->id % ARRAY_SIZE(pdata->dcdc); 709 int id;
713 struct wm831x_dcdc *dcdc; 710 struct wm831x_dcdc *dcdc;
714 struct resource *res; 711 struct resource *res;
715 int ret, irq; 712 int ret, irq;
716 713
714 if (pdata && pdata->wm831x_num)
715 id = (pdata->wm831x_num * 10) + 1;
716 else
717 id = 0;
718 id = pdev->id - id;
719
717 dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1); 720 dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1);
718 721
719 if (pdata == NULL || pdata->dcdc[id] == NULL) 722 if (pdata == NULL || pdata->dcdc[id] == NULL)
@@ -1046,3 +1049,4 @@ MODULE_DESCRIPTION("WM831x DC-DC convertor driver");
1046MODULE_LICENSE("GPL"); 1049MODULE_LICENSE("GPL");
1047MODULE_ALIAS("platform:wm831x-buckv"); 1050MODULE_ALIAS("platform:wm831x-buckv");
1048MODULE_ALIAS("platform:wm831x-buckp"); 1051MODULE_ALIAS("platform:wm831x-buckp");
1052MODULE_ALIAS("platform:wm831x-epe");
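
The removed wm831x_buckv_select_max_voltage() shows the BUCKv selector arithmetic this file relies on: a 600 mV floor, 12.5 mV steps, and an offset of 8 so selector 8 maps to 600000 uV; the rewritten set_voltage path then keeps the DVS selector ratcheting upward rather than recomputing a maximum each time. A sketch of just the selector math, mirroring the removed helper:

#include <stdio.h>

/* 600 mV floor, 12.5 mV steps, selector 8 == 600000 uV */
static int uV_to_vsel(int uV)
{
	if (uV < 600000 || uV > 1800000)
		return -1;
	return (uV - 600000) / 12500 + 8;
}

static int vsel_to_uV(int vsel)
{
	return 600000 + (vsel - 8) * 12500;
}

int main(void)
{
	int vsel = uV_to_vsel(1200000);

	printf("1.2 V -> vsel %d -> %d uV\n", vsel, vsel_to_uV(vsel));
	return 0;
}
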
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index 2220cf8defb..6709710a059 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -310,11 +310,17 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev)
310{ 310{
311 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent); 311 struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
312 struct wm831x_pdata *pdata = wm831x->dev->platform_data; 312 struct wm831x_pdata *pdata = wm831x->dev->platform_data;
313 int id = pdev->id % ARRAY_SIZE(pdata->ldo); 313 int id;
314 struct wm831x_ldo *ldo; 314 struct wm831x_ldo *ldo;
315 struct resource *res; 315 struct resource *res;
316 int ret, irq; 316 int ret, irq;
317 317
318 if (pdata && pdata->wm831x_num)
319 id = (pdata->wm831x_num * 10) + 1;
320 else
321 id = 0;
322 id = pdev->id - id;
323
318 dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1); 324 dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
319 325
320 if (pdata == NULL || pdata->ldo[id] == NULL) 326 if (pdata == NULL || pdata->ldo[id] == NULL)
@@ -574,11 +580,17 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->ldo);
+	int id;
 	struct wm831x_ldo *ldo;
 	struct resource *res;
 	int ret, irq;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
 	dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
 
 	if (pdata == NULL || pdata->ldo[id] == NULL)
@@ -764,11 +776,18 @@ static __devinit int wm831x_alive_ldo_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->ldo);
+	int id;
 	struct wm831x_ldo *ldo;
 	struct resource *res;
 	int ret;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
+
 	dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
 
 	if (pdata == NULL || pdata->ldo[id] == NULL)
diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c
index 35b2958d510..1a6a690f24d 100644
--- a/drivers/regulator/wm8994-regulator.c
+++ b/drivers/regulator/wm8994-regulator.c
@@ -43,7 +43,7 @@ static int wm8994_ldo_enable(struct regulator_dev *rdev)
 	if (!ldo->enable)
 		return 0;
 
-	gpio_set_value(ldo->enable, 1);
+	gpio_set_value_cansleep(ldo->enable, 1);
 	ldo->is_enabled = true;
 
 	return 0;
@@ -57,7 +57,7 @@ static int wm8994_ldo_disable(struct regulator_dev *rdev)
 	if (!ldo->enable)
 		return -EINVAL;
 
-	gpio_set_value(ldo->enable, 0);
+	gpio_set_value_cansleep(ldo->enable, 0);
 	ldo->is_enabled = false;
 
 	return 0;
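
The wm8994 change matters because the enable line may sit behind a GPIO expander whose accesses sleep; plain gpio_set_value() is only safe for controllers that can be driven from atomic context. A kernel-style sketch of the distinction, not a buildable module on its own (set_enable_pin is a hypothetical helper):

	#include <linux/gpio.h>

	static void set_enable_pin(unsigned gpio, int value)
	{
		if (gpio_cansleep(gpio))
			/* may sleep: only legal in process context */
			gpio_set_value_cansleep(gpio, value);
		else
			/* atomic-safe path for memory-mapped GPIOs */
			gpio_set_value(gpio, value);
	}
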
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index bcae8dd4149..7789002bdd5 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
 		pr_info("%s: already running\n", pdev->name);
 
 	/* force to 24 hour mode */
-	new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
+	new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
 	new_ctrl |= OMAP_RTC_CTRL_STOP;
 
 	/* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE:
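
The one-character rtc-omap fix flips the mask semantics: reg & ~mask clears the named bits and keeps the rest, while reg & mask keeps only the named bits. A runnable illustration (the bit positions are made up for the example):

	#include <stdio.h>
	#include <stdint.h>

	#define SPLIT     (1u << 7)	/* hypothetical bit positions */
	#define AUTO_COMP (1u << 2)

	int main(void)
	{
		uint8_t reg = 0xff;

		/* clear the named bits, preserve everything else */
		printf("clear: 0x%02x\n", (uint8_t)(reg & ~(SPLIT | AUTO_COMP)));
		/* keep only the named bits, discard the rest */
		printf("keep:  0x%02x\n", (uint8_t)(reg & (SPLIT | AUTO_COMP)));
		return 0;
	}
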
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 432444af7ee..a1d3ddba99c 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -24,6 +24,7 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 
 #include <asm/ccwdev.h>
 #include <asm/ebcdic.h>
@@ -888,11 +889,11 @@ char *dasd_get_user_string(const char __user *user_buf, size_t user_len)
 {
 	char *buffer;
 
-	buffer = kmalloc(user_len + 1, GFP_KERNEL);
+	buffer = vmalloc(user_len + 1);
 	if (buffer == NULL)
 		return ERR_PTR(-ENOMEM);
 	if (copy_from_user(buffer, user_buf, user_len) != 0) {
-		kfree(buffer);
+		vfree(buffer);
 		return ERR_PTR(-EFAULT);
 	}
 	/* got the string, now strip linefeed. */
@@ -930,7 +931,7 @@ static ssize_t dasd_stats_write(struct file *file,
 			dasd_profile_off(prof);
 	} else
 		rc = -EINVAL;
-	kfree(buffer);
+	vfree(buffer);
 	return rc;
 }
 
@@ -1042,7 +1043,7 @@ static ssize_t dasd_stats_global_write(struct file *file,
 			dasd_global_profile_level = DASD_PROFILE_OFF;
 	} else
 		rc = -EINVAL;
-	kfree(buffer);
+	vfree(buffer);
 	return rc;
 }
 
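
dasd_get_user_string() sizes its buffer from a userspace-supplied length, so the kmalloc/kfree pair becomes vmalloc/vfree, which tolerates large allocations that need no physically contiguous pages. A kernel-style sketch of the paired pattern, illustrative rather than buildable standalone (copy_user_string is a hypothetical name):

	#include <linux/vmalloc.h>
	#include <linux/uaccess.h>

	static char *copy_user_string(const char __user *ubuf, size_t len)
	{
		char *buf = vmalloc(len + 1);	/* survives large, user-chosen len */

		if (!buf)
			return NULL;
		if (copy_from_user(buf, ubuf, len)) {
			vfree(buf);	/* vmalloc pairs with vfree, never kfree */
			return NULL;
		}
		buf[len] = '\0';
		return buf;
	}
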
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 30fb979d684..6e835c9fdfc 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1461,6 +1461,15 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
1461 "Read device characteristic failed, rc=%d", rc); 1461 "Read device characteristic failed, rc=%d", rc);
1462 goto out_err3; 1462 goto out_err3;
1463 } 1463 }
1464
1465 if ((device->features & DASD_FEATURE_USERAW) &&
1466 !(private->rdc_data.facilities.RT_in_LR)) {
1467 dev_err(&device->cdev->dev, "The storage server does not "
1468 "support raw-track access\n");
1469 rc = -EINVAL;
1470 goto out_err3;
1471 }
1472
1464 /* find the valid cylinder size */ 1473 /* find the valid cylinder size */
1465 if (private->rdc_data.no_cyl == LV_COMPAT_CYL && 1474 if (private->rdc_data.no_cyl == LV_COMPAT_CYL &&
1466 private->rdc_data.long_no_cyl) 1475 private->rdc_data.long_no_cyl)
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 6c3c5364d08..e12989fff4f 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -312,14 +312,14 @@ static ssize_t dasd_stats_proc_write(struct file *file,
 		pr_info("The statistics have been reset\n");
 	} else
 		goto out_parse_error;
-	kfree(buffer);
+	vfree(buffer);
 	return user_len;
 out_parse_error:
 	rc = -EINVAL;
 	pr_warning("%s is not a supported value for /proc/dasd/statistics\n",
 		   str);
 out_error:
-	kfree(buffer);
+	vfree(buffer);
 	return rc;
 #else
 	pr_warning("/proc/dasd/statistics: is not activated in this kernel\n");
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
index 7ad30e72f86..5f9f929e891 100644
--- a/drivers/s390/char/sclp_async.c
+++ b/drivers/s390/char/sclp_async.c
@@ -82,12 +82,9 @@ static int proc_handler_callhome(struct ctl_table *ctl, int write,
 		return -EFAULT;
 	} else {
 		len = *count;
-		rc = copy_from_user(buf, buffer, sizeof(buf));
-		if (rc != 0)
-			return -EFAULT;
-		buf[sizeof(buf) - 1] = '\0';
-		if (strict_strtoul(buf, 0, &val) != 0)
-			return -EINVAL;
+		rc = kstrtoul_from_user(buffer, len, 0, &val);
+		if (rc)
+			return rc;
 		if (val != 0 && val != 1)
 			return -EINVAL;
 		callhome_enabled = val;
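
kstrtoul_from_user() collapses the copy_from_user / NUL-terminate / strict_strtoul sequence into a single call that reports -EFAULT and -EINVAL itself. An illustrative kernel-style sketch under that assumption (flag_write is hypothetical):

	#include <linux/kernel.h>

	static ssize_t flag_write(const char __user *ubuf, size_t count,
				  unsigned long *flag)
	{
		unsigned long val;
		int rc = kstrtoul_from_user(ubuf, count, 0, &val);

		if (rc)
			return rc;	/* bad pointer or not a number */
		if (val > 1)
			return -EINVAL;
		*flag = val;
		return count;
	}
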
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 7bc643f3f5a..e5c966462c5 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -14,6 +14,8 @@
14#include "chsc.h" 14#include "chsc.h"
15 15
16#define QDIO_BUSY_BIT_PATIENCE (100 << 12) /* 100 microseconds */ 16#define QDIO_BUSY_BIT_PATIENCE (100 << 12) /* 100 microseconds */
17#define QDIO_BUSY_BIT_RETRY_DELAY 10 /* 10 milliseconds */
18#define QDIO_BUSY_BIT_RETRIES 1000 /* = 10s retry time */
17#define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ 19#define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */
18 20
19/* 21/*
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index f8b03a636e4..0e615cb912d 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -188,19 +188,13 @@ static ssize_t qperf_seq_write(struct file *file, const char __user *ubuf,
 	struct qdio_irq *irq_ptr = seq->private;
 	struct qdio_q *q;
 	unsigned long val;
-	char buf[8];
 	int ret, i;
 
 	if (!irq_ptr)
 		return 0;
-	if (count >= sizeof(buf))
-		return -EINVAL;
-	if (copy_from_user(&buf, ubuf, count))
-		return -EFAULT;
-	buf[count] = 0;
-
-	ret = strict_strtoul(buf, 10, &val);
-	if (ret < 0)
+
+	ret = kstrtoul_from_user(ubuf, count, 10, &val);
+	if (ret)
 		return ret;
 
 	switch (val) {
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index e58169c3247..288c9140290 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -313,7 +313,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit)
 	unsigned long schid = *((u32 *) &q->irq_ptr->schid);
 	unsigned int fc = QDIO_SIGA_WRITE;
 	u64 start_time = 0;
-	int cc;
+	int retries = 0, cc;
 
 	if (is_qebsm(q)) {
 		schid = q->irq_ptr->sch_token;
@@ -325,6 +325,7 @@ again:
 	/* hipersocket busy condition */
 	if (unlikely(*busy_bit)) {
 		WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2);
+		retries++;
 
 		if (!start_time) {
 			start_time = get_clock();
@@ -333,6 +334,11 @@ again:
 		if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE)
 			goto again;
 	}
+	if (retries) {
+		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr,
+			      "%4x cc2 BB1:%1d", SCH_NO(q), q->nr);
+		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "count:%u", retries);
+	}
 	return cc;
 }
 
@@ -728,13 +734,14 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q)
 
 static int qdio_kick_outbound_q(struct qdio_q *q)
 {
+	int retries = 0, cc;
 	unsigned int busy_bit;
-	int cc;
 
 	if (!need_siga_out(q))
 		return 0;
 
 	DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr);
+retry:
 	qperf_inc(q, siga_write);
 
 	cc = qdio_siga_output(q, &busy_bit);
@@ -743,7 +750,11 @@ static int qdio_kick_outbound_q(struct qdio_q *q)
 		break;
 	case 2:
 		if (busy_bit) {
-			DBF_ERROR("%4x cc2 REP:%1d", SCH_NO(q), q->nr);
+			while (++retries < QDIO_BUSY_BIT_RETRIES) {
+				mdelay(QDIO_BUSY_BIT_RETRY_DELAY);
+				goto retry;
+			}
+			DBF_ERROR("%4x cc2 BBC:%1d", SCH_NO(q), q->nr);
 			cc |= QDIO_ERROR_SIGA_BUSY;
 		} else
 			DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w cc2:%1d", q->nr);
@@ -753,6 +764,10 @@ static int qdio_kick_outbound_q(struct qdio_q *q)
 		DBF_ERROR("%4x SIGA-W:%1d", SCH_NO(q), cc);
 		break;
 	}
+	if (retries) {
+		DBF_ERROR("%4x cc2 BB2:%1d", SCH_NO(q), q->nr);
+		DBF_ERROR("count:%u", retries);
+	}
 	return cc;
 }
 
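
The qdio hunks turn a single 100-microsecond busy-bit wait into a bounded retry loop: up to QDIO_BUSY_BIT_RETRIES attempts with QDIO_BUSY_BIT_RETRY_DELAY milliseconds between them, roughly 10 seconds worst case. A runnable userspace sketch of the same bounded-retry shape (issue_cmd stands in for the SIGA instruction):

	#include <stdio.h>
	#include <unistd.h>

	#define RETRY_DELAY_MS 10
	#define MAX_RETRIES    1000	/* 1000 * 10 ms = 10 s worst case */

	/* pretends to be busy for the first three calls */
	static int issue_cmd(void) { static int n; return n++ < 3; }

	int main(void)
	{
		int retries = 0;

		while (issue_cmd()) {	/* busy: back off and try again */
			if (++retries >= MAX_RETRIES) {
				fprintf(stderr, "device stayed busy\n");
				return 1;
			}
			usleep(RETRY_DELAY_MS * 1000);
		}
		if (retries)
			printf("succeeded after %d retries\n", retries);
		return 0;
	}
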
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index eba88c749fb..730b4a37b82 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -2267,17 +2267,13 @@ static int __devexit
 pl022_remove(struct amba_device *adev)
 {
 	struct pl022 *pl022 = amba_get_drvdata(adev);
-	int status = 0;
+
 	if (!pl022)
 		return 0;
 
 	/* Remove the queue */
-	status = destroy_queue(pl022);
-	if (status != 0) {
-		dev_err(&adev->dev,
-			"queue remove failed (%d)\n", status);
-		return status;
-	}
+	if (destroy_queue(pl022) != 0)
+		dev_err(&adev->dev, "queue remove failed\n");
 	load_ssp_default_config(pl022);
 	pl022_dma_remove(pl022);
 	free_irq(adev->irq[0], pl022);
@@ -2289,7 +2285,6 @@ pl022_remove(struct amba_device *adev)
 	spi_unregister_master(pl022->master);
 	spi_master_put(pl022->master);
 	amba_set_drvdata(adev, NULL);
-	dev_dbg(&adev->dev, "remove succeeded\n");
 	return 0;
 }
 
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 564ff4e0dbc..8345fb457a4 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,5 +1,6 @@
 config ISCSI_TARGET
 	tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
+	depends on NET
 	select CRYPTO
 	select CRYPTO_CRC32C
 	select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 14c81c4265b..c24fb10de60 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf)
 	struct iscsi_tiqn *tiqn = NULL;
 	int ret;
 
-	if (strlen(buf) > ISCSI_IQN_LEN) {
+	if (strlen(buf) >= ISCSI_IQN_LEN) {
 		pr_err("Target IQN exceeds %d bytes\n",
 				ISCSI_IQN_LEN);
 		return ERR_PTR(-EINVAL);
@@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd(
 	char *text_ptr, *text_in;
 	int cmdsn_ret, niov = 0, rx_got, rx_size;
 	u32 checksum = 0, data_crc = 0, payload_length;
-	u32 padding = 0, text_length = 0;
+	u32 padding = 0, pad_bytes = 0, text_length = 0;
 	struct iscsi_cmd *cmd;
 	struct kvec iov[3];
 	struct iscsi_text *hdr;
@@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd(
 
 	padding = ((-payload_length) & 3);
 	if (padding != 0) {
-		iov[niov].iov_base = cmd->pad_bytes;
+		iov[niov].iov_base = &pad_bytes;
 		iov[niov++].iov_len = padding;
 		rx_size += padding;
 		pr_debug("Receiving %u additional bytes"
@@ -1917,7 +1917,7 @@ static int iscsit_handle_text_cmd(
 	if (conn->conn_ops->DataDigest) {
 		iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
 				text_in, text_length,
-				padding, cmd->pad_bytes,
+				padding, (u8 *)&pad_bytes,
 				(u8 *)&data_crc);
 
 		if (checksum != data_crc) {
@@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask(
 }
 
 #else
-#define iscsit_thread_get_cpumask(X) ({})
+
+void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
+{
+	return;
+}
+
 #define iscsit_thread_check_cpumask(X, Y, Z) ({})
 #endif /* CONFIG_SMP */
 
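
The iscsit_add_tiqn() change fixes a classic off-by-one: strlen() excludes the terminating NUL, so a name of exactly ISCSI_IQN_LEN characters needs ISCSI_IQN_LEN + 1 bytes once the terminator is stored, and the bound must be >=, not >. A small runnable demonstration (IQN_LEN is a stand-in value):

	#include <stdio.h>
	#include <string.h>

	#define IQN_LEN 8	/* stand-in for ISCSI_IQN_LEN */

	int main(void)
	{
		char dst[IQN_LEN];
		const char *name = "12345678";	/* strlen(name) == IQN_LEN */

		if (strlen(name) >= IQN_LEN) {	/* '>' would let this through */
			fprintf(stderr, "name too long for buffer\n");
			return 1;
		}
		strcpy(dst, name);	/* safe only because of the check above */
		printf("%s\n", dst);
		return 0;
	}
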
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 32bb92c4445..f095e65b1cc 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
 		return ERR_PTR(-EOVERFLOW);
 	}
 	memset(buf, 0, MAX_PORTAL_LEN + 1);
-	snprintf(buf, MAX_PORTAL_LEN, "%s", name);
+	snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name);
 
 	memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage));
 
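
snprintf()'s size argument counts the terminating NUL, so passing MAX_PORTAL_LEN against a buffer of MAX_PORTAL_LEN + 1 bytes truncated the portal string one character early. Runnable illustration with a stand-in size:

	#include <stdio.h>

	int main(void)
	{
		char buf[8 + 1];	/* stand-in for MAX_PORTAL_LEN + 1 */

		/* sizeof(buf) - 1 here would drop the eighth character */
		snprintf(buf, sizeof(buf), "%s", "12345678");
		printf("'%s'\n", buf);	/* prints all eight characters */
		return 0;
	}
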
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 713a4d23557..4d087ac1106 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation(
978 pr_err("Unable to allocate memory for struct iscsi_login.\n"); 978 pr_err("Unable to allocate memory for struct iscsi_login.\n");
979 iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, 979 iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
980 ISCSI_LOGIN_STATUS_NO_RESOURCES); 980 ISCSI_LOGIN_STATUS_NO_RESOURCES);
981 goto out; 981 return NULL;
982 } 982 }
983 983
984 login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); 984 login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index c75a01a1c47..89760329d5d 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb(
 }
 EXPORT_SYMBOL(transport_generic_handle_cdb);
 
+static void transport_generic_request_failure(struct se_cmd *,
+			struct se_device *, int, int);
 /*
  * Used by fabric module frontends to queue tasks directly.
  * Many only be used from process context only
@@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb);
 int transport_handle_cdb_direct(
 	struct se_cmd *cmd)
 {
+	int ret;
+
 	if (!cmd->se_lun) {
 		dump_stack();
 		pr_err("cmd->se_lun is NULL\n");
@@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct(
1765 " from interrupt context\n"); 1769 " from interrupt context\n");
1766 return -EINVAL; 1770 return -EINVAL;
1767 } 1771 }
1768 1772 /*
1769 return transport_generic_new_cmd(cmd); 1773 * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following
1774 * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue()
1775 * in existing usage to ensure that outstanding descriptors are handled
1776 * correctly during shutdown via transport_generic_wait_for_tasks()
1777 *
1778 * Also, we don't take cmd->t_state_lock here as we only expect
1779 * this to be called for initial descriptor submission.
1780 */
1781 cmd->t_state = TRANSPORT_NEW_CMD;
1782 atomic_set(&cmd->t_transport_active, 1);
1783 /*
1784 * transport_generic_new_cmd() is already handling QUEUE_FULL,
1785 * so follow TRANSPORT_NEW_CMD processing thread context usage
1786 * and call transport_generic_request_failure() if necessary..
1787 */
1788 ret = transport_generic_new_cmd(cmd);
1789 if (ret == -EAGAIN)
1790 return 0;
1791 else if (ret < 0) {
1792 cmd->transport_error_status = ret;
1793 transport_generic_request_failure(cmd, NULL, 0,
1794 (cmd->data_direction != DMA_TO_DEVICE));
1795 }
1796 return 0;
1770} 1797}
1771EXPORT_SYMBOL(transport_handle_cdb_direct); 1798EXPORT_SYMBOL(transport_handle_cdb_direct);
1772 1799
@@ -3324,7 +3351,7 @@ static int transport_generic_cmd_sequencer(
 			goto out_invalid_cdb_field;
 		}
 
-		cmd->t_task_lba = get_unaligned_be16(&cdb[2]);
+		cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
 		passthrough = (dev->transport->transport_type ==
 				TRANSPORT_PLUGIN_PHBA_PDEV);
 		/*
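
READ_16/WRITE_16 CDBs carry a 64-bit logical block address in bytes 2..9, so the earlier get_unaligned_be16() silently kept only the top two bytes. A portable userspace sketch of what a big-endian 64-bit unaligned read does:

	#include <stdio.h>
	#include <stdint.h>

	/* byte-at-a-time decode: endian- and alignment-independent */
	static uint64_t be64_decode(const uint8_t *p)
	{
		uint64_t v = 0;
		int i;

		for (i = 0; i < 8; i++)
			v = (v << 8) | p[i];
		return v;
	}

	int main(void)
	{
		uint8_t cdb[16] = { 0 };	/* 16-byte CDB, LBA at bytes 2..9 */

		cdb[2] = 0x01;	/* a be16 read would stop after bytes 2..3 */
		cdb[9] = 0xff;
		printf("lba = 0x%llx\n", (unsigned long long)be64_decode(&cdb[2]));
		return 0;
	}
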
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index f7fff7ed63c..bd4fe21a23b 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller);
 
 ssize_t ft_format_wwn(char *, size_t, u64);
 
+/*
+ * Underlying HW specific helper function
+ */
+void ft_invl_hw_context(struct ft_cmd *);
+
 #endif /* __TCM_FC_H__ */
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 09df38b4610..5654dc22f7a 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg)
 	default:
 		pr_debug("%s: unhandled frame r_ctl %x\n",
 		       __func__, fh->fh_r_ctl);
+		ft_invl_hw_context(cmd);
 		fc_frame_free(fp);
 		transport_generic_free_cmd(&cmd->se_cmd, 0, 0);
 		break;
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index 8e2a46ddccc..c37f4cd9645 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -213,62 +213,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
 	if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF))
 		goto drop;
 
+	f_ctl = ntoh24(fh->fh_f_ctl);
+	ep = fc_seq_exch(seq);
+	lport = ep->lp;
+	if (cmd->was_ddp_setup) {
+		BUG_ON(!ep);
+		BUG_ON(!lport);
+	}
+
 	/*
-	 * Doesn't expect even single byte of payload. Payload
+	 * Doesn't expect payload if DDP is setup. Payload
 	 * is expected to be copied directly to user buffers
-	 * due to DDP (Large Rx offload) feature, hence
-	 * BUG_ON if BUF is non-NULL
+	 * due to DDP (Large Rx offload),
 	 */
 	buf = fc_frame_payload_get(fp, 1);
-	if (cmd->was_ddp_setup && buf) {
-		pr_debug("%s: When DDP was setup, not expected to"
-			"receive frame with payload, Payload shall be"
-			"copied directly to buffer instead of coming "
-			"via. legacy receive queues\n", __func__);
-		BUG_ON(buf);
-	}
+	if (buf)
+		pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
+			"cmd->sg_cnt 0x%x. DDP was setup"
+			" hence not expected to receive frame with "
+			"payload, Frame will be dropped if "
+			"'Sequence Initiative' bit in f_ctl is "
+			"not set\n", __func__, ep->xid, f_ctl,
+			cmd->sg, cmd->sg_cnt);
+	/*
+	 * Invalidate HW DDP context if it was setup for respective
+	 * command. Invalidation of HW DDP context is required in both
+	 * situations (success and error).
+	 */
+	ft_invl_hw_context(cmd);
 
 	/*
-	 * If ft_cmd indicated 'ddp_setup', in that case only the last frame
-	 * should come with 'TSI bit being set'. If 'TSI bit is not set and if
-	 * data frame appears here, means error condition. In both the cases
-	 * release the DDP context (ddp_put) and in error case, as well
-	 * initiate error recovery mechanism.
+	 * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
+	 * write data frame is received successfully where payload is
+	 * posted directly to user buffer and only the last frame's
+	 * header is posted in receive queue.
+	 *
+	 * If "Sequence Initiative (TSI)" bit is not set, means error
+	 * condition w.r.t. DDP, hence drop the packet and let explicit
+	 * ABORTS from other end of exchange timer trigger the recovery.
 	 */
-	ep = fc_seq_exch(seq);
-	if (cmd->was_ddp_setup) {
-		BUG_ON(!ep);
-		lport = ep->lp;
-		BUG_ON(!lport);
-	}
-	if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) {
-		f_ctl = ntoh24(fh->fh_f_ctl);
-		/*
-		 * If TSI bit set in f_ctl, means last write data frame is
-		 * received successfully where payload is posted directly
-		 * to user buffer and only the last frame's header is posted
-		 * in legacy receive queue
-		 */
-		if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */
-			cmd->write_data_len = lport->tt.ddp_done(lport,
-								ep->xid);
-			goto last_frame;
-		} else {
-			/*
-			 * Updating the write_data_len may be meaningless at
-			 * this point, but just in case if required in future
-			 * for debugging or any other purpose
-			 */
-			pr_err("%s: Received frame with TSI bit not"
-			       " being SET, dropping the frame, "
-			       "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n",
-			       __func__, cmd->sg, cmd->sg_cnt);
-			cmd->write_data_len = lport->tt.ddp_done(lport,
-							      ep->xid);
-			lport->tt.seq_exch_abort(cmd->seq, 0);
-			goto drop;
-		}
-	}
+	if (f_ctl & FC_FC_SEQ_INIT)
+		goto last_frame;
+	else
+		goto drop;
 
 	rel_off = ntohl(fh->fh_parm_offset);
 	frame_len = fr_len(fp);
@@ -331,3 +318,39 @@ last_frame:
 drop:
 	fc_frame_free(fp);
 }
+
+/*
+ * Handle and cleanup any HW specific resources if
+ * received ABORTS, errors, timeouts.
+ */
+void ft_invl_hw_context(struct ft_cmd *cmd)
+{
+	struct fc_seq *seq = cmd->seq;
+	struct fc_exch *ep = NULL;
+	struct fc_lport *lport = NULL;
+
+	BUG_ON(!cmd);
+
+	/* Cleanup the DDP context in HW if DDP was setup */
+	if (cmd->was_ddp_setup && seq) {
+		ep = fc_seq_exch(seq);
+		if (ep) {
+			lport = ep->lp;
+			if (lport && (ep->xid <= lport->lro_xid))
+				/*
+				 * "ddp_done" triggers invalidation of HW
+				 * specific DDP context
+				 */
+				cmd->write_data_len = lport->tt.ddp_done(lport,
+								    ep->xid);
+
+			/*
+			 * Resetting same variable to indicate HW's
+			 * DDP context has been invalidated to avoid
+			 * re-invalidation of same context (context is
+			 * identified using ep->xid)
+			 */
+			cmd->was_ddp_setup = 0;
+		}
+	}
+}
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index bf7c687519e..f7f71b2d310 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -14,11 +14,7 @@ menuconfig THERMAL
 	  If you want this support, you should say Y or M here.
 
 config THERMAL_HWMON
-	bool "Hardware monitoring support"
+	bool
 	depends on THERMAL
 	depends on HWMON=y || HWMON=THERMAL
-	help
-	  The generic thermal sysfs driver's hardware monitoring support
-	  requires a 2.10.7/3.0.2 or later lm-sensors userspace.
-
-	  Say Y if your user-space is new enough.
+	default y
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 0b1c82ad680..708f8e92771 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 
 /* hwmon sys I/F */
 #include <linux/hwmon.h>
+
+/* thermal zone devices with the same type share one hwmon device */
+struct thermal_hwmon_device {
+	char type[THERMAL_NAME_LENGTH];
+	struct device *device;
+	int count;
+	struct list_head tz_list;
+	struct list_head node;
+};
+
+struct thermal_hwmon_attr {
+	struct device_attribute attr;
+	char name[16];
+};
+
+/* one temperature input for each thermal zone */
+struct thermal_hwmon_temp {
+	struct list_head hwmon_node;
+	struct thermal_zone_device *tz;
+	struct thermal_hwmon_attr temp_input;	/* hwmon sys attr */
+	struct thermal_hwmon_attr temp_crit;	/* hwmon sys attr */
+};
+
 static LIST_HEAD(thermal_hwmon_list);
 
 static ssize_t
424 447
425static ssize_t 448static ssize_t
@@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
 	int ret;
 	struct thermal_hwmon_attr *hwmon_attr
 			= container_of(attr, struct thermal_hwmon_attr, attr);
-	struct thermal_zone_device *tz
-			= container_of(hwmon_attr, struct thermal_zone_device,
+	struct thermal_hwmon_temp *temp
+			= container_of(hwmon_attr, struct thermal_hwmon_temp,
 				       temp_input);
+	struct thermal_zone_device *tz = temp->tz;
 
 	ret = tz->ops->get_temp(tz, &temperature);
 
@@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
 {
 	struct thermal_hwmon_attr *hwmon_attr
 			= container_of(attr, struct thermal_hwmon_attr, attr);
-	struct thermal_zone_device *tz
-			= container_of(hwmon_attr, struct thermal_zone_device,
+	struct thermal_hwmon_temp *temp
+			= container_of(hwmon_attr, struct thermal_hwmon_temp,
 				       temp_crit);
+	struct thermal_zone_device *tz = temp->tz;
 	long temperature;
 	int ret;
 
@@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr,
 }
 
 
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+static struct thermal_hwmon_device *
+thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
 {
 	struct thermal_hwmon_device *hwmon;
-	int new_hwmon_device = 1;
-	int result;
 
 	mutex_lock(&thermal_list_lock);
 	list_for_each_entry(hwmon, &thermal_hwmon_list, node)
 		if (!strcmp(hwmon->type, tz->type)) {
-			new_hwmon_device = 0;
 			mutex_unlock(&thermal_list_lock);
-			goto register_sys_interface;
+			return hwmon;
+		}
+	mutex_unlock(&thermal_list_lock);
+
+	return NULL;
+}
+
+/* Find the temperature input matching a given thermal zone */
+static struct thermal_hwmon_temp *
+thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
+			  const struct thermal_zone_device *tz)
+{
+	struct thermal_hwmon_temp *temp;
+
+	mutex_lock(&thermal_list_lock);
+	list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
+		if (temp->tz == tz) {
+			mutex_unlock(&thermal_list_lock);
+			return temp;
 		}
 	mutex_unlock(&thermal_list_lock);
 
+	return NULL;
+}
+
+static int
+thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+	struct thermal_hwmon_device *hwmon;
+	struct thermal_hwmon_temp *temp;
+	int new_hwmon_device = 1;
+	int result;
+
+	hwmon = thermal_hwmon_lookup_by_type(tz);
+	if (hwmon) {
+		new_hwmon_device = 0;
+		goto register_sys_interface;
+	}
+
 	hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL);
 	if (!hwmon)
 		return -ENOMEM;
@@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 		goto free_mem;
 
  register_sys_interface:
-	tz->hwmon = hwmon;
+	temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL);
+	if (!temp) {
+		result = -ENOMEM;
+		goto unregister_name;
+	}
+
+	temp->tz = tz;
 	hwmon->count++;
 
-	snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH,
+	snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH,
 		 "temp%d_input", hwmon->count);
-	tz->temp_input.attr.attr.name = tz->temp_input.name;
-	tz->temp_input.attr.attr.mode = 0444;
-	tz->temp_input.attr.show = temp_input_show;
-	sysfs_attr_init(&tz->temp_input.attr.attr);
-	result = device_create_file(hwmon->device, &tz->temp_input.attr);
+	temp->temp_input.attr.attr.name = temp->temp_input.name;
+	temp->temp_input.attr.attr.mode = 0444;
+	temp->temp_input.attr.show = temp_input_show;
+	sysfs_attr_init(&temp->temp_input.attr.attr);
+	result = device_create_file(hwmon->device, &temp->temp_input.attr);
 	if (result)
-		goto unregister_name;
+		goto free_temp_mem;
 
 	if (tz->ops->get_crit_temp) {
 		unsigned long temperature;
 		if (!tz->ops->get_crit_temp(tz, &temperature)) {
-			snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH,
+			snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH,
 				"temp%d_crit", hwmon->count);
-			tz->temp_crit.attr.attr.name = tz->temp_crit.name;
-			tz->temp_crit.attr.attr.mode = 0444;
-			tz->temp_crit.attr.show = temp_crit_show;
-			sysfs_attr_init(&tz->temp_crit.attr.attr);
+			temp->temp_crit.attr.attr.name = temp->temp_crit.name;
+			temp->temp_crit.attr.attr.mode = 0444;
+			temp->temp_crit.attr.show = temp_crit_show;
+			sysfs_attr_init(&temp->temp_crit.attr.attr);
 			result = device_create_file(hwmon->device,
-						    &tz->temp_crit.attr);
+						    &temp->temp_crit.attr);
 			if (result)
 				goto unregister_input;
 		}
@@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 	mutex_lock(&thermal_list_lock);
 	if (new_hwmon_device)
 		list_add_tail(&hwmon->node, &thermal_hwmon_list);
-	list_add_tail(&tz->hwmon_node, &hwmon->tz_list);
+	list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
 	mutex_unlock(&thermal_list_lock);
 
 	return 0;
 
  unregister_input:
-	device_remove_file(hwmon->device, &tz->temp_input.attr);
+	device_remove_file(hwmon->device, &temp->temp_input.attr);
+ free_temp_mem:
+	kfree(temp);
  unregister_name:
 	if (new_hwmon_device) {
 		device_remove_file(hwmon->device, &dev_attr_name);
@@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
 static void
 thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
 {
-	struct thermal_hwmon_device *hwmon = tz->hwmon;
+	struct thermal_hwmon_device *hwmon;
+	struct thermal_hwmon_temp *temp;
+
+	hwmon = thermal_hwmon_lookup_by_type(tz);
+	if (unlikely(!hwmon)) {
+		/* Should never happen... */
+		dev_dbg(&tz->device, "hwmon device lookup failed!\n");
+		return;
+	}
+
+	temp = thermal_hwmon_lookup_temp(hwmon, tz);
+	if (unlikely(!temp)) {
+		/* Should never happen... */
+		dev_dbg(&tz->device, "temperature input lookup failed!\n");
+		return;
+	}
 
-	tz->hwmon = NULL;
-	device_remove_file(hwmon->device, &tz->temp_input.attr);
+	device_remove_file(hwmon->device, &temp->temp_input.attr);
 	if (tz->ops->get_crit_temp)
-		device_remove_file(hwmon->device, &tz->temp_crit.attr);
+		device_remove_file(hwmon->device, &temp->temp_crit.attr);
 
 	mutex_lock(&thermal_list_lock);
-	list_del(&tz->hwmon_node);
+	list_del(&temp->hwmon_node);
+	kfree(temp);
 	if (!list_empty(&hwmon->tz_list)) {
 		mutex_unlock(&thermal_list_lock);
 		return;
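
The thermal refactor works because each sysfs attribute is embedded inside a struct thermal_hwmon_temp, letting the show() callbacks recover the owning object with container_of(). A standalone illustration of the idiom (the struct names are simplified stand-ins):

	#include <stdio.h>
	#include <stddef.h>

	/* userspace rendering of the kernel's container_of() */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct attr { const char *name; };

	struct temp_input {
		int zone_id;
		struct attr input;	/* embedded member handed to callbacks */
	};

	int main(void)
	{
		struct temp_input t = { 3, { "temp3_input" } };
		struct attr *a = &t.input;	/* all a callback receives */
		struct temp_input *owner =
			container_of(a, struct temp_input, input);

		printf("attr %s belongs to zone %d\n", a->name, owner->zone_id);
		return 0;
	}
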
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 827db765459..7e91b3d368c 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -1286,22 +1286,17 @@ static int serial_imx_resume(struct platform_device *dev)
 static int serial_imx_probe_dt(struct imx_port *sport,
 		struct platform_device *pdev)
 {
+	static int portnum = 0;
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *of_id =
 			of_match_device(imx_uart_dt_ids, &pdev->dev);
-	int ret;
 
 	if (!np)
 		return -ENODEV;
 
-	ret = of_alias_get_id(np, "serial");
-	if (ret < 0) {
-		pr_err("%s: failed to get alias id, errno %d\n",
-		       __func__, ret);
-		return -ENODEV;
-	} else {
-		sport->port.line = ret;
-	}
+	sport->port.line = portnum++;
+	if (sport->port.line >= UART_NR)
+		return -EINVAL;
 
 	if (of_get_property(np, "fsl,uart-has-rtscts", NULL))
 		sport->have_rtscts = 1;
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index d0a56235c50..2ec57b2fb27 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1889,7 +1889,7 @@ static int __devinit sci_init_single(struct platform_device *dev,
 
 	if (p->regtype == SCIx_PROBE_REGTYPE) {
 		ret = sci_probe_regmap(p);
-		if (unlikely(!ret))
+		if (unlikely(ret))
 			return ret;
 	}
 
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 69407e72aac..278aeaa9250 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633
 	  enable its driver.
 
 config BACKLIGHT_AAT2870
-	bool "AnalogicTech AAT2870 Backlight"
+	tristate "AnalogicTech AAT2870 Backlight"
 	depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE
 	help
 	  If you have a AnalogicTech AAT2870 say Y to enable the
diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c
index 4952a617563..331f1ef1dad 100644
--- a/drivers/video/backlight/aat2870_bl.c
+++ b/drivers/video/backlight/aat2870_bl.c
@@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl,
 	struct backlight_device *bd = aat2870_bl->bd;
 	int val;
 
-	val = brightness * aat2870_bl->max_current;
+	val = brightness * (aat2870_bl->max_current - 1);
 	val /= bd->props.max_brightness;
 
 	return val;
@@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev)
 	props.type = BACKLIGHT_RAW;
 	bd = backlight_device_register("aat2870-backlight", &pdev->dev,
 				       aat2870_bl, &aat2870_bl_ops, &props);
-	if (!bd) {
+	if (IS_ERR(bd)) {
 		dev_err(&pdev->dev,
 			"Failed allocate memory for backlight device\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(bd);
 		goto out_kfree;
 	}
 
@@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev)
 	else
 		aat2870_bl->channels = AAT2870_BL_CH_ALL;
 
-	if (pdata->max_brightness > 0)
+	if (pdata->max_current > 0)
 		aat2870_bl->max_current = pdata->max_current;
 	else
 		aat2870_bl->max_current = AAT2870_CURRENT_27_9;
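
backlight_device_register() reports failure through an ERR_PTR-encoded pointer and never returns NULL, so the old !bd test could not fire. A userspace rendering of the convention, re-implemented here only for illustration:

	#include <stdio.h>
	#include <errno.h>

	#define MAX_ERRNO	4095
	#define ERR_PTR(err)	((void *)(long)(err))
	#define PTR_ERR(ptr)	((long)(ptr))
	#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

	static void *register_device(int fail)
	{
		return fail ? ERR_PTR(-ENOMEM) : (void *)0x1000;
	}

	int main(void)
	{
		void *dev = register_device(1);

		if (IS_ERR(dev)) {	/* a NULL check would miss this */
			printf("register failed: %ld\n", PTR_ERR(dev));
			return 1;
		}
		return 0;
	}
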
diff --git a/drivers/video/savage/savagefb.h b/drivers/video/savage/savagefb.h
index 32549d177b1..dcaab9012ca 100644
--- a/drivers/video/savage/savagefb.h
+++ b/drivers/video/savage/savagefb.h
@@ -55,7 +55,7 @@
 
 #define S3_SAVAGE3D_SERIES(chip)	((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX))
 
-#define S3_SAVAGE4_SERIES(chip)		((chip>=S3_SAVAGE4) || (chip<=S3_PROSAVAGEDDR))
+#define S3_SAVAGE4_SERIES(chip)		((chip>=S3_SAVAGE4) && (chip<=S3_PROSAVAGEDDR))
 
 #define S3_SAVAGE_MOBILE_SERIES(chip)	((chip==S3_SAVAGE_MX) || (chip==S3_SUPERSAVAGE))
 
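
The savagefb fix corrects a tautological range test: with ||, every chip id satisfies at least one of the two comparisons, so the macro was always true. A runnable check (the enum values are placeholders):

	#include <stdio.h>

	enum chip { SAVAGE3D, SAVAGE4, PROSAVAGEDDR, SUPERSAVAGE };

	#define IN_RANGE_BROKEN(c) ((c) >= SAVAGE4 || (c) <= PROSAVAGEDDR)
	#define IN_RANGE_FIXED(c)  ((c) >= SAVAGE4 && (c) <= PROSAVAGEDDR)

	int main(void)
	{
		int c;

		for (c = SAVAGE3D; c <= SUPERSAVAGE; c++)
			printf("chip %d: broken=%d fixed=%d\n",
			       c, IN_RANGE_BROKEN(c), IN_RANGE_FIXED(c));
		return 0;
	}
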
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index f441726ddf2..86b0735e6aa 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -36,9 +36,6 @@ config WATCHDOG_CORE
 	  and gives them the /dev/watchdog interface (and later also the
 	  sysfs interface).
 
-	  To compile this driver as a module, choose M here: the module will
-	  be called watchdog.
-
 config WATCHDOG_NOWAYOUT
 	bool "Disable watchdog shutdown on close"
 	help
diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c
index afa78a54711..809f41c30c4 100644
--- a/drivers/watchdog/nv_tco.c
+++ b/drivers/watchdog/nv_tco.c
@@ -458,7 +458,15 @@ static int __devexit nv_tco_remove(struct platform_device *dev)
 
 static void nv_tco_shutdown(struct platform_device *dev)
 {
+	u32 val;
+
 	tco_timer_stop();
+
+	/* Some BIOSes fail the POST (once) if the NO_REBOOT flag is not
+	 * unset during shutdown. */
+	pci_read_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, &val);
+	val &= ~MCP51_SMBUS_SETUP_B_TCO_REBOOT;
+	pci_write_config_dword(tco_pci, MCP51_SMBUS_SETUP_B, val);
 }
 
 static struct platform_driver nv_tco_driver = {
diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c
index db84f2322d1..a267dc078da 100644
--- a/drivers/watchdog/shwdt.c
+++ b/drivers/watchdog/shwdt.c
@@ -64,7 +64,7 @@
  * misses its deadline, the kernel timer will allow the WDT to overflow.
  */
 static int clock_division_ratio = WTCSR_CKS_4096;
-#define next_ping_period(cks)	msecs_to_jiffies(cks - 4)
+#define next_ping_period(cks)	(jiffies + msecs_to_jiffies(cks - 4))
 
 static const struct watchdog_info sh_wdt_info;
 static struct platform_device *sh_wdt_dev;
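
next_ping_period() feeds mod_timer(), which expects an absolute expiry time in jiffies; the old macro returned only a duration, an expiry far in the past, so the ping timer fired immediately. A kernel-style sketch of the corrected usage, illustrative only (arm_ping_timer is hypothetical):

	#include <linux/jiffies.h>
	#include <linux/timer.h>

	static void arm_ping_timer(struct timer_list *t, unsigned int period_ms)
	{
		/* absolute deadline = now + converted period */
		mod_timer(t, jiffies + msecs_to_jiffies(period_ms));
	}
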
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index f815283667a..5f7ff8e2fc1 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -11,7 +11,7 @@ config XEN_BALLOON
 
 config XEN_SELFBALLOONING
 	bool "Dynamically self-balloon kernel memory to target"
-	depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP
+	depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP && XEN_TMEM
 	default n
 	help
 	  Self-ballooning dynamically balloons available kernel memory driven
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index e9cb57f0754..9a1d4263075 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -182,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry,
 	return 0;
 }
 
-int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 	       struct posix_acl **dpacl, struct posix_acl **pacl)
 {
 	int retval = 0;
-	mode_t mode = *modep;
+	umode_t mode = *modep;
 	struct posix_acl *acl = NULL;
 
 	if (!S_ISLNK(mode)) {
@@ -319,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
+			umode_t mode = inode->i_mode;
 			retval = posix_acl_equiv_mode(acl, &mode);
 			if (retval < 0)
 				goto err_out;
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index ddb7ae19d97..55955641196 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -20,7 +20,7 @@ extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
 			       struct posix_acl **, struct posix_acl **);
-extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 			 struct posix_acl **dpacl, struct posix_acl **pacl);
 #else
 #define v9fs_iop_get_acl NULL
@@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry,
 {
 	return 0;
 }
-static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 				struct posix_acl **dpacl,
 				struct posix_acl **pacl)
 {
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 9a26dce5a99..b6c8ed20519 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -206,7 +206,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	int err = 0;
 	gid_t gid;
 	int flags;
-	mode_t mode;
+	umode_t mode;
 	char *name = NULL;
 	struct file *filp;
 	struct p9_qid qid;
@@ -348,7 +348,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	gid_t gid;
 	char *name;
-	mode_t mode;
+	umode_t mode;
 	struct inode *inode;
 	struct p9_qid qid;
 	struct dentry *dir_dentry;
@@ -751,7 +751,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	int err;
 	gid_t gid;
 	char *name;
-	mode_t mode;
+	umode_t mode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	struct inode *inode;
diff --git a/fs/Kconfig b/fs/Kconfig
index 19891aab9c6..9fe0b349f4c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
 	select TMPFS_XATTR
 	select GENERIC_ACL
 	help
-	  POSIX Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
+	  POSIX Access Control Lists (ACLs) support additional access rights
+	  for users and groups beyond the standard owner/group/world scheme,
+	  and this option selects support for ACLs specifically for tmpfs
+	  filesystems.
+
+	  If you've selected TMPFS, it's possible that you'll also need
+	  this option as there are a number of Linux distros that require
+	  POSIX ACL support under /dev for certain features to work properly.
+	  For example, some distros need this feature for ALSA-related /dev
+	  files for sound to work properly. In short, if you're not sure,
+	  say Y.
 
 	  To learn more about Access Control Lists, visit the POSIX ACLs for
 	  Linux website <http://acl.bestbits.at/>.
 
-	  If you don't know what Access Control Lists are, say N.
-
 config TMPFS_XATTR
 	bool "Tmpfs extended attributes"
 	depends on TMPFS
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f55aad4d161..ff77262e887 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 	struct inode *bd_inode = filp->f_mapping->host;
 	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
+
+	error = filemap_write_and_wait_range(filp->f_mapping, start, end);
+	if (error)
+		return error;
 
 	/*
 	 * There is no need to serialise calls to blkdev_issue_flush with
@@ -552,6 +556,7 @@ struct block_device *bdget(dev_t dev)
 
 	if (inode->i_state & I_NEW) {
 		bdev->bd_contains = NULL;
+		bdev->bd_super = NULL;
 		bdev->bd_inode = inode;
 		bdev->bd_block_size = (1 << inode->i_blkbits);
 		bdev->bd_part_count = 0;
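
The blkdev_fsync() hunk restores the ordering that data integrity needs: dirty page-cache pages must be written to the device before its volatile write cache is flushed, otherwise the flush can complete while the data is still in memory. A hedged kernel-style sketch of the sequence (sketch_fsync is hypothetical and elides the flush call):

	#include <linux/fs.h>

	static int sketch_fsync(struct file *filp, loff_t start, loff_t end)
	{
		/* 1. push dirty pages in [start, end] and wait for them */
		int error = filemap_write_and_wait_range(filp->f_mapping,
							 start, end);
		if (error)
			return error;
		/* 2. only now flush the device cache (blkdev_issue_flush) */
		return 0;
	}
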
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd2..40e6ac08c21 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
+	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
+
+btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 65a735d8f6e..eb159aaa5a1 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,8 +28,6 @@
28#include "btrfs_inode.h" 28#include "btrfs_inode.h"
29#include "xattr.h" 29#include "xattr.h"
30 30
31#ifdef CONFIG_BTRFS_FS_POSIX_ACL
32
33struct posix_acl *btrfs_get_acl(struct inode *inode, int type) 31struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
34{ 32{
35 int size; 33 int size;
@@ -111,7 +109,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 	int ret, size = 0;
 	const char *name;
 	char *value = NULL;
-	mode_t mode;
 
 	if (acl) {
 		ret = posix_acl_valid(acl);
@@ -122,13 +119,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		mode = inode->i_mode;
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			ret = posix_acl_equiv_mode(acl, &mode);
+			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (ret < 0)
 				return ret;
-			inode->i_mode = mode;
 		}
 		ret = 0;
 		break;
@@ -222,19 +217,16 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 	}
 
 	if (IS_POSIXACL(dir) && acl) {
-		mode_t mode = inode->i_mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			ret = btrfs_set_acl(trans, inode, acl,
 					    ACL_TYPE_DEFAULT);
 			if (ret)
 				goto failed;
 		}
-		ret = posix_acl_create(&acl, GFP_NOFS, &mode);
+		ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
 		if (ret < 0)
 			return ret;
 
-		inode->i_mode = mode;
 		if (ret > 0) {
 			/* we need an acl */
 			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
@@ -282,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.get = btrfs_xattr_acl_get,
 	.set = btrfs_xattr_acl_set,
 };
-
-#else /* CONFIG_BTRFS_FS_POSIX_ACL */
-
-int btrfs_acl_chmod(struct inode *inode)
-{
-	return 0;
-}
-
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
-		   struct inode *inode, struct inode *dir)
-{
-	return 0;
-}
-
-#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf..8ec5d86f173 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	u64 first_byte = disk_start;
 	struct block_device *bdev;
 	int ret;
+	int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 			BUG_ON(ret);
 
-			ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
-			BUG_ON(ret);
+			if (!skip_sum) {
+				ret = btrfs_csum_one_bio(root, inode, bio,
+							 start, 1);
+				BUG_ON(ret);
+			}
 
 			ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
 			BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
418 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 422 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
419 BUG_ON(ret); 423 BUG_ON(ret);
420 424
421 ret = btrfs_csum_one_bio(root, inode, bio, start, 1); 425 if (!skip_sum) {
422 BUG_ON(ret); 426 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
427 BUG_ON(ret);
428 }
423 429
424 ret = btrfs_map_bio(root, WRITE, bio, 0, 1); 430 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
425 BUG_ON(ret); 431 BUG_ON(ret);
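
Note: the two hunks above gate checksum generation on a per-inode NODATASUM flag that is read once at entry and then consulted for every bio. A toy model of that read-once-then-gate pattern (all names hypothetical, no btrfs internals):

#include <stdbool.h>
#include <stdio.h>

#define INODE_NODATASUM 0x1u

struct toy_inode { unsigned flags; };

static void submit_bio(int nr, bool skip_sum)
{
        if (skip_sum)
                printf("bio %d: checksum skipped\n", nr);
        else
                printf("bio %d: checksum computed\n", nr);
}

int main(void)
{
        struct toy_inode inode = { .flags = INODE_NODATASUM };
        /* test the flag once, not per bio */
        bool skip_sum = inode.flags & INODE_NODATASUM;

        for (int i = 0; i < 2; i++)
                submit_bio(i, skip_sum);
        return 0;
}
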
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 365c4e1dde0..0469263e327 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2406,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2406 btrfs_root_item *item, struct btrfs_key *key); 2406 btrfs_root_item *item, struct btrfs_key *key);
2407int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 2407int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2408int btrfs_find_orphan_roots(struct btrfs_root *tree_root); 2408int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
2409int btrfs_set_root_node(struct btrfs_root_item *item, 2409void btrfs_set_root_node(struct btrfs_root_item *item,
2410 struct extent_buffer *node); 2410 struct extent_buffer *node);
2411void btrfs_check_and_init_root_item(struct btrfs_root_item *item); 2411void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
2412 2412
2413/* dir-item.c */ 2413/* dir-item.c */
@@ -2523,6 +2523,14 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
2523#define PageChecked PageFsMisc 2523#define PageChecked PageFsMisc
2524#endif 2524#endif
2525 2525
2526/* This forces readahead on a given range of bytes in an inode */
2527static inline void btrfs_force_ra(struct address_space *mapping,
2528 struct file_ra_state *ra, struct file *file,
2529 pgoff_t offset, unsigned long req_size)
2530{
2531 page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2532}
2533
2526struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); 2534struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
2527int btrfs_set_inode_index(struct inode *dir, u64 *index); 2535int btrfs_set_inode_index(struct inode *dir, u64 *index);
2528int btrfs_unlink_inode(struct btrfs_trans_handle *trans, 2536int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -2551,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2551int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2559int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2552 size_t size, struct bio *bio, unsigned long bio_flags); 2560 size_t size, struct bio *bio, unsigned long bio_flags);
2553 2561
2554unsigned long btrfs_force_ra(struct address_space *mapping,
2555 struct file_ra_state *ra, struct file *file,
2556 pgoff_t offset, pgoff_t last_index);
2557int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2562int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2558int btrfs_readpage(struct file *file, struct page *page); 2563int btrfs_readpage(struct file *file, struct page *page);
2559void btrfs_evict_inode(struct inode *inode); 2564void btrfs_evict_inode(struct inode *inode);
@@ -2648,12 +2653,21 @@ do { \
2648/* acl.c */ 2653/* acl.c */
2649#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2654#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2650struct posix_acl *btrfs_get_acl(struct inode *inode, int type); 2655struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
2651#else
2652#define btrfs_get_acl NULL
2653#endif
2654int btrfs_init_acl(struct btrfs_trans_handle *trans, 2656int btrfs_init_acl(struct btrfs_trans_handle *trans,
2655 struct inode *inode, struct inode *dir); 2657 struct inode *inode, struct inode *dir);
2656int btrfs_acl_chmod(struct inode *inode); 2658int btrfs_acl_chmod(struct inode *inode);
2659#else
2660#define btrfs_get_acl NULL
2661static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
2662 struct inode *inode, struct inode *dir)
2663{
2664 return 0;
2665}
2666static inline int btrfs_acl_chmod(struct inode *inode)
2667{
2668 return 0;
2669}
2670#endif
2657 2671
2658/* relocation.c */ 2672/* relocation.c */
2659int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 2673int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
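
Note: btrfs_force_ra() becomes a trivial static inline over page_cache_sync_readahead() (added in the hunk above) instead of an exported helper doing last_index arithmetic. A loose userspace analogue of such a thin readahead wrapper, using posix_fadvise() purely as a stand-in for the kernel call:

#define _POSIX_C_SOURCE 200112L
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static inline void force_ra(int fd, off_t offset, off_t req_size)
{
        /* hint the kernel to prefetch [offset, offset + req_size) */
        posix_fadvise(fd, offset, req_size, POSIX_FADV_WILLNEED);
}

int main(void)
{
        int fd = open("/etc/hosts", O_RDONLY);

        if (fd >= 0) {
                force_ra(fd, 0, 4096);
                close(fd);
                puts("readahead hinted");
        }
        return 0;
}
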
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c360a848d97..31d84e78129 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -198,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
198 struct btrfs_key key; 198 struct btrfs_key key;
199 int ins_len = mod < 0 ? -1 : 0; 199 int ins_len = mod < 0 ? -1 : 0;
200 int cow = mod != 0; 200 int cow = mod != 0;
201 struct btrfs_key found_key;
202 struct extent_buffer *leaf;
203 201
204 key.objectid = dir; 202 key.objectid = dir;
205 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 203 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
@@ -209,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
209 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); 207 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
210 if (ret < 0) 208 if (ret < 0)
211 return ERR_PTR(ret); 209 return ERR_PTR(ret);
212 if (ret > 0) { 210 if (ret > 0)
213 if (path->slots[0] == 0)
214 return NULL;
215 path->slots[0]--;
216 }
217
218 leaf = path->nodes[0];
219 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
220
221 if (found_key.objectid != dir ||
222 btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
223 found_key.offset != key.offset)
224 return NULL; 211 return NULL;
225 212
226 return btrfs_match_dir_item_name(root, path, name, name_len); 213 return btrfs_match_dir_item_name(root, path, name, name_len);
@@ -315,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
315 struct btrfs_key key; 302 struct btrfs_key key;
316 int ins_len = mod < 0 ? -1 : 0; 303 int ins_len = mod < 0 ? -1 : 0;
317 int cow = mod != 0; 304 int cow = mod != 0;
318 struct btrfs_key found_key;
319 struct extent_buffer *leaf;
320 305
321 key.objectid = dir; 306 key.objectid = dir;
322 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 307 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
@@ -324,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
324 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); 309 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
325 if (ret < 0) 310 if (ret < 0)
326 return ERR_PTR(ret); 311 return ERR_PTR(ret);
327 if (ret > 0) { 312 if (ret > 0)
328 if (path->slots[0] == 0)
329 return NULL;
330 path->slots[0]--;
331 }
332
333 leaf = path->nodes[0];
334 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
335
336 if (found_key.objectid != dir ||
337 btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
338 found_key.offset != key.offset)
339 return NULL; 313 return NULL;
340 314
341 return btrfs_match_dir_item_name(root, path, name, name_len); 315 return btrfs_match_dir_item_name(root, path, name, name_len);
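
Note: both lookup paths above now trust btrfs_search_slot()'s return value: ret > 0 means no exact match for a dir item or xattr key, so there is nothing to step back to and re-verify by hand. A tiny model of the simplification, with bsearch() playing the role of the exact-match search:

#include <stdio.h>
#include <stdlib.h>

static int cmp_int(const void *a, const void *b)
{
        return *(const int *)a - *(const int *)b;
}

static const int *lookup_exact(const int *arr, size_t n, int key)
{
        /* NULL when absent; no "previous slot" fixup needed */
        return bsearch(&key, arr, n, sizeof(*arr), cmp_int);
}

int main(void)
{
        const int keys[] = { 3, 7, 9 };

        printf("7 -> %p\n", (void *)lookup_exact(keys, 3, 7));
        printf("8 -> %p\n", (void *)lookup_exact(keys, 3, 8));
        return 0;
}
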
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4d08ed79405..66bac226944 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -663,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
663 struct btrfs_path *path; 663 struct btrfs_path *path;
664 664
665 path = btrfs_alloc_path(); 665 path = btrfs_alloc_path();
666 BUG_ON(!path); 666 if (!path)
667 return -ENOMEM;
668
667 key.objectid = start; 669 key.objectid = start;
668 key.offset = len; 670 key.offset = len;
669 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 671 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -3272,6 +3274,9 @@ again:
3272 } 3274 }
3273 3275
3274 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3276 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3277 if (ret < 0 && ret != -ENOSPC)
3278 goto out;
3279
3275 spin_lock(&space_info->lock); 3280 spin_lock(&space_info->lock);
3276 if (ret) 3281 if (ret)
3277 space_info->full = 1; 3282 space_info->full = 1;
@@ -3281,6 +3286,7 @@ again:
3281 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3286 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3282 space_info->chunk_alloc = 0; 3287 space_info->chunk_alloc = 0;
3283 spin_unlock(&space_info->lock); 3288 spin_unlock(&space_info->lock);
3289out:
3284 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3290 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3285 return ret; 3291 return ret;
3286} 3292}
@@ -4456,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4456 printk(KERN_ERR "umm, got %d back from search" 4462 printk(KERN_ERR "umm, got %d back from search"
4457 ", was looking for %llu\n", ret, 4463 ", was looking for %llu\n", ret,
4458 (unsigned long long)bytenr); 4464 (unsigned long long)bytenr);
4459 btrfs_print_leaf(extent_root, path->nodes[0]); 4465 if (ret > 0)
4466 btrfs_print_leaf(extent_root,
4467 path->nodes[0]);
4460 } 4468 }
4461 BUG_ON(ret); 4469 BUG_ON(ret);
4462 extent_slot = path->slots[0]; 4470 extent_slot = path->slots[0];
@@ -5073,7 +5081,9 @@ have_block_group:
5073 * group is does point to and try again 5081 * group is does point to and try again
5074 */ 5082 */
5075 if (!last_ptr_loop && last_ptr->block_group && 5083 if (!last_ptr_loop && last_ptr->block_group &&
5076 last_ptr->block_group != block_group) { 5084 last_ptr->block_group != block_group &&
5085 index <=
5086 get_block_group_index(last_ptr->block_group)) {
5077 5087
5078 btrfs_put_block_group(block_group); 5088 btrfs_put_block_group(block_group);
5079 block_group = last_ptr->block_group; 5089 block_group = last_ptr->block_group;
@@ -5501,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
5501 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5511 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
5502 5512
5503 path = btrfs_alloc_path(); 5513 path = btrfs_alloc_path();
5504 BUG_ON(!path); 5514 if (!path)
5515 return -ENOMEM;
5505 5516
5506 path->leave_spinning = 1; 5517 path->leave_spinning = 1;
5507 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5518 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -6272,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6272 int level; 6283 int level;
6273 6284
6274 path = btrfs_alloc_path(); 6285 path = btrfs_alloc_path();
6275 BUG_ON(!path); 6286 if (!path)
6287 return -ENOMEM;
6276 6288
6277 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6289 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6278 BUG_ON(!wc); 6290 if (!wc) {
6291 btrfs_free_path(path);
6292 return -ENOMEM;
6293 }
6279 6294
6280 trans = btrfs_start_transaction(tree_root, 0); 6295 trans = btrfs_start_transaction(tree_root, 0);
6281 BUG_ON(IS_ERR(trans)); 6296 BUG_ON(IS_ERR(trans));
@@ -6538,8 +6553,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6538 u64 min_allocable_bytes; 6553 u64 min_allocable_bytes;
6539 int ret = -ENOSPC; 6554 int ret = -ENOSPC;
6540 6555
6541 if (cache->ro)
6542 return 0;
6543 6556
6544 /* 6557 /*
6545 * We need some metadata space and system metadata space for 6558 * We need some metadata space and system metadata space for
@@ -6555,6 +6568,12 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6555 6568
6556 spin_lock(&sinfo->lock); 6569 spin_lock(&sinfo->lock);
6557 spin_lock(&cache->lock); 6570 spin_lock(&cache->lock);
6571
6572 if (cache->ro) {
6573 ret = 0;
6574 goto out;
6575 }
6576
6558 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6577 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
6559 cache->bytes_super - btrfs_block_group_used(&cache->item); 6578 cache->bytes_super - btrfs_block_group_used(&cache->item);
6560 6579
@@ -6568,7 +6587,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6568 cache->ro = 1; 6587 cache->ro = 1;
6569 ret = 0; 6588 ret = 0;
6570 } 6589 }
6571 6590out:
6572 spin_unlock(&cache->lock); 6591 spin_unlock(&cache->lock);
6573 spin_unlock(&sinfo->lock); 6592 spin_unlock(&sinfo->lock);
6574 return ret; 6593 return ret;
@@ -7183,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7183 spin_unlock(&cluster->refill_lock); 7202 spin_unlock(&cluster->refill_lock);
7184 7203
7185 path = btrfs_alloc_path(); 7204 path = btrfs_alloc_path();
7186 BUG_ON(!path); 7205 if (!path) {
7206 ret = -ENOMEM;
7207 goto out;
7208 }
7187 7209
7188 inode = lookup_free_space_inode(root, block_group, path); 7210 inode = lookup_free_space_inode(root, block_group, path);
7189 if (!IS_ERR(inode)) { 7211 if (!IS_ERR(inode)) {
7190 btrfs_orphan_add(trans, inode); 7212 ret = btrfs_orphan_add(trans, inode);
7213 BUG_ON(ret);
7191 clear_nlink(inode); 7214 clear_nlink(inode);
7192 /* One for the block groups ref */ 7215 /* One for the block groups ref */
7193 spin_lock(&block_group->lock); 7216 spin_lock(&block_group->lock);
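
Note: several hunks in this file replace BUG_ON(!ptr) after an allocation with a -ENOMEM return that unwinds whatever was already allocated (see the path/wc pair in btrfs_drop_snapshot). A self-contained sketch of that unwinding style:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int do_work(void)
{
        int ret = 0;
        char *path = malloc(64);
        char *wc;

        if (!path)
                return -ENOMEM;         /* nothing to unwind yet */

        wc = calloc(1, 128);
        if (!wc) {
                ret = -ENOMEM;
                goto out_path;          /* free only what we own so far */
        }

        /* ... real work would go here ... */
        free(wc);
out_path:
        free(path);
        return ret;
}

int main(void)
{
        printf("do_work -> %d\n", do_work());
        return 0;
}

Failing gracefully lets the caller retry or report the error instead of taking the whole machine down on a transient allocation failure.
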
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 067b1747421..d418164a35f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
254 * 254 *
255 * This should be called with the tree lock held. 255 * This should be called with the tree lock held.
256 */ 256 */
257static int merge_state(struct extent_io_tree *tree, 257static void merge_state(struct extent_io_tree *tree,
258 struct extent_state *state) 258 struct extent_state *state)
259{ 259{
260 struct extent_state *other; 260 struct extent_state *other;
261 struct rb_node *other_node; 261 struct rb_node *other_node;
262 262
263 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 263 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
264 return 0; 264 return;
265 265
266 other_node = rb_prev(&state->rb_node); 266 other_node = rb_prev(&state->rb_node);
267 if (other_node) { 267 if (other_node) {
@@ -287,19 +287,13 @@ static int merge_state(struct extent_io_tree *tree,
287 free_extent_state(other); 287 free_extent_state(other);
288 } 288 }
289 } 289 }
290
291 return 0;
292} 290}
293 291
294static int set_state_cb(struct extent_io_tree *tree, 292static void set_state_cb(struct extent_io_tree *tree,
295 struct extent_state *state, int *bits) 293 struct extent_state *state, int *bits)
296{ 294{
297 if (tree->ops && tree->ops->set_bit_hook) { 295 if (tree->ops && tree->ops->set_bit_hook)
298 return tree->ops->set_bit_hook(tree->mapping->host, 296 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
299 state, bits);
300 }
301
302 return 0;
303} 297}
304 298
305static void clear_state_cb(struct extent_io_tree *tree, 299static void clear_state_cb(struct extent_io_tree *tree,
@@ -309,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree,
309 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 303 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
310} 304}
311 305
306static void set_state_bits(struct extent_io_tree *tree,
307 struct extent_state *state, int *bits);
308
312/* 309/*
313 * insert an extent_state struct into the tree. 'bits' are set on the 310 * insert an extent_state struct into the tree. 'bits' are set on the
314 * struct before it is inserted. 311 * struct before it is inserted.
@@ -324,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree,
324 int *bits) 321 int *bits)
325{ 322{
326 struct rb_node *node; 323 struct rb_node *node;
327 int bits_to_set = *bits & ~EXTENT_CTLBITS;
328 int ret;
329 324
330 if (end < start) { 325 if (end < start) {
331 printk(KERN_ERR "btrfs end < start %llu %llu\n", 326 printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -335,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree,
335 } 330 }
336 state->start = start; 331 state->start = start;
337 state->end = end; 332 state->end = end;
338 ret = set_state_cb(tree, state, bits);
339 if (ret)
340 return ret;
341 333
342 if (bits_to_set & EXTENT_DIRTY) 334 set_state_bits(tree, state, bits);
343 tree->dirty_bytes += end - start + 1; 335
344 state->state |= bits_to_set;
345 node = tree_insert(&tree->state, end, &state->rb_node); 336 node = tree_insert(&tree->state, end, &state->rb_node);
346 if (node) { 337 if (node) {
347 struct extent_state *found; 338 struct extent_state *found;
@@ -357,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree,
357 return 0; 348 return 0;
358} 349}
359 350
360static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, 351static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
361 u64 split) 352 u64 split)
362{ 353{
363 if (tree->ops && tree->ops->split_extent_hook) 354 if (tree->ops && tree->ops->split_extent_hook)
364 return tree->ops->split_extent_hook(tree->mapping->host, 355 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
365 orig, split);
366 return 0;
367} 356}
368 357
369/* 358/*
@@ -659,34 +648,25 @@ again:
659 if (start > end) 648 if (start > end)
660 break; 649 break;
661 650
662 if (need_resched()) { 651 cond_resched_lock(&tree->lock);
663 spin_unlock(&tree->lock);
664 cond_resched();
665 spin_lock(&tree->lock);
666 }
667 } 652 }
668out: 653out:
669 spin_unlock(&tree->lock); 654 spin_unlock(&tree->lock);
670 return 0; 655 return 0;
671} 656}
672 657
673static int set_state_bits(struct extent_io_tree *tree, 658static void set_state_bits(struct extent_io_tree *tree,
674 struct extent_state *state, 659 struct extent_state *state,
675 int *bits) 660 int *bits)
676{ 661{
677 int ret;
678 int bits_to_set = *bits & ~EXTENT_CTLBITS; 662 int bits_to_set = *bits & ~EXTENT_CTLBITS;
679 663
680 ret = set_state_cb(tree, state, bits); 664 set_state_cb(tree, state, bits);
681 if (ret)
682 return ret;
683 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 665 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
684 u64 range = state->end - state->start + 1; 666 u64 range = state->end - state->start + 1;
685 tree->dirty_bytes += range; 667 tree->dirty_bytes += range;
686 } 668 }
687 state->state |= bits_to_set; 669 state->state |= bits_to_set;
688
689 return 0;
690} 670}
691 671
692static void cache_state(struct extent_state *state, 672static void cache_state(struct extent_state *state,
@@ -779,9 +759,7 @@ hit_next:
779 goto out; 759 goto out;
780 } 760 }
781 761
782 err = set_state_bits(tree, state, &bits); 762 set_state_bits(tree, state, &bits);
783 if (err)
784 goto out;
785 763
786 cache_state(state, cached_state); 764 cache_state(state, cached_state);
787 merge_state(tree, state); 765 merge_state(tree, state);
@@ -830,9 +808,7 @@ hit_next:
830 if (err) 808 if (err)
831 goto out; 809 goto out;
832 if (state->end <= end) { 810 if (state->end <= end) {
833 err = set_state_bits(tree, state, &bits); 811 set_state_bits(tree, state, &bits);
834 if (err)
835 goto out;
836 cache_state(state, cached_state); 812 cache_state(state, cached_state);
837 merge_state(tree, state); 813 merge_state(tree, state);
838 if (last_end == (u64)-1) 814 if (last_end == (u64)-1)
@@ -893,11 +869,7 @@ hit_next:
893 err = split_state(tree, state, prealloc, end + 1); 869 err = split_state(tree, state, prealloc, end + 1);
894 BUG_ON(err == -EEXIST); 870 BUG_ON(err == -EEXIST);
895 871
896 err = set_state_bits(tree, prealloc, &bits); 872 set_state_bits(tree, prealloc, &bits);
897 if (err) {
898 prealloc = NULL;
899 goto out;
900 }
901 cache_state(prealloc, cached_state); 873 cache_state(prealloc, cached_state);
902 merge_state(tree, prealloc); 874 merge_state(tree, prealloc);
903 prealloc = NULL; 875 prealloc = NULL;
@@ -1059,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1059 return 0; 1031 return 0;
1060} 1032}
1061 1033
1062/*
1063 * find the first offset in the io tree with 'bits' set. zero is
1064 * returned if we find something, and *start_ret and *end_ret are
1065 * set to reflect the state struct that was found.
1066 *
1067 * If nothing was found, 1 is returned, < 0 on error
1068 */
1069int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1070 u64 *start_ret, u64 *end_ret, int bits)
1071{
1072 struct rb_node *node;
1073 struct extent_state *state;
1074 int ret = 1;
1075
1076 spin_lock(&tree->lock);
1077 /*
1078 * this search will find all the extents that end after
1079 * our range starts.
1080 */
1081 node = tree_search(tree, start);
1082 if (!node)
1083 goto out;
1084
1085 while (1) {
1086 state = rb_entry(node, struct extent_state, rb_node);
1087 if (state->end >= start && (state->state & bits)) {
1088 *start_ret = state->start;
1089 *end_ret = state->end;
1090 ret = 0;
1091 break;
1092 }
1093 node = rb_next(node);
1094 if (!node)
1095 break;
1096 }
1097out:
1098 spin_unlock(&tree->lock);
1099 return ret;
1100}
1101
1102/* find the first state struct with 'bits' set after 'start', and 1034/* find the first state struct with 'bits' set after 'start', and
1103 * return it. tree->lock must be held. NULL will returned if 1035 * return it. tree->lock must be held. NULL will returned if
1104 * nothing was found after 'start' 1036 * nothing was found after 'start'
@@ -1131,6 +1063,30 @@ out:
1131} 1063}
1132 1064
1133/* 1065/*
1066 * find the first offset in the io tree with 'bits' set. zero is
1067 * returned if we find something, and *start_ret and *end_ret are
1068 * set to reflect the state struct that was found.
1069 *
1070 * If nothing was found, 1 is returned, < 0 on error
1071 */
1072int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1073 u64 *start_ret, u64 *end_ret, int bits)
1074{
1075 struct extent_state *state;
1076 int ret = 1;
1077
1078 spin_lock(&tree->lock);
1079 state = find_first_extent_bit_state(tree, start, bits);
1080 if (state) {
1081 *start_ret = state->start;
1082 *end_ret = state->end;
1083 ret = 0;
1084 }
1085 spin_unlock(&tree->lock);
1086 return ret;
1087}
1088
1089/*
1134 * find a contiguous range of bytes in the file marked as delalloc, not 1090 * find a contiguous range of bytes in the file marked as delalloc, not
1135 * more than 'max_bytes'. start and end are used to return the range, 1091 * more than 'max_bytes'. start and end are used to return the range,
1136 * 1092 *
@@ -2546,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2546 struct writeback_control *wbc) 2502 struct writeback_control *wbc)
2547{ 2503{
2548 int ret; 2504 int ret;
2549 struct address_space *mapping = page->mapping;
2550 struct extent_page_data epd = { 2505 struct extent_page_data epd = {
2551 .bio = NULL, 2506 .bio = NULL,
2552 .tree = tree, 2507 .tree = tree,
@@ -2554,17 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2554 .extent_locked = 0, 2509 .extent_locked = 0,
2555 .sync_io = wbc->sync_mode == WB_SYNC_ALL, 2510 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2556 }; 2511 };
2557 struct writeback_control wbc_writepages = {
2558 .sync_mode = wbc->sync_mode,
2559 .nr_to_write = 64,
2560 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2561 .range_end = (loff_t)-1,
2562 };
2563 2512
2564 ret = __extent_writepage(page, wbc, &epd); 2513 ret = __extent_writepage(page, wbc, &epd);
2565 2514
2566 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2567 __extent_writepage, &epd, flush_write_bio);
2568 flush_epd_write_bio(&epd); 2515 flush_epd_write_bio(&epd);
2569 return ret; 2516 return ret;
2570} 2517}
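
Note: the clear_extent_bit loop above swaps an open-coded unlock/cond_resched()/lock dance for a single cond_resched_lock() call. A loose pthread model of the idea (the kernel version only yields when a reschedule is actually pending, and knows the lock type):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void cond_resched_lock(pthread_mutex_t *lock)
{
        pthread_mutex_unlock(lock);
        sched_yield();                  /* give other threads a turn */
        pthread_mutex_lock(lock);
}

int main(void)
{
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

        pthread_mutex_lock(&lock);
        for (int i = 0; i < 3; i++)
                cond_resched_lock(&lock);   /* long loop stays preemptible */
        pthread_mutex_unlock(&lock);
        puts("done");
        return 0;
}
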
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 21a7ca9e728..7b2f0c3e792 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,15 +76,15 @@ struct extent_io_ops {
76 struct extent_state *state); 76 struct extent_state *state);
77 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 77 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
78 struct extent_state *state, int uptodate); 78 struct extent_state *state, int uptodate);
79 int (*set_bit_hook)(struct inode *inode, struct extent_state *state, 79 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
80 int *bits); 80 int *bits);
81 int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 81 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
82 int *bits); 82 int *bits);
83 int (*merge_extent_hook)(struct inode *inode, 83 void (*merge_extent_hook)(struct inode *inode,
84 struct extent_state *new, 84 struct extent_state *new,
85 struct extent_state *other); 85 struct extent_state *other);
86 int (*split_extent_hook)(struct inode *inode, 86 void (*split_extent_hook)(struct inode *inode,
87 struct extent_state *orig, u64 split); 87 struct extent_state *orig, u64 split);
88 int (*write_cache_pages_lock_hook)(struct page *page); 88 int (*write_cache_pages_lock_hook)(struct page *page);
89}; 89};
90 90
@@ -108,8 +108,6 @@ struct extent_state {
108 wait_queue_head_t wq; 108 wait_queue_head_t wq;
109 atomic_t refs; 109 atomic_t refs;
110 unsigned long state; 110 unsigned long state;
111 u64 split_start;
112 u64 split_end;
113 111
114 /* for use by the FS */ 112 /* for use by the FS */
115 u64 private; 113 u64 private;
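
Note: the hook table change above narrows callbacks that cannot fail from int to void, so extent_io.c stops threading a meaningless return code through every caller. A sketch of the narrowed callback table and the guarded indirect call:

#include <stdio.h>

struct state { unsigned long bits; };

struct ops {
        void (*set_bit_hook)(struct state *st, int bits);   /* may be NULL */
};

static void my_set_bit_hook(struct state *st, int bits)
{
        st->bits |= (unsigned long)bits;
}

static void set_state_cb(const struct ops *ops, struct state *st, int bits)
{
        if (ops && ops->set_bit_hook)
                ops->set_bit_hook(st, bits);    /* nothing to check */
}

int main(void)
{
        struct state st = { 0 };
        struct ops ops = { .set_bit_hook = my_set_bit_hook };

        set_state_cb(&ops, &st, 0x4);
        printf("bits = %#lx\n", st.bits);
        return 0;
}
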
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2d0410344ea..7c97b330145 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
183 return 0; 183 return 0;
184} 184}
185 185
186int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) 186static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
187{ 187{
188 int ret = 0;
189 struct extent_map *merge = NULL; 188 struct extent_map *merge = NULL;
190 struct rb_node *rb; 189 struct rb_node *rb;
191 struct extent_map *em;
192
193 write_lock(&tree->lock);
194 em = lookup_extent_mapping(tree, start, len);
195
196 WARN_ON(!em || em->start != start);
197
198 if (!em)
199 goto out;
200
201 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
202 190
203 if (em->start != 0) { 191 if (em->start != 0) {
204 rb = rb_prev(&em->rb_node); 192 rb = rb_prev(&em->rb_node);
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
225 merge->in_tree = 0; 213 merge->in_tree = 0;
226 free_extent_map(merge); 214 free_extent_map(merge);
227 } 215 }
216}
217
218int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
219{
220 int ret = 0;
221 struct extent_map *em;
222
223 write_lock(&tree->lock);
224 em = lookup_extent_mapping(tree, start, len);
225
226 WARN_ON(!em || em->start != start);
227
228 if (!em)
229 goto out;
230
231 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
232
233 try_merge_map(tree, em);
228 234
229 free_extent_map(em); 235 free_extent_map(em);
230out: 236out:
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
247 struct extent_map *em) 253 struct extent_map *em)
248{ 254{
249 int ret = 0; 255 int ret = 0;
250 struct extent_map *merge = NULL;
251 struct rb_node *rb; 256 struct rb_node *rb;
252 struct extent_map *exist; 257 struct extent_map *exist;
253 258
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
263 goto out; 268 goto out;
264 } 269 }
265 atomic_inc(&em->refs); 270 atomic_inc(&em->refs);
266 if (em->start != 0) { 271
267 rb = rb_prev(&em->rb_node); 272 try_merge_map(tree, em);
268 if (rb)
269 merge = rb_entry(rb, struct extent_map, rb_node);
270 if (rb && mergable_maps(merge, em)) {
271 em->start = merge->start;
272 em->len += merge->len;
273 em->block_len += merge->block_len;
274 em->block_start = merge->block_start;
275 merge->in_tree = 0;
276 rb_erase(&merge->rb_node, &tree->map);
277 free_extent_map(merge);
278 }
279 }
280 rb = rb_next(&em->rb_node);
281 if (rb)
282 merge = rb_entry(rb, struct extent_map, rb_node);
283 if (rb && mergable_maps(em, merge)) {
284 em->len += merge->len;
285 em->block_len += merge->len;
286 rb_erase(&merge->rb_node, &tree->map);
287 merge->in_tree = 0;
288 free_extent_map(merge);
289 }
290out: 273out:
291 return ret; 274 return ret;
292} 275}
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len)
299 return start + len; 282 return start + len;
300} 283}
301 284
302/** 285struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
303 * lookup_extent_mapping - lookup extent_map 286 u64 start, u64 len, int strict)
304 * @tree: tree to lookup in
305 * @start: byte offset to start the search
306 * @len: length of the lookup range
307 *
308 * Find and return the first extent_map struct in @tree that intersects the
309 * [start, len] range. There may be additional objects in the tree that
310 * intersect, so check the object returned carefully to make sure that no
311 * additional lookups are needed.
312 */
313struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
314 u64 start, u64 len)
315{ 287{
316 struct extent_map *em; 288 struct extent_map *em;
317 struct rb_node *rb_node; 289 struct rb_node *rb_node;
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
320 u64 end = range_end(start, len); 292 u64 end = range_end(start, len);
321 293
322 rb_node = __tree_search(&tree->map, start, &prev, &next); 294 rb_node = __tree_search(&tree->map, start, &prev, &next);
323 if (!rb_node && prev) {
324 em = rb_entry(prev, struct extent_map, rb_node);
325 if (end > em->start && start < extent_map_end(em))
326 goto found;
327 }
328 if (!rb_node && next) {
329 em = rb_entry(next, struct extent_map, rb_node);
330 if (end > em->start && start < extent_map_end(em))
331 goto found;
332 }
333 if (!rb_node) { 295 if (!rb_node) {
334 em = NULL; 296 if (prev)
335 goto out; 297 rb_node = prev;
336 } 298 else if (next)
337 if (IS_ERR(rb_node)) { 299 rb_node = next;
338 em = ERR_CAST(rb_node); 300 else
339 goto out; 301 return NULL;
340 } 302 }
303
341 em = rb_entry(rb_node, struct extent_map, rb_node); 304 em = rb_entry(rb_node, struct extent_map, rb_node);
342 if (end > em->start && start < extent_map_end(em))
343 goto found;
344 305
345 em = NULL; 306 if (strict && !(end > em->start && start < extent_map_end(em)))
346 goto out; 307 return NULL;
347 308
348found:
349 atomic_inc(&em->refs); 309 atomic_inc(&em->refs);
350out:
351 return em; 310 return em;
352} 311}
353 312
354/** 313/**
314 * lookup_extent_mapping - lookup extent_map
315 * @tree: tree to lookup in
316 * @start: byte offset to start the search
317 * @len: length of the lookup range
318 *
319 * Find and return the first extent_map struct in @tree that intersects the
320 * [start, len] range. There may be additional objects in the tree that
321 * intersect, so check the object returned carefully to make sure that no
322 * additional lookups are needed.
323 */
324struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
325 u64 start, u64 len)
326{
327 return __lookup_extent_mapping(tree, start, len, 1);
328}
329
330/**
355 * search_extent_mapping - find a nearby extent map 331 * search_extent_mapping - find a nearby extent map
356 * @tree: tree to lookup in 332 * @tree: tree to lookup in
357 * @start: byte offset to start the search 333 * @start: byte offset to start the search
@@ -365,38 +341,7 @@ out:
365struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 341struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
366 u64 start, u64 len) 342 u64 start, u64 len)
367{ 343{
368 struct extent_map *em; 344 return __lookup_extent_mapping(tree, start, len, 0);
369 struct rb_node *rb_node;
370 struct rb_node *prev = NULL;
371 struct rb_node *next = NULL;
372
373 rb_node = __tree_search(&tree->map, start, &prev, &next);
374 if (!rb_node && prev) {
375 em = rb_entry(prev, struct extent_map, rb_node);
376 goto found;
377 }
378 if (!rb_node && next) {
379 em = rb_entry(next, struct extent_map, rb_node);
380 goto found;
381 }
382 if (!rb_node) {
383 em = NULL;
384 goto out;
385 }
386 if (IS_ERR(rb_node)) {
387 em = ERR_CAST(rb_node);
388 goto out;
389 }
390 em = rb_entry(rb_node, struct extent_map, rb_node);
391 goto found;
392
393 em = NULL;
394 goto out;
395
396found:
397 atomic_inc(&em->refs);
398out:
399 return em;
400} 345}
401 346
402/** 347/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 08bcfa92a22..b910694f61e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -291,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
291 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); 291 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
292 292
293 path = btrfs_alloc_path(); 293 path = btrfs_alloc_path();
294 BUG_ON(!path); 294 if (!path)
295 return -ENOMEM;
295 296
296 if (search_commit) { 297 if (search_commit) {
297 path->skip_locking = 1; 298 path->skip_locking = 1;
@@ -677,7 +678,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
677 btrfs_super_csum_size(&root->fs_info->super_copy); 678 btrfs_super_csum_size(&root->fs_info->super_copy);
678 679
679 path = btrfs_alloc_path(); 680 path = btrfs_alloc_path();
680 BUG_ON(!path); 681 if (!path)
682 return -ENOMEM;
683
681 sector_sum = sums->sums; 684 sector_sum = sums->sums;
682again: 685again:
683 next_offset = (u64)-1; 686 next_offset = (u64)-1;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a35e51c9f23..658d66959ab 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag {
74 * If an existing record is found the defrag item you 74 * If an existing record is found the defrag item you
75 * pass in is freed 75 * pass in is freed
76 */ 76 */
77static int __btrfs_add_inode_defrag(struct inode *inode, 77static void __btrfs_add_inode_defrag(struct inode *inode,
78 struct inode_defrag *defrag) 78 struct inode_defrag *defrag)
79{ 79{
80 struct btrfs_root *root = BTRFS_I(inode)->root; 80 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
106 BTRFS_I(inode)->in_defrag = 1; 106 BTRFS_I(inode)->in_defrag = 1;
107 rb_link_node(&defrag->rb_node, parent, p); 107 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); 108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return 0; 109 return;
110 110
111exists: 111exists:
112 kfree(defrag); 112 kfree(defrag);
113 return 0; 113 return;
114 114
115} 115}
116 116
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
123{ 123{
124 struct btrfs_root *root = BTRFS_I(inode)->root; 124 struct btrfs_root *root = BTRFS_I(inode)->root;
125 struct inode_defrag *defrag; 125 struct inode_defrag *defrag;
126 int ret = 0;
127 u64 transid; 126 u64 transid;
128 127
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 128 if (!btrfs_test_opt(root, AUTO_DEFRAG))
@@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
150 149
151 spin_lock(&root->fs_info->defrag_inodes_lock); 150 spin_lock(&root->fs_info->defrag_inodes_lock);
152 if (!BTRFS_I(inode)->in_defrag) 151 if (!BTRFS_I(inode)->in_defrag)
153 ret = __btrfs_add_inode_defrag(inode, defrag); 152 __btrfs_add_inode_defrag(inode, defrag);
154 spin_unlock(&root->fs_info->defrag_inodes_lock); 153 spin_unlock(&root->fs_info->defrag_inodes_lock);
155 return ret; 154 return 0;
156} 155}
157 156
158/* 157/*
@@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
855 btrfs_drop_extent_cache(inode, start, end - 1, 0); 854 btrfs_drop_extent_cache(inode, start, end - 1, 0);
856 855
857 path = btrfs_alloc_path(); 856 path = btrfs_alloc_path();
858 BUG_ON(!path); 857 if (!path)
858 return -ENOMEM;
859again: 859again:
860 recow = 0; 860 recow = 0;
861 split = start; 861 split = start;
@@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
1059static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1059static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1060 struct page **pages, size_t num_pages, 1060 struct page **pages, size_t num_pages,
1061 loff_t pos, unsigned long first_index, 1061 loff_t pos, unsigned long first_index,
1062 unsigned long last_index, size_t write_bytes) 1062 size_t write_bytes)
1063{ 1063{
1064 struct extent_state *cached_state = NULL; 1064 struct extent_state *cached_state = NULL;
1065 int i; 1065 int i;
@@ -1159,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1159 struct btrfs_root *root = BTRFS_I(inode)->root; 1159 struct btrfs_root *root = BTRFS_I(inode)->root;
1160 struct page **pages = NULL; 1160 struct page **pages = NULL;
1161 unsigned long first_index; 1161 unsigned long first_index;
1162 unsigned long last_index;
1163 size_t num_written = 0; 1162 size_t num_written = 0;
1164 int nrptrs; 1163 int nrptrs;
1165 int ret = 0; 1164 int ret = 0;
@@ -1172,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1172 return -ENOMEM; 1171 return -ENOMEM;
1173 1172
1174 first_index = pos >> PAGE_CACHE_SHIFT; 1173 first_index = pos >> PAGE_CACHE_SHIFT;
1175 last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
1176 1174
1177 while (iov_iter_count(i) > 0) { 1175 while (iov_iter_count(i) > 0) {
1178 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1176 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
@@ -1206,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1206 * contents of pages from loop to loop 1204 * contents of pages from loop to loop
1207 */ 1205 */
1208 ret = prepare_pages(root, file, pages, num_pages, 1206 ret = prepare_pages(root, file, pages, num_pages,
1209 pos, first_index, last_index, 1207 pos, first_index, write_bytes);
1210 write_bytes);
1211 if (ret) { 1208 if (ret) {
1212 btrfs_delalloc_release_space(inode, 1209 btrfs_delalloc_release_space(inode,
1213 num_pages << PAGE_CACHE_SHIFT); 1210 num_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 13e6255182e..15fceefbca0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1061,7 +1061,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1061 u64 ino = btrfs_ino(inode); 1061 u64 ino = btrfs_ino(inode);
1062 1062
1063 path = btrfs_alloc_path(); 1063 path = btrfs_alloc_path();
1064 BUG_ON(!path); 1064 if (!path)
1065 return -ENOMEM;
1065 1066
1066 nolock = btrfs_is_free_space_inode(root, inode); 1067 nolock = btrfs_is_free_space_inode(root, inode);
1067 1068
@@ -1282,17 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1282 return ret; 1283 return ret;
1283} 1284}
1284 1285
1285static int btrfs_split_extent_hook(struct inode *inode, 1286static void btrfs_split_extent_hook(struct inode *inode,
1286 struct extent_state *orig, u64 split) 1287 struct extent_state *orig, u64 split)
1287{ 1288{
1288 /* not delalloc, ignore it */ 1289 /* not delalloc, ignore it */
1289 if (!(orig->state & EXTENT_DELALLOC)) 1290 if (!(orig->state & EXTENT_DELALLOC))
1290 return 0; 1291 return;
1291 1292
1292 spin_lock(&BTRFS_I(inode)->lock); 1293 spin_lock(&BTRFS_I(inode)->lock);
1293 BTRFS_I(inode)->outstanding_extents++; 1294 BTRFS_I(inode)->outstanding_extents++;
1294 spin_unlock(&BTRFS_I(inode)->lock); 1295 spin_unlock(&BTRFS_I(inode)->lock);
1295 return 0;
1296} 1296}
1297 1297
1298/* 1298/*
@@ -1301,18 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
1301 * extents, such as when we are doing sequential writes, so we can properly 1301 * extents, such as when we are doing sequential writes, so we can properly
1302 * account for the metadata space we'll need. 1302 * account for the metadata space we'll need.
1303 */ 1303 */
1304static int btrfs_merge_extent_hook(struct inode *inode, 1304static void btrfs_merge_extent_hook(struct inode *inode,
1305 struct extent_state *new, 1305 struct extent_state *new,
1306 struct extent_state *other) 1306 struct extent_state *other)
1307{ 1307{
1308 /* not delalloc, ignore it */ 1308 /* not delalloc, ignore it */
1309 if (!(other->state & EXTENT_DELALLOC)) 1309 if (!(other->state & EXTENT_DELALLOC))
1310 return 0; 1310 return;
1311 1311
1312 spin_lock(&BTRFS_I(inode)->lock); 1312 spin_lock(&BTRFS_I(inode)->lock);
1313 BTRFS_I(inode)->outstanding_extents--; 1313 BTRFS_I(inode)->outstanding_extents--;
1314 spin_unlock(&BTRFS_I(inode)->lock); 1314 spin_unlock(&BTRFS_I(inode)->lock);
1315 return 0;
1316} 1315}
1317 1316
1318/* 1317/*
@@ -1320,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1320 * bytes in this file, and to maintain the list of inodes that 1319 * bytes in this file, and to maintain the list of inodes that
1321 * have pending delalloc work to be done. 1320 * have pending delalloc work to be done.
1322 */ 1321 */
1323static int btrfs_set_bit_hook(struct inode *inode, 1322static void btrfs_set_bit_hook(struct inode *inode,
1324 struct extent_state *state, int *bits) 1323 struct extent_state *state, int *bits)
1325{ 1324{
1326 1325
1327 /* 1326 /*
@@ -1351,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
1351 } 1350 }
1352 spin_unlock(&root->fs_info->delalloc_lock); 1351 spin_unlock(&root->fs_info->delalloc_lock);
1353 } 1352 }
1354 return 0;
1355} 1353}
1356 1354
1357/* 1355/*
1358 * extent_io.c clear_bit_hook, see set_bit_hook for why 1356 * extent_io.c clear_bit_hook, see set_bit_hook for why
1359 */ 1357 */
1360static int btrfs_clear_bit_hook(struct inode *inode, 1358static void btrfs_clear_bit_hook(struct inode *inode,
1361 struct extent_state *state, int *bits) 1359 struct extent_state *state, int *bits)
1362{ 1360{
1363 /* 1361 /*
1364 * set_bit and clear bit hooks normally require _irqsave/restore 1362 * set_bit and clear bit hooks normally require _irqsave/restore
@@ -1395,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1395 } 1393 }
1396 spin_unlock(&root->fs_info->delalloc_lock); 1394 spin_unlock(&root->fs_info->delalloc_lock);
1397 } 1395 }
1398 return 0;
1399} 1396}
1400 1397
1401/* 1398/*
@@ -1645,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1645 int ret; 1642 int ret;
1646 1643
1647 path = btrfs_alloc_path(); 1644 path = btrfs_alloc_path();
1648 BUG_ON(!path); 1645 if (!path)
1646 return -ENOMEM;
1649 1647
1650 path->leave_spinning = 1; 1648 path->leave_spinning = 1;
1651 1649
@@ -2215,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2215 2213
2216 if (!root->orphan_block_rsv) { 2214 if (!root->orphan_block_rsv) {
2217 block_rsv = btrfs_alloc_block_rsv(root); 2215 block_rsv = btrfs_alloc_block_rsv(root);
2218 BUG_ON(!block_rsv); 2216 if (!block_rsv)
2217 return -ENOMEM;
2219 } 2218 }
2220 2219
2221 spin_lock(&root->orphan_lock); 2220 spin_lock(&root->orphan_lock);
@@ -2517,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode)
2517 filled = true; 2516 filled = true;
2518 2517
2519 path = btrfs_alloc_path(); 2518 path = btrfs_alloc_path();
2520 BUG_ON(!path); 2519 if (!path)
2520 goto make_bad;
2521
2521 path->leave_spinning = 1; 2522 path->leave_spinning = 1;
2522 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2523 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2523 2524
@@ -2998,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2998 2999
2999 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3000 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
3000 dentry->d_name.name, dentry->d_name.len); 3001 dentry->d_name.name, dentry->d_name.len);
3001 BUG_ON(ret); 3002 if (ret)
3003 goto out;
3002 3004
3003 if (inode->i_nlink == 0) { 3005 if (inode->i_nlink == 0) {
3004 ret = btrfs_orphan_add(trans, inode); 3006 ret = btrfs_orphan_add(trans, inode);
3005 BUG_ON(ret); 3007 if (ret)
3008 goto out;
3006 } 3009 }
3007 3010
3011out:
3008 nr = trans->blocks_used; 3012 nr = trans->blocks_used;
3009 __unlink_end_trans(trans, root); 3013 __unlink_end_trans(trans, root);
3010 btrfs_btree_balance_dirty(root, nr); 3014 btrfs_btree_balance_dirty(root, nr);
@@ -3147,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3147 3151
3148 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3152 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3149 3153
3154 path = btrfs_alloc_path();
3155 if (!path)
3156 return -ENOMEM;
3157 path->reada = -1;
3158
3150 if (root->ref_cows || root == root->fs_info->tree_root) 3159 if (root->ref_cows || root == root->fs_info->tree_root)
3151 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3160 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3152 3161
@@ -3159,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3159 if (min_type == 0 && root == BTRFS_I(inode)->root) 3168 if (min_type == 0 && root == BTRFS_I(inode)->root)
3160 btrfs_kill_delayed_inode_items(inode); 3169 btrfs_kill_delayed_inode_items(inode);
3161 3170
3162 path = btrfs_alloc_path();
3163 BUG_ON(!path);
3164 path->reada = -1;
3165
3166 key.objectid = ino; 3171 key.objectid = ino;
3167 key.offset = (u64)-1; 3172 key.offset = (u64)-1;
3168 key.type = (u8)-1; 3173 key.type = (u8)-1;
@@ -3690,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3690 int ret = 0; 3695 int ret = 0;
3691 3696
3692 path = btrfs_alloc_path(); 3697 path = btrfs_alloc_path();
3693 BUG_ON(!path); 3698 if (!path)
3699 return -ENOMEM;
3694 3700
3695 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, 3701 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3696 namelen, 0); 3702 namelen, 0);
@@ -3946,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3946 struct btrfs_root *root, int *new) 3952 struct btrfs_root *root, int *new)
3947{ 3953{
3948 struct inode *inode; 3954 struct inode *inode;
3955 int bad_inode = 0;
3949 3956
3950 inode = btrfs_iget_locked(s, location->objectid, root); 3957 inode = btrfs_iget_locked(s, location->objectid, root);
3951 if (!inode) 3958 if (!inode)
@@ -3955,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3955 BTRFS_I(inode)->root = root; 3962 BTRFS_I(inode)->root = root;
3956 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 3963 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3957 btrfs_read_locked_inode(inode); 3964 btrfs_read_locked_inode(inode);
3958 inode_tree_add(inode); 3965 if (!is_bad_inode(inode)) {
3959 unlock_new_inode(inode); 3966 inode_tree_add(inode);
3960 if (new) 3967 unlock_new_inode(inode);
3961 *new = 1; 3968 if (new)
3969 *new = 1;
3970 } else {
3971 bad_inode = 1;
3972 }
3973 }
3974
3975 if (bad_inode) {
3976 iput(inode);
3977 inode = ERR_PTR(-ESTALE);
3962 } 3978 }
3963 3979
3964 return inode; 3980 return inode;
@@ -3993,12 +4009,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3993 struct btrfs_root *sub_root = root; 4009 struct btrfs_root *sub_root = root;
3994 struct btrfs_key location; 4010 struct btrfs_key location;
3995 int index; 4011 int index;
3996 int ret; 4012 int ret = 0;
3997 4013
3998 if (dentry->d_name.len > BTRFS_NAME_LEN) 4014 if (dentry->d_name.len > BTRFS_NAME_LEN)
3999 return ERR_PTR(-ENAMETOOLONG); 4015 return ERR_PTR(-ENAMETOOLONG);
4000 4016
4001 ret = btrfs_inode_by_name(dir, dentry, &location); 4017 if (unlikely(d_need_lookup(dentry))) {
4018 memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
4019 kfree(dentry->d_fsdata);
4020 dentry->d_fsdata = NULL;
4021 d_clear_need_lookup(dentry);
4022 } else {
4023 ret = btrfs_inode_by_name(dir, dentry, &location);
4024 }
4002 4025
4003 if (ret < 0) 4026 if (ret < 0)
4004 return ERR_PTR(ret); 4027 return ERR_PTR(ret);
@@ -4053,6 +4076,12 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
4053 return 0; 4076 return 0;
4054} 4077}
4055 4078
4079static void btrfs_dentry_release(struct dentry *dentry)
4080{
4081 if (dentry->d_fsdata)
4082 kfree(dentry->d_fsdata);
4083}
4084
4056static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 4085static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4057 struct nameidata *nd) 4086 struct nameidata *nd)
4058{ 4087{
@@ -4075,6 +4104,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4075 struct btrfs_path *path; 4104 struct btrfs_path *path;
4076 struct list_head ins_list; 4105 struct list_head ins_list;
4077 struct list_head del_list; 4106 struct list_head del_list;
4107 struct qstr q;
4078 int ret; 4108 int ret;
4079 struct extent_buffer *leaf; 4109 struct extent_buffer *leaf;
4080 int slot; 4110 int slot;
@@ -4164,6 +4194,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4164 4194
4165 while (di_cur < di_total) { 4195 while (di_cur < di_total) {
4166 struct btrfs_key location; 4196 struct btrfs_key location;
4197 struct dentry *tmp;
4167 4198
4168 if (verify_dir_item(root, leaf, di)) 4199 if (verify_dir_item(root, leaf, di))
4169 break; 4200 break;
@@ -4184,6 +4215,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4184 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; 4215 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
4185 btrfs_dir_item_key_to_cpu(leaf, di, &location); 4216 btrfs_dir_item_key_to_cpu(leaf, di, &location);
4186 4217
4218 q.name = name_ptr;
4219 q.len = name_len;
4220 q.hash = full_name_hash(q.name, q.len);
4221 tmp = d_lookup(filp->f_dentry, &q);
4222 if (!tmp) {
4223 struct btrfs_key *newkey;
4224
4225 newkey = kzalloc(sizeof(struct btrfs_key),
4226 GFP_NOFS);
4227 if (!newkey)
4228 goto no_dentry;
4229 tmp = d_alloc(filp->f_dentry, &q);
4230 if (!tmp) {
4231 kfree(newkey);
4232 dput(tmp);
4233 goto no_dentry;
4234 }
4235 memcpy(newkey, &location,
4236 sizeof(struct btrfs_key));
4237 tmp->d_fsdata = newkey;
4238 tmp->d_flags |= DCACHE_NEED_LOOKUP;
4239 d_rehash(tmp);
4240 dput(tmp);
4241 } else {
4242 dput(tmp);
4243 }
4244no_dentry:
4187 /* is this a reference to our own snapshot? If so 4245 /* is this a reference to our own snapshot? If so
4188 * skip it 4246 * skip it
4189 */ 4247 */
@@ -4409,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4409 int owner; 4467 int owner;
4410 4468
4411 path = btrfs_alloc_path(); 4469 path = btrfs_alloc_path();
4412 BUG_ON(!path); 4470 if (!path)
4471 return ERR_PTR(-ENOMEM);
4413 4472
4414 inode = new_inode(root->fs_info->sb); 4473 inode = new_inode(root->fs_info->sb);
4415 if (!inode) { 4474 if (!inode) {
@@ -6669,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6669 return 0; 6728 return 0;
6670} 6729}
6671 6730
6672/* helper function for file defrag and space balancing. This
6673 * forces readahead on a given range of bytes in an inode
6674 */
6675unsigned long btrfs_force_ra(struct address_space *mapping,
6676 struct file_ra_state *ra, struct file *file,
6677 pgoff_t offset, pgoff_t last_index)
6678{
6679 pgoff_t req_size = last_index - offset + 1;
6680
6681 page_cache_sync_readahead(mapping, ra, file, offset, req_size);
6682 return offset + req_size;
6683}
6684
6685struct inode *btrfs_alloc_inode(struct super_block *sb) 6731struct inode *btrfs_alloc_inode(struct super_block *sb)
6686{ 6732{
6687 struct btrfs_inode *ei; 6733 struct btrfs_inode *ei;
@@ -7164,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7164 goto out_unlock; 7210 goto out_unlock;
7165 7211
7166 path = btrfs_alloc_path(); 7212 path = btrfs_alloc_path();
7167 BUG_ON(!path); 7213 if (!path) {
7214 err = -ENOMEM;
7215 drop_inode = 1;
7216 goto out_unlock;
7217 }
7168 key.objectid = btrfs_ino(inode); 7218 key.objectid = btrfs_ino(inode);
7169 key.offset = 0; 7219 key.offset = 0;
7170 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7220 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
@@ -7430,4 +7480,5 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7430 7480
7431const struct dentry_operations btrfs_dentry_operations = { 7481const struct dentry_operations btrfs_dentry_operations = {
7432 .d_delete = btrfs_dentry_delete, 7482 .d_delete = btrfs_dentry_delete,
7483 .d_release = btrfs_dentry_release,
7433}; 7484};
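
Note: the readdir hunks above pre-create dentries carrying the already-decoded location key in d_fsdata plus DCACHE_NEED_LOOKUP, and btrfs_lookup_dentry() consumes that key instead of searching the tree again. A userspace model of the stash-and-consume fast path (structures heavily simplified, names hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dentry {
        unsigned need_lookup : 1;
        void *fsdata;                   /* stashed key stand-in */
};

static long lookup_by_name(void)        /* the expensive path */
{
        puts("tree search");
        return 42;
}

static long resolve(struct dentry *d)
{
        long key;

        if (d->need_lookup && d->fsdata) {
                memcpy(&key, d->fsdata, sizeof(key));   /* cheap path */
                free(d->fsdata);
                d->fsdata = NULL;
                d->need_lookup = 0;
                return key;
        }
        return lookup_by_name();
}

int main(void)
{
        struct dentry d = { .need_lookup = 1, .fsdata = malloc(sizeof(long)) };
        long seed = 42;

        if (!d.fsdata)
                return 1;
        memcpy(d.fsdata, &seed, sizeof(seed));         /* done by readdir */
        printf("first resolve: %ld\n", resolve(&d));   /* uses the stash */
        printf("second resolve: %ld\n", resolve(&d));  /* falls back */
        return 0;
}

The matching d_release hook frees any stashed key on dentries that are torn down before a lookup ever consumed it.
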
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0b980afc5ed..7cf01334994 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1749,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1749 key.objectid = key.offset; 1749 key.objectid = key.offset;
1750 key.offset = (u64)-1; 1750 key.offset = (u64)-1;
1751 dirid = key.objectid; 1751 dirid = key.objectid;
1752
1753 } 1752 }
1754 if (ptr < name) 1753 if (ptr < name)
1755 goto out; 1754 goto out;
1756 memcpy(name, ptr, total_len); 1755 memmove(name, ptr, total_len);
1757 name[total_len]='\0'; 1756 name[total_len]='\0';
1758 ret = 0; 1757 ret = 0;
1759out: 1758out:
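
Note: the one-word fix above matters because ptr points into name itself; the path is assembled right-to-left at the tail of the same buffer, so source and destination overlap, and the C standard makes memcpy() on overlapping regions undefined even when it happens to work. memmove() is the overlap-safe primitive. Demonstrated standalone:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char name[32] = "....../mnt/subvol";
        char *ptr = name + 6;               /* path built at the tail */
        size_t total_len = strlen(ptr);

        memmove(name, ptr, total_len);      /* overlap-safe copy */
        name[total_len] = '\0';
        printf("%s\n", name);
        return 0;
}
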
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
deleted file mode 100644
index 82d569cb626..00000000000
--- a/fs/btrfs/ref-cache.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/sort.h>
22#include "ctree.h"
23#include "ref-cache.h"
24#include "transaction.h"
25
26static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
27 struct rb_node *node)
28{
29 struct rb_node **p = &root->rb_node;
30 struct rb_node *parent = NULL;
31 struct btrfs_leaf_ref *entry;
32
33 while (*p) {
34 parent = *p;
35 entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
36
37 if (bytenr < entry->bytenr)
38 p = &(*p)->rb_left;
39 else if (bytenr > entry->bytenr)
40 p = &(*p)->rb_right;
41 else
42 return parent;
43 }
44
45 entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
46 rb_link_node(node, parent, p);
47 rb_insert_color(node, root);
48 return NULL;
49}
50
51static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
52{
53 struct rb_node *n = root->rb_node;
54 struct btrfs_leaf_ref *entry;
55
56 while (n) {
57 entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
58 WARN_ON(!entry->in_tree);
59
60 if (bytenr < entry->bytenr)
61 n = n->rb_left;
62 else if (bytenr > entry->bytenr)
63 n = n->rb_right;
64 else
65 return n;
66 }
67 return NULL;
68}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
deleted file mode 100644
index 24f7001f638..00000000000
--- a/fs/btrfs/ref-cache.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#ifndef __REFCACHE__
19#define __REFCACHE__
20
21struct btrfs_extent_info {
22 /* bytenr and num_bytes find the extent in the extent allocation tree */
23 u64 bytenr;
24 u64 num_bytes;
25
26 /* objectid and offset find the back reference for the file */
27 u64 objectid;
28 u64 offset;
29};
30
31struct btrfs_leaf_ref {
32 struct rb_node rb_node;
33 struct btrfs_leaf_ref_tree *tree;
34 int in_tree;
35 atomic_t usage;
36
37 u64 root_gen;
38 u64 bytenr;
39 u64 owner;
40 u64 generation;
41 int nritems;
42
43 struct list_head list;
44 struct btrfs_extent_info extents[];
45};
46
47static inline size_t btrfs_leaf_ref_size(int nr_extents)
48{
49 return sizeof(struct btrfs_leaf_ref) +
50 sizeof(struct btrfs_extent_info) * nr_extents;
51}
52#endif
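
Note: the now-deleted ref-cache header used the flexible-array-member sizing idiom via btrfs_leaf_ref_size(). The dead cache goes away with this merge, but the idiom is a standard one; a standalone version for reference:

#include <stdio.h>
#include <stdlib.h>

struct extent_info { long bytenr, num_bytes; };

struct leaf_ref {
        int nritems;
        struct extent_info extents[];   /* flexible array member */
};

static size_t leaf_ref_size(int nr)
{
        return sizeof(struct leaf_ref) + sizeof(struct extent_info) * nr;
}

int main(void)
{
        struct leaf_ref *ref = calloc(1, leaf_ref_size(4));

        if (!ref)
                return 1;
        ref->nritems = 4;
        printf("allocated %zu bytes for %d extents\n",
               leaf_ref_size(ref->nritems), ref->nritems);
        free(ref);
        return 0;
}
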
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ebe45443de0..f4099904565 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -71,13 +71,12 @@ out:
 	return ret;
 }
 
-int btrfs_set_root_node(struct btrfs_root_item *item,
+void btrfs_set_root_node(struct btrfs_root_item *item,
 			struct extent_buffer *node)
 {
 	btrfs_set_root_bytenr(item, node->start);
 	btrfs_set_root_level(item, btrfs_header_level(node));
 	btrfs_set_root_generation(item, btrfs_header_generation(node));
-	return 0;
 }
 
 /*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index eb55863bb4a..7dc36fab4af 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root)
 	spin_lock(&root->fs_info->trans_lock);
 	cur_trans = root->fs_info->running_transaction;
 	if (cur_trans && cur_trans->blocked) {
-		DEFINE_WAIT(wait);
 		atomic_inc(&cur_trans->use_count);
 		spin_unlock(&root->fs_info->trans_lock);
-		while (1) {
-			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
-					TASK_UNINTERRUPTIBLE);
-			if (!cur_trans->blocked)
-				break;
-			schedule();
-		}
-		finish_wait(&root->fs_info->transaction_wait, &wait);
+
+		wait_event(root->fs_info->transaction_wait,
+			   !cur_trans->blocked);
 		put_transaction(cur_trans);
 	} else {
 		spin_unlock(&root->fs_info->trans_lock);
@@ -357,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
 }
 
 /* wait for a transaction commit to be fully complete */
-static noinline int wait_for_commit(struct btrfs_root *root,
+static noinline void wait_for_commit(struct btrfs_root *root,
 				    struct btrfs_transaction *commit)
 {
-	DEFINE_WAIT(wait);
-	while (!commit->commit_done) {
-		prepare_to_wait(&commit->commit_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (commit->commit_done)
-			break;
-		schedule();
-	}
-	finish_wait(&commit->commit_wait, &wait);
-	return 0;
+	wait_event(commit->commit_wait, commit->commit_done);
 }
 
 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
@@ -1085,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 static void wait_current_trans_commit_start(struct btrfs_root *root,
 					    struct btrfs_transaction *trans)
 {
-	DEFINE_WAIT(wait);
-
-	if (trans->in_commit)
-		return;
-
-	while (1) {
-		prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (trans->in_commit) {
-			finish_wait(&root->fs_info->transaction_blocked_wait,
-				    &wait);
-			break;
-		}
-		schedule();
-		finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
-	}
+	wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
 }
 
 /*
@@ -1110,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
 static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
 							 struct btrfs_transaction *trans)
 {
-	DEFINE_WAIT(wait);
-
-	if (trans->commit_done || (trans->in_commit && !trans->blocked))
-		return;
-
-	while (1) {
-		prepare_to_wait(&root->fs_info->transaction_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (trans->commit_done ||
-		    (trans->in_commit && !trans->blocked)) {
-			finish_wait(&root->fs_info->transaction_wait,
-				    &wait);
-			break;
-		}
-		schedule();
-		finish_wait(&root->fs_info->transaction_wait,
-			    &wait);
-	}
+	wait_event(root->fs_info->transaction_wait,
+		   trans->commit_done || (trans->in_commit && !trans->blocked));
 }
 
 /*
@@ -1234,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 		atomic_inc(&cur_trans->use_count);
 		btrfs_end_transaction(trans, root);
 
-		ret = wait_for_commit(root, cur_trans);
-		BUG_ON(ret);
+		wait_for_commit(root, cur_trans);
 
 		put_transaction(cur_trans);
 
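
Note: the transaction.c hunks above all make the same simplification. An open-coded sleep loop built from DEFINE_WAIT()/prepare_to_wait()/schedule()/finish_wait() becomes a single wait_event() call, and wait_for_commit(), which could only ever return 0, becomes void so its caller can drop the BUG_ON(ret). As a minimal sketch (not part of the patch; wq and cond stand for any wait queue head and wake-up condition), the two forms are equivalent for an uninterruptible sleep:

	/* open-coded form being removed */
	DEFINE_WAIT(wait);

	while (1) {
		prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
		if (cond)
			break;
		schedule();
	}
	finish_wait(&wq, &wait);

	/* equivalent helper from <linux/wait.h> */
	wait_event(wq, cond);

wait_event() also tests the condition once before sleeping, which is why the explicit early-return checks in the old loops disappear along with them.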
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ac278dd8317..babee65f8ed 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 		return 0;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 
 	nritems = btrfs_header_nritems(eb);
 	for (i = 0; i < nritems; i++) {
@@ -1723,7 +1724,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 			return -ENOMEM;
 
 		if (*level == 1) {
-			wc->process_func(root, next, wc, ptr_gen);
+			ret = wc->process_func(root, next, wc, ptr_gen);
+			if (ret)
+				return ret;
 
 			path->slots[*level]++;
 			if (wc->free) {
@@ -1788,8 +1791,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 		parent = path->nodes[*level + 1];
 
 		root_owner = btrfs_header_owner(parent);
-		wc->process_func(root, path->nodes[*level], wc,
+		ret = wc->process_func(root, path->nodes[*level], wc,
 				 btrfs_header_generation(path->nodes[*level]));
+		if (ret)
+			return ret;
+
 		if (wc->free) {
 			struct extent_buffer *next;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b89e372c754..53875ae73ad 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root,
 	struct btrfs_key found_key;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path)
+		return -ENOMEM;
 
 	key.objectid = objectid;
 	key.offset = (u64)-1;
@@ -2061,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root)
 
 	/* step two, relocate all the chunks */
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
-
+	if (!path) {
+		ret = -ENOMEM;
+		goto error;
+	}
 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
 	key.offset = (u64)-1;
 	key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -2661,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
 	ret = find_next_chunk(fs_info->chunk_root,
 			      BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
-	BUG_ON(ret);
+	if (ret)
+		return ret;
 
 	alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
 			(fs_info->metadata_alloc_profile &
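
Note: these volumes.c hunks follow the same pattern as the tree-log.c ones: a BUG_ON() on a recoverable failure (a failed btrfs_alloc_path() or a failed helper call) becomes a normal error return. A minimal sketch of the idiom, with a hypothetical caller name (do_something() is a stand-in, not from the patch):

	static int do_something(struct btrfs_root *root)
	{
		struct btrfs_path *path;
		int ret = 0;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;	/* report, don't crash the kernel */

		/* ... use path ... */

		btrfs_free_path(path);
		return ret;
	}

In btrfs_balance() the failure instead does 'goto error', since that function funnels its cleanup through a common error label.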
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8d8f28c94c0..6873bb634a9 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,10 +141,11 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 
 	rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
 	if (rc < 0) {
-		cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
+		cFYI(1, "%s: Failed to resolve server part of %s to IP: %d",
 			__func__, *devname, rc);
 		goto compose_mount_options_err;
 	}
+
 	/* md_len = strlen(...) + 12 for 'sep+prefixpath='
 	 * assuming that we have 'unc=' and 'ip=' in
 	 * the original sb_mountdata
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 212e5629cc1..f93eb948d07 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -563,6 +563,10 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 		mutex_unlock(&dir->i_mutex);
 		dput(dentry);
 		dentry = child;
+		if (!dentry->d_inode) {
+			dput(dentry);
+			dentry = ERR_PTR(-ENOENT);
+		}
 	} while (!IS_ERR(dentry));
 	_FreeXid(xid);
 	kfree(full_path);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 548f06230a6..1d2d91d9bf6 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -79,8 +79,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
 	/* Perform the upcall */
 	rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
 	if (rc < 0)
-		cERROR(1, "%s: unable to resolve: %*.*s",
+		cFYI(1, "%s: unable to resolve: %*.*s",
 			__func__, len, len, hostname);
 	else
 		cFYI(1, "%s: resolved: %*.*s to %s",
 			__func__, len, len, hostname, *ip_addr);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9b018c8334f..a7b2dcd4a53 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 	if (full_path == NULL)
 		return full_path;
 
-	if (dfsplen) {
+	if (dfsplen)
 		strncpy(full_path, tcon->treeName, dfsplen);
-		/* switch slash direction in prepath depending on whether
-		 * windows or posix style path names
-		 */
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
-			int i;
-			for (i = 0; i < dfsplen; i++) {
-				if (full_path[i] == '\\')
-					full_path[i] = '/';
-			}
-		}
-	}
 	strncpy(full_path + dfsplen, vol->prepath, pplen);
+	convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
 	full_path[dfsplen + pplen] = 0; /* add trailing null */
 	return full_path;
 }
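
Note: this hunk drops the open-coded backslash-to-slash loop (which only rewrote the DFS prefix, and only for POSIX-path mounts) in favor of one convert_delimiter() call driven by CIFS_DIR_SEP(cifs_sb), so the whole assembled path is normalized to whichever separator the mount uses. Roughly, and only as an illustration of the helper's effect (a sketch, not cifs' actual definition):

	/* sketch: rewrite every occurrence of the "other" separator */
	static void convert_delimiter_sketch(char *path, char delim)
	{
		char old_delim = (delim == '/') ? '\\' : '/';
		int i;

		for (i = 0; path && path[i] != '\0'; i++)
			if (path[i] == old_delim)
				path[i] = delim;
	}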
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 243d5872051..d3e619692ee 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -124,8 +124,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 	/* that we use in next few lines */
 	/* Note that header is initialized to zero in header_assemble */
 	pSMB->req.AndXCommand = 0xFF;
-	pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, CIFSMaxBufSize - 4,
-					      USHRT_MAX));
+	pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
 	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
 	pSMB->req.VcNumber = get_next_vcnum(ses);
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 147aa22c3c3..c1b9c4b1073 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -362,6 +362,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	mid = AllocMidQEntry(hdr, server);
 	if (mid == NULL) {
 		mutex_unlock(&server->srv_mutex);
+		atomic_dec(&server->inFlight);
+		wake_up(&server->request_q);
 		return -ENOMEM;
 	}
 
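
Note: cifs_call_async() bumps server->inFlight (possibly making another sender block waiting for a free slot) before it allocates the mid-queue entry, so the ENOMEM path has to undo that accounting and wake a waiter; otherwise a request slot leaks on every allocation failure. The shape of the fix, as a generic sketch with stand-in names around the real inFlight/request_q fields:

	/* reserve a slot up front (done earlier, on entry) */
	atomic_inc(&server->inFlight);

	entry = alloc_entry();			/* stand-in for AllocMidQEntry() */
	if (!entry) {
		atomic_dec(&server->inFlight);	/* undo the reservation */
		wake_up(&server->request_q);	/* let a blocked sender proceed */
		return -ENOMEM;
	}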
diff --git a/fs/dcache.c b/fs/dcache.c
index b05aac3a8cf..a88948b8bd1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -301,6 +301,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	return parent;
 }
 
+/*
+ * Unhash a dentry without inserting an RCU walk barrier or checking that
+ * dentry->d_lock is locked. The caller must take care of that, if
+ * appropriate.
+ */
+static void __d_shrink(struct dentry *dentry)
+{
+	if (!d_unhashed(dentry)) {
+		struct hlist_bl_head *b;
+		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+			b = &dentry->d_sb->s_anon;
+		else
+			b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+		hlist_bl_lock(b);
+		__hlist_bl_del(&dentry->d_hash);
+		dentry->d_hash.pprev = NULL;
+		hlist_bl_unlock(b);
+	}
+}
+
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
@@ -319,17 +340,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 void __d_drop(struct dentry *dentry)
 {
 	if (!d_unhashed(dentry)) {
-		struct hlist_bl_head *b;
-		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
-			b = &dentry->d_sb->s_anon;
-		else
-			b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
-		hlist_bl_lock(b);
-		__hlist_bl_del(&dentry->d_hash);
-		dentry->d_hash.pprev = NULL;
-		hlist_bl_unlock(b);
-
+		__d_shrink(dentry);
 		dentry_rcuwalk_barrier(dentry);
 	}
 }
@@ -784,6 +795,7 @@ relock:
 
 /**
  * prune_dcache_sb - shrink the dcache
+ * @sb: superblock
  * @nr_to_scan: number of entries to try to free
  *
  * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
@@ -828,44 +840,24 @@ EXPORT_SYMBOL(shrink_dcache_sb);
 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 {
 	struct dentry *parent;
-	unsigned detached = 0;
 
 	BUG_ON(!IS_ROOT(dentry));
 
-	/* detach this root from the system */
-	spin_lock(&dentry->d_lock);
-	dentry_lru_del(dentry);
-	__d_drop(dentry);
-	spin_unlock(&dentry->d_lock);
-
 	for (;;) {
 		/* descend to the first leaf in the current subtree */
-		while (!list_empty(&dentry->d_subdirs)) {
-			struct dentry *loop;
-
-			/* this is a branch with children - detach all of them
-			 * from the system in one go */
-			spin_lock(&dentry->d_lock);
-			list_for_each_entry(loop, &dentry->d_subdirs,
-					    d_u.d_child) {
-				spin_lock_nested(&loop->d_lock,
-						DENTRY_D_LOCK_NESTED);
-				dentry_lru_del(loop);
-				__d_drop(loop);
-				spin_unlock(&loop->d_lock);
-			}
-			spin_unlock(&dentry->d_lock);
-
-			/* move to the first child */
+		while (!list_empty(&dentry->d_subdirs))
 			dentry = list_entry(dentry->d_subdirs.next,
 					    struct dentry, d_u.d_child);
-		}
 
 		/* consume the dentries from this leaf up through its parents
 		 * until we find one with children or run out altogether */
 		do {
 			struct inode *inode;
 
+			/* detach from the system */
+			dentry_lru_del(dentry);
+			__d_shrink(dentry);
+
 			if (dentry->d_count != 0) {
 				printk(KERN_ERR
 				       "BUG: Dentry %p{i=%lx,n=%s}"
@@ -886,14 +878,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				list_del(&dentry->d_u.d_child);
 			} else {
 				parent = dentry->d_parent;
-				spin_lock(&parent->d_lock);
 				parent->d_count--;
 				list_del(&dentry->d_u.d_child);
-				spin_unlock(&parent->d_lock);
 			}
 
-			detached++;
-
 			inode = dentry->d_inode;
 			if (inode) {
 				dentry->d_inode = NULL;
@@ -938,9 +926,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
 
 	dentry = sb->s_root;
 	sb->s_root = NULL;
-	spin_lock(&dentry->d_lock);
 	dentry->d_count--;
-	spin_unlock(&dentry->d_lock);
 	shrink_dcache_for_umount_subtree(dentry);
 
 	while (!hlist_bl_empty(&sb->s_anon)) {
@@ -1743,7 +1729,7 @@ seqretry:
 		 */
 		if (read_seqcount_retry(&dentry->d_seq, *seq))
 			goto seqretry;
-		if (parent->d_flags & DCACHE_OP_COMPARE) {
+		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 			if (parent->d_op->d_compare(parent, *inode,
 						    dentry, i,
 						    tlen, tname, name))
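
Note: the dcache.c changes factor the unhash body of __d_drop() into __d_shrink(), which skips the dentry_rcuwalk_barrier() call; shrink_dcache_for_umount_subtree() can then unhash each dentry as it consumes it (nothing else can see the tree at umount time) instead of detaching whole subtrees under d_lock up front. Condensed from the hunks above, the shape of the refactor is:

	static void __d_shrink(struct dentry *dentry)
	{
		/* unhash only: no barrier, caller handles locking */
	}

	void __d_drop(struct dentry *dentry)
	{
		if (!d_unhashed(dentry)) {
			__d_shrink(dentry);
			dentry_rcuwalk_barrier(dentry);	/* lookups must notice */
		}
	}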
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 2d0f757fda3..c5a5855a6c4 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,8 @@
 # Kbuild - Gets included from the Kernels Makefile and build system
 #
 
-exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
+# ore module library
+obj-$(CONFIG_ORE) += ore.o
+
+exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
 obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index 86194b2f799..70bae414929 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,6 +1,10 @@
+config ORE
+	tristate
+
 config EXOFS_FS
 	tristate "exofs: OSD based file system support"
 	depends on SCSI_OSD_ULD
+	select ORE
 	help
 	  EXOFS is a file system that uses an OSD storage device,
 	  as its backing storage.
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c965806c282..f4e442ec744 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -36,12 +36,9 @@
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/backing-dev.h>
-#include "common.h"
+#include <scsi/osd_ore.h>
 
-/* FIXME: Remove once pnfs hits mainline
- * #include <linux/exportfs/pnfs_osd_xdr.h>
- */
-#include "pnfs.h"
+#include "common.h"
 
 #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
 
@@ -56,27 +53,11 @@
 /* u64 has problems with printk this will cast it to unsigned long long */
 #define _LLU(x) (unsigned long long)(x)
 
-struct exofs_layout {
-	osd_id s_pid;		/* partition ID of file system*/
-
-	/* Our way of looking at the data_map */
-	unsigned stripe_unit;
-	unsigned mirrors_p1;
-
-	unsigned group_width;
-	u64 group_depth;
-	unsigned group_count;
-
-	enum exofs_inode_layout_gen_functions lay_func;
-
-	unsigned s_numdevs;	/* Num of devices in array */
-	struct osd_dev *s_ods[0];	/* Variable length */
-};
-
 /*
  * our extension to the in-memory superblock
  */
 struct exofs_sb_info {
+	struct backing_dev_info bdi;	/* register our bdi with VFS */
 	struct exofs_sb_stats s_ess;	/* Written often, pre-allocate*/
 	int s_timeout;			/* timeout for OSD operations */
 	uint64_t s_nextid;		/* highest object ID used */
@@ -84,16 +65,13 @@ struct exofs_sb_info {
 	spinlock_t s_next_gen_lock;	/* spinlock for gen # update */
 	u32 s_next_generation;		/* next gen # to use */
 	atomic_t s_curr_pending;	/* number of pending commands */
-	uint8_t s_cred[OSD_CAP_LEN];	/* credential for the fscb */
-	struct backing_dev_info bdi;	/* register our bdi with VFS */
 
 	struct pnfs_osd_data_map data_map;	/* Default raid to use
 						 * FIXME: Needed ?
 						 */
-/*	struct exofs_layout dir_layout;*/	/* Default dir layout */
-	struct exofs_layout layout;	/* Default files layout,
-					 * contains the variable osd_dev
-					 * array. Keep last */
+	struct ore_layout layout;	/* Default files layout */
+	struct ore_comp one_comp;	/* id & cred of partition id=0*/
+	struct ore_components comps;	/* comps for the partition */
 	struct osd_dev *_min_one_dev[1];	/* Place holder for one dev */
 };
 
@@ -107,7 +85,8 @@ struct exofs_i_info {
 	uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
 	uint32_t i_dir_start_lookup;	/* which page to start lookup */
 	uint64_t i_commit_size;		/* the object's written length */
-	uint8_t i_cred[OSD_CAP_LEN];	/* all-powerful credential */
+	struct ore_comp one_comp;	/* same component for all devices */
+	struct ore_components comps;	/* inode view of the device table */
 };
 
 static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
 	return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
 }
 
-struct exofs_io_state;
-typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
-
-struct exofs_io_state {
-	struct kref kref;
-
-	void *private;
-	exofs_io_done_fn done;
-
-	struct exofs_layout *layout;
-	struct osd_obj_id obj;
-	u8 *cred;
-
-	/* Global read/write IO*/
-	loff_t offset;
-	unsigned long length;
-	void *kern_buff;
-
-	struct page **pages;
-	unsigned nr_pages;
-	unsigned pgbase;
-	unsigned pages_consumed;
-
-	/* Attributes */
-	unsigned in_attr_len;
-	struct osd_attr *in_attr;
-	unsigned out_attr_len;
-	struct osd_attr *out_attr;
-
-	/* Variable array of size numdevs */
-	unsigned numdevs;
-	struct exofs_per_dev_state {
-		struct osd_request *or;
-		struct bio *bio;
-		loff_t offset;
-		unsigned length;
-		unsigned dev;
-	} per_dev[];
-};
-
-static inline unsigned exofs_io_state_size(unsigned numdevs)
-{
-	return sizeof(struct exofs_io_state) +
-		sizeof(struct exofs_per_dev_state) * numdevs;
-}
-
 /*
  * our inode flags
  */
@@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
 }
 
 /*
- * Given a layout, object_number and stripe_index return the associated global
- * dev_index
- */
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
-			    osd_id obj_no, unsigned layout_index);
-/*
  * Maximum count of links to a file
  */
 #define EXOFS_LINK_MAX 32000
@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout,
  * function declarations *
  *************************/
 
-/* ios.c */
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
-			   const struct osd_obj_id *obj);
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
-		    u64 offset, void *p, unsigned length);
-
-int exofs_get_io_state(struct exofs_layout *layout,
-		       struct exofs_io_state **ios);
-void exofs_put_io_state(struct exofs_io_state *ios);
-
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
-
-int exofs_sbi_create(struct exofs_io_state *ios);
-int exofs_sbi_remove(struct exofs_io_state *ios);
-int exofs_sbi_write(struct exofs_io_state *ios);
-int exofs_sbi_read(struct exofs_io_state *ios);
-
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
-
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
-static inline int exofs_oi_write(struct exofs_i_info *oi,
-				 struct exofs_io_state *ios)
-{
-	ios->obj.id = exofs_oi_objno(oi);
-	ios->cred = oi->i_cred;
-	return exofs_sbi_write(ios);
-}
-
-static inline int exofs_oi_read(struct exofs_i_info *oi,
-				struct exofs_io_state *ios)
-{
-	ios->obj.id = exofs_oi_objno(oi);
-	ios->cred = oi->i_cred;
-	return exofs_sbi_read(ios);
-}
-
 /* inode.c */
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
 			    unsigned expected_pages);
 int exofs_setattr(struct dentry *, struct iattr *);
 int exofs_write_begin(struct file *file, struct address_space *mapping,
@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
 		  struct inode *);
 
 /* super.c */
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
+			   const struct osd_obj_id *obj);
 int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
 
 /*********************
@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations;
 
 /* inode.c */
 extern const struct address_space_operations exofs_aops;
-extern const struct osd_attr g_attr_logical_length;
 
 /* namei.c */
 extern const struct inode_operations exofs_dir_inode_operations;
@@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations;
 extern const struct inode_operations exofs_symlink_inode_operations;
 extern const struct inode_operations exofs_fast_symlink_inode_operations;
 
+/* exofs_init_comps will initialize an ore_components device array
+ * pointing to a single ore_comp struct, and a round-robin view
+ * of the device table.
+ * The first device of each inode is the [inode->ino % num_devices]
+ * and the rest of the devices sequentially following where the
+ * first device is after the last device.
+ * It is assumed that the global device array at @sbi is twice
+ * bigger and that the device table repeats twice.
+ * See: exofs_read_lookup_dev_table()
+ */
+static inline void exofs_init_comps(struct ore_components *comps,
+				    struct ore_comp *one_comp,
+				    struct exofs_sb_info *sbi, osd_id oid)
+{
+	unsigned dev_mod = (unsigned)oid, first_dev;
+
+	one_comp->obj.partition = sbi->one_comp.obj.partition;
+	one_comp->obj.id = oid;
+	exofs_make_credential(one_comp->cred, &one_comp->obj);
+
+	comps->numdevs = sbi->comps.numdevs;
+	comps->single_comp = EC_SINGLE_COMP;
+	comps->comps = one_comp;
+
+	/* Round robin device view of the table */
+	first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs;
+	comps->ods = sbi->comps.ods + first_dev;
+}
+
 #endif
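
Note: the new exofs_init_comps() helper gives every inode a per-object component (partition, object id, credential) plus a rotated window into the superblock's device table. Working the round-robin arithmetic through once, with assumed sample values (numdevs = 6, mirrors_p1 = 2, oid = 7; these are illustrative, not from the patch):

	unsigned dev_mod   = 7;			/* (unsigned)oid  */
	unsigned first_dev = (7 * 2) % 6;	/* = 2            */
	/* comps->ods = sbi->comps.ods + 2; per-device indexes 0..5  */
	/* from here run off the first copy of the table into its    */
	/* duplicate, which is why the sbi device array is kept      */
	/* twice as big.                                             */

This spreads consecutive object ids across different starting devices while keeping each object's mirror set contiguous.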
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 8472c098445..f39a38fc234 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC =
 			PAGE_SIZE / sizeof(struct page *),
 };
 
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
 			    unsigned expected_pages)
 {
 	unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
@@ -58,7 +58,7 @@ struct page_collect {
 	struct exofs_sb_info *sbi;
 	struct inode *inode;
 	unsigned expected_pages;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 
 	struct page **pages;
 	unsigned alloc_pages;
@@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
 {
 	unsigned pages;
 
-	if (!pcol->ios) { /* First time allocate io_state */
-		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
-
-		if (ret)
-			return ret;
-	}
-
 	/* TODO: easily support bio chaining */
 	pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
 
@@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol)
 	pcol->pages = NULL;
 
 	if (pcol->ios) {
-		exofs_put_io_state(pcol->ios);
+		ore_put_io_state(pcol->ios);
 		pcol->ios = NULL;
 	}
 }
@@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol)
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = exofs_check_io(pcol->ios, &resid);
+	int ret = ore_check_io(pcol->ios, &resid);
 
 	if (likely(!ret))
 		good_bytes = pcol->length;
@@ -241,7 +234,7 @@
 }
 
 /* callback of async reads */
-static void readpages_done(struct exofs_io_state *ios, void *p)
+static void readpages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 
@@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 static int read_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct exofs_io_state *ios = pcol->ios;
+	struct ore_io_state *ios;
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
 	if (!pcol->pages)
 		return 0;
 
+	if (!pcol->ios) {
+		int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true,
+					   pcol->pg_first << PAGE_CACHE_SHIFT,
+					   pcol->length, &pcol->ios);
+
+		if (ret)
+			return ret;
+	}
+
+	ios = pcol->ios;
 	ios->pages = pcol->pages;
 	ios->nr_pages = pcol->nr_pages;
-	ios->length = pcol->length;
-	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 
 	if (pcol->read_4_write) {
-		exofs_oi_read(oi, pcol->ios);
+		ore_read(pcol->ios);
 		return __readpages_done(pcol);
 	}
 
@@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol)
 	*pcol_copy = *pcol;
 	ios->done = readpages_done;
 	ios->private = pcol_copy;
-	ret = exofs_oi_read(oi, ios);
+	ret = ore_read(ios);
 	if (unlikely(ret))
 		goto err;
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
 
 	EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
-		ios->obj.id, _LLU(ios->offset), pcol->length);
+		     oi->one_comp.obj.id, _LLU(ios->offset), pcol->length);
 
 	/* pages ownership was passed to pcol_copy */
 	_pcol_reset(pcol);
@@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page)
 }
 
 /* Callback for osd_write. All writes are asynchronous */
-static void writepages_done(struct exofs_io_state *ios, void *p)
+static void writepages_done(struct ore_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 	int i;
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = exofs_check_io(ios, &resid);
+	int ret = ore_check_io(ios, &resid);
 
 	atomic_dec(&pcol->sbi->s_curr_pending);
 
@@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 static int write_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct exofs_io_state *ios = pcol->ios;
+	struct ore_io_state *ios;
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
 	if (!pcol->pages)
 		return 0;
 
+	BUG_ON(pcol->ios);
+	ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false,
+			       pcol->pg_first << PAGE_CACHE_SHIFT,
+			       pcol->length, &pcol->ios);
+
+	if (unlikely(ret))
+		goto err;
+
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
 	if (!pcol_copy) {
 		EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
@@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
+	ios = pcol->ios;
 	ios->pages = pcol_copy->pages;
 	ios->nr_pages = pcol_copy->nr_pages;
-	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
-	ios->length = pcol_copy->length;
 	ios->done = writepages_done;
 	ios->private = pcol_copy;
 
-	ret = exofs_oi_write(oi, ios);
+	ret = ore_write(ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
+		EXOFS_ERR("write_exec: ore_write() Failed\n");
 		goto err;
 	}
 
@@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
 	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
 }
 
-const struct osd_attr g_attr_logical_length = ATTR_DEF(
-	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
 static int _do_truncate(struct inode *inode, loff_t newsize)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
+	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
-	ret = exofs_oi_truncate(oi, (u64)newsize);
+	ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize);
 	if (likely(!ret))
 		truncate_setsize(inode, newsize);
 
@@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 		[1] = g_attr_inode_file_layout,
 		[2] = g_attr_inode_dir_layout,
 	};
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct exofs_on_disk_inode_layout *layout;
 	int ret;
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		return ret;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	exofs_make_credential(oi->i_cred, &ios->obj);
-	ios->cred = oi->i_cred;
-
-	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
-	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
+	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
 
 	ios->in_attr = attrs;
 	ios->in_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sbi_read(ios);
+	ret = ore_read(ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
-			  _LLU(ios->obj.id), ret);
+			  _LLU(oi->one_comp.obj.id), ret);
 		memset(inode, 0, sizeof(*inode));
 		inode->i_mode = 0040000 | (0777 & ~022);
 		/* If object is lost on target we might as well enable it's
@@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
 	}
 
 out:
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 	return ret;
 }
 
@@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 		return inode;
 	oi = exofs_i(inode);
 	__oi_init(oi);
+	exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
+			 exofs_oi_objno(oi));
 
 	/* read the inode from the osd */
 	ret = exofs_get_inode(sb, oi, &fcb);
@@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
  * set the obj_created flag so that other methods know that the object exists on
  * the OSD.
  */
-static void create_done(struct exofs_io_state *ios, void *p)
+static void create_done(struct ore_io_state *ios, void *p)
 {
 	struct inode *inode = p;
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
-	ret = exofs_check_io(ios, NULL);
-	exofs_put_io_state(ios);
+	ret = ore_check_io(ios, NULL);
+	ore_put_io_state(ios);
 
 	atomic_dec(&sbi->s_curr_pending);
 
 	if (unlikely(ret)) {
 		EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
-			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
+			  _LLU(exofs_oi_objno(oi)),
+			  _LLU(oi->one_comp.obj.partition));
 		/*TODO: When FS is corrupted creation can fail, object already
 		 * exist. Get rid of this asynchronous creation, if exist
 		 * increment the obj counter and try the next object. Until we
@@ -1140,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p)
 */
 struct inode *exofs_new_inode(struct inode *dir, int mode)
 {
-	struct super_block *sb;
+	struct super_block *sb = dir->i_sb;
+	struct exofs_sb_info *sbi = sb->s_fs_info;
 	struct inode *inode;
 	struct exofs_i_info *oi;
-	struct exofs_sb_info *sbi;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
-	sb = dir->i_sb;
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
@@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 
 	set_obj_2bcreated(oi);
 
-	sbi = sb->s_fs_info;
-
 	inode->i_mapping->backing_dev_info = sb->s_bdi;
 	inode_init_owner(inode, dir, mode);
 	inode->i_ino = sbi->s_nextid++;
@@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 	spin_unlock(&sbi->s_next_gen_lock);
 	insert_inode_hash(inode);
 
+	exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
+			 exofs_oi_objno(oi));
 	exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
 
 	mark_inode_dirty(inode);
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+		EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
 		return ERR_PTR(ret);
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
-	exofs_make_credential(oi->i_cred, &ios->obj);
-
 	ios->done = create_done;
 	ios->private = inode;
-	ios->cred = oi->i_cred;
-	ret = exofs_sbi_create(ios);
+
+	ret = ore_create(ios);
 	if (ret) {
-		exofs_put_io_state(ios);
+		ore_put_io_state(ios);
 		return ERR_PTR(ret);
 	}
 	atomic_inc(&sbi->s_curr_pending);
@@ -1207,11 +1208,11 @@ struct updatei_args {
 /*
  * Callback function from exofs_update_inode().
 */
-static void updatei_done(struct exofs_io_state *ios, void *p)
+static void updatei_done(struct ore_io_state *ios, void *p)
 {
 	struct updatei_args *args = p;
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 
 	atomic_dec(&args->sbi->s_curr_pending);
 
@@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	struct osd_attr attr;
 	struct exofs_fcb *fcb;
 	struct updatei_args *args;
@@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	} else
 		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
 		goto free_args;
 	}
 
@@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 		ios->private = args;
 	}
 
-	ret = exofs_oi_write(oi, ios);
+	ret = ore_write(ios);
 	if (!do_sync && !ret) {
 		atomic_inc(&sbi->s_curr_pending);
 		goto out; /* deallocation in updatei_done */
 	}
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 free_args:
 	kfree(args);
 out:
@@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
 * Callback function from exofs_delete_inode() - don't have much cleaning up to
 * do.
 */
-static void delete_done(struct exofs_io_state *ios, void *p)
+static void delete_done(struct ore_io_state *ios, void *p)
 {
 	struct exofs_sb_info *sbi = p;
 
-	exofs_put_io_state(ios);
+	ore_put_io_state(ios);
 
 	atomic_dec(&sbi->s_curr_pending);
 }
@@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct exofs_io_state *ios;
+	struct ore_io_state *ios;
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
@@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode)
 	/* ignore the error, attempt a remove anyway */
 
 	/* Now Remove the OSD objects */
-	ret = exofs_get_io_state(&sbi->layout, &ios);
+	ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
+		EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
 		return;
 	}
 
-	ios->obj.id = exofs_oi_objno(oi);
 	ios->done = delete_done;
 	ios->private = sbi;
-	ios->cred = oi->i_cred;
-	ret = exofs_sbi_remove(ios);
+
+	ret = ore_remove(ios);
 	if (ret) {
-		EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
-		exofs_put_io_state(ios);
+		EXOFS_ERR("%s: ore_remove failed\n", __func__);
+		ore_put_io_state(ios);
 		return;
 	}
 	atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ore.c
index f74a2ec027a..25305af8819 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ore.c
@@ -23,81 +23,87 @@
  */
 
 #include <linux/slab.h>
-#include <scsi/scsi_device.h>
 #include <asm/div64.h>
 
-#include "exofs.h"
+#include <scsi/osd_ore.h>
 
-#define EXOFS_DBGMSG2(M...) do {} while (0)
-/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
+#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)
 
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
-{
-	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
+#ifdef CONFIG_EXOFS_DEBUG
+#define ORE_DBGMSG(fmt, a...) \
+	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
+#else
+#define ORE_DBGMSG(fmt, a...) \
+	do { if (0) printk(fmt, ##a); } while (0)
+#endif
 
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
-		    u64 offset, void *p, unsigned length)
-{
-	struct osd_request *or = osd_start_request(od, GFP_KERNEL);
-/*	struct osd_sense_info osi = {.key = 0};*/
-	int ret;
+/* u64 has problems with printk this will cast it to unsigned long long */
+#define _LLU(x) (unsigned long long)(x)
 
-	if (unlikely(!or)) {
-		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
-		return -ENOMEM;
-	}
-	ret = osd_req_read_kern(or, obj, offset, p, length);
-	if (unlikely(ret)) {
-		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
-		goto out;
-	}
+#define ORE_DBGMSG2(M...) do {} while (0)
+/* #define ORE_DBGMSG2 ORE_DBGMSG */
 
-	ret = osd_finalize_request(or, 0, cred, NULL);
-	if (unlikely(ret)) {
-		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
-		goto out;
-	}
+MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
+MODULE_LICENSE("GPL");
 
-	ret = osd_execute_request(or);
-	if (unlikely(ret))
-		EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
-	/* osd_req_decode_sense(or, ret); */
+static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
+{
+	return ios->comps->comps[index & ios->comps->single_comp].cred;
+}
 
-out:
-	osd_end_request(or);
-	return ret;
+static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
+{
+	return &ios->comps->comps[index & ios->comps->single_comp].obj;
 }
 
-int exofs_get_io_state(struct exofs_layout *layout,
-		       struct exofs_io_state **pios)
+static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
 {
-	struct exofs_io_state *ios;
+	return ios->comps->ods[index];
+}
+
+int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
+		     bool is_reading, u64 offset, u64 length,
+		     struct ore_io_state **pios)
+{
+	struct ore_io_state *ios;
 
 	/*TODO: Maybe use kmem_cach per sbi of size
 	 * exofs_io_state_size(layout->s_numdevs)
 	 */
-	ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
+	ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL);
 	if (unlikely(!ios)) {
-		EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
-			     exofs_io_state_size(layout->s_numdevs));
+		ORE_DBGMSG("Failed kzalloc bytes=%d\n",
+			   ore_io_state_size(comps->numdevs));
 		*pios = NULL;
 		return -ENOMEM;
 	}
 
 	ios->layout = layout;
-	ios->obj.partition = layout->s_pid;
+	ios->comps = comps;
+	ios->offset = offset;
+	ios->length = length;
+	ios->reading = is_reading;
+
 	*pios = ios;
 	return 0;
 }
+EXPORT_SYMBOL(ore_get_rw_state);
+
+int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
+		     struct ore_io_state **ios)
+{
+	return ore_get_rw_state(layout, comps, true, 0, 0, ios);
+}
+EXPORT_SYMBOL(ore_get_io_state);
 
-void exofs_put_io_state(struct exofs_io_state *ios)
+void ore_put_io_state(struct ore_io_state *ios)
 {
 	if (ios) {
 		unsigned i;
 
 		for (i = 0; i < ios->numdevs; i++) {
-			struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
+			struct ore_per_dev_state *per_dev = &ios->per_dev[i];
 
 			if (per_dev->or)
 				osd_end_request(per_dev->or);
@@ -108,31 +114,9 @@ void exofs_put_io_state(struct exofs_io_state *ios)
 		kfree(ios);
 	}
 }
+EXPORT_SYMBOL(ore_put_io_state);
 
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
-			    osd_id obj_no, unsigned layout_index)
-{
-/* switch (layout->lay_func) {
-	case LAYOUT_MOVING_WINDOW:
-	{*/
-	unsigned dev_mod = obj_no;
-
-	return (layout_index + dev_mod * layout->mirrors_p1) %
-		layout->s_numdevs;
-/* }
-	case LAYOUT_FUNC_IMPLICT:
-		return layout->devs[layout_index];
-	}*/
-}
-
-static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
-					   unsigned layout_index)
-{
-	return ios->layout->s_ods[
-		exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
-}
-
-static void _sync_done(struct exofs_io_state *ios, void *p)
+static void _sync_done(struct ore_io_state *ios, void *p)
 {
 	struct completion *waiting = p;
 
@@ -141,20 +125,20 @@ static void _sync_done(struct exofs_io_state *ios, void *p)
 
 static void _last_io(struct kref *kref)
 {
-	struct exofs_io_state *ios = container_of(
-		kref, struct exofs_io_state, kref);
+	struct ore_io_state *ios = container_of(
+		kref, struct ore_io_state, kref);
 
 	ios->done(ios, ios->private);
 }
 
 static void _done_io(struct osd_request *or, void *p)
 {
-	struct exofs_io_state *ios = p;
+	struct ore_io_state *ios = p;
 
 	kref_put(&ios->kref, _last_io);
 }
 
-static int exofs_io_execute(struct exofs_io_state *ios)
+static int ore_io_execute(struct ore_io_state *ios)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	bool sync = (ios->done == NULL);
@@ -170,9 +154,9 @@ static int exofs_io_execute(struct exofs_io_state *ios)
170 if (unlikely(!or)) 154 if (unlikely(!or))
171 continue; 155 continue;
172 156
173 ret = osd_finalize_request(or, 0, ios->cred, NULL); 157 ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
174 if (unlikely(ret)) { 158 if (unlikely(ret)) {
175 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", 159 ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
176 ret); 160 ret);
177 return ret; 161 return ret;
178 } 162 }
@@ -194,7 +178,7 @@ static int exofs_io_execute(struct exofs_io_state *ios)
194 178
195 if (sync) { 179 if (sync) {
196 wait_for_completion(&wait); 180 wait_for_completion(&wait);
197 ret = exofs_check_io(ios, NULL); 181 ret = ore_check_io(ios, NULL);
198 } 182 }
199 return ret; 183 return ret;
200} 184}
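
ore_io_execute() keys synchronous versus asynchronous behavior off ios->done: when the callback is NULL it waits on the on-stack completion and runs ore_check_io() itself; when it is set, it returns as soon as the requests are queued and the final _done_io() kref_put fires the callback. A hedged sketch of the asynchronous form, following the stats_done() pattern used in super.c later in this patch:

	static void my_done(struct ore_io_state *ios, void *p)
	{
		/* runs in OSD completion context; callback owns the state */
		ore_check_io(ios, NULL);
		ore_put_io_state(ios);
	}

	/* ... at submission time ... */
	ios->done = my_done;	/* non-NULL => ore_write() won't block */
	ios->private = my_cookie;
	ret = ore_write(ios);
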
@@ -214,7 +198,7 @@ static void _clear_bio(struct bio *bio)
214 } 198 }
215} 199}
216 200
217int exofs_check_io(struct exofs_io_state *ios, u64 *resid) 201int ore_check_io(struct ore_io_state *ios, u64 *resid)
218{ 202{
219 enum osd_err_priority acumulated_osd_err = 0; 203 enum osd_err_priority acumulated_osd_err = 0;
220 int acumulated_lin_err = 0; 204 int acumulated_lin_err = 0;
@@ -235,7 +219,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
235 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { 219 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
 236 /* start read offset passed endof file */ 220 /* start read offset passed endof file */
237 _clear_bio(ios->per_dev[i].bio); 221 _clear_bio(ios->per_dev[i].bio);
238 EXOFS_DBGMSG("start read offset passed end of file " 222 ORE_DBGMSG("start read offset passed end of file "
239 "offset=0x%llx, length=0x%llx\n", 223 "offset=0x%llx, length=0x%llx\n",
240 _LLU(ios->per_dev[i].offset), 224 _LLU(ios->per_dev[i].offset),
241 _LLU(ios->per_dev[i].length)); 225 _LLU(ios->per_dev[i].length));
@@ -259,6 +243,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
259 243
260 return acumulated_lin_err; 244 return acumulated_lin_err;
261} 245}
246EXPORT_SYMBOL(ore_check_io);
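
ore_check_io() folds per-device OSD sense data into one linear errno; the optional resid out-parameter reports how many bytes of the request should be treated as not transferred. A hedged caller sketch for a short-read check (ORE_DBGMSG usage as elsewhere in this file):

	u64 resid;
	int ret = ore_check_io(ios, &resid);

	if (!ret && resid)
		ORE_DBGMSG("short read: resid=0x%llx of 0x%llx\n",
			   _LLU(resid), _LLU(ios->length));
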
262 247
263/* 248/*
264 * L - logical offset into the file 249 * L - logical offset into the file
@@ -305,20 +290,21 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
305struct _striping_info { 290struct _striping_info {
306 u64 obj_offset; 291 u64 obj_offset;
307 u64 group_length; 292 u64 group_length;
293 u64 M; /* for truncate */
308 unsigned dev; 294 unsigned dev;
309 unsigned unit_off; 295 unsigned unit_off;
310}; 296};
311 297
312static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, 298static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset,
313 struct _striping_info *si) 299 struct _striping_info *si)
314{ 300{
315 u32 stripe_unit = ios->layout->stripe_unit; 301 u32 stripe_unit = layout->stripe_unit;
316 u32 group_width = ios->layout->group_width; 302 u32 group_width = layout->group_width;
317 u64 group_depth = ios->layout->group_depth; 303 u64 group_depth = layout->group_depth;
318 304
319 u32 U = stripe_unit * group_width; 305 u32 U = stripe_unit * group_width;
320 u64 T = U * group_depth; 306 u64 T = U * group_depth;
321 u64 S = T * ios->layout->group_count; 307 u64 S = T * layout->group_count;
322 u64 M = div64_u64(file_offset, S); 308 u64 M = div64_u64(file_offset, S);
323 309
324 /* 310 /*
@@ -333,7 +319,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
333 319
334 /* "H - (N * U)" is just "H % U" so it's bound to u32 */ 320 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
335 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; 321 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
336 si->dev *= ios->layout->mirrors_p1; 322 si->dev *= layout->mirrors_p1;
337 323
338 div_u64_rem(file_offset, stripe_unit, &si->unit_off); 324 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
339 325
@@ -341,15 +327,16 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
341 (M * group_depth * stripe_unit); 327 (M * group_depth * stripe_unit);
342 328
343 si->group_length = T - H; 329 si->group_length = T - H;
330 si->M = M;
344} 331}
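
The mapping above is pure integer arithmetic, so it can be sanity-checked outside the kernel. A self-contained user-space rendering of _calc_stripe_info() with illustrative layout values (not taken from the patch):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t stripe_unit = 65536, group_width = 4, mirrors_p1 = 1;
		uint64_t group_depth = 2, group_count = 2;
		uint64_t L = 1000000;			/* logical file offset */

		uint32_t U = stripe_unit * group_width;	/* full stripe: 256K */
		uint64_t T = U * group_depth;		/* one group: 512K */
		uint64_t S = T * group_count;		/* full cycle: 1M */
		uint64_t M = L / S;			/* cycle number: 0 */
		uint64_t G = (L - M * S) / T;		/* group in cycle: 1 */
		uint64_t H = (L - M * S) % T;		/* offset in group */
		uint32_t N = (uint32_t)(H / U);		/* stripe in group: 1 */

		uint32_t dev = ((uint32_t)(H - N * U) / stripe_unit +
				(uint32_t)G * group_width) * mirrors_p1;
		uint32_t unit_off = (uint32_t)(L % stripe_unit);
		uint64_t obj_offset = unit_off + N * stripe_unit +
				      M * group_depth * stripe_unit;

		/* prints: dev=7 unit_off=16960 obj_offset=82496 group_length=48576 */
		printf("dev=%u unit_off=%u obj_offset=%llu group_length=%llu\n",
		       dev, unit_off, (unsigned long long)obj_offset,
		       (unsigned long long)(T - H));
		return 0;
	}
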
345 332
346static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, 333static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
347 unsigned pgbase, struct exofs_per_dev_state *per_dev, 334 unsigned pgbase, struct ore_per_dev_state *per_dev,
348 int cur_len) 335 int cur_len)
349{ 336{
350 unsigned pg = *cur_pg; 337 unsigned pg = *cur_pg;
351 struct request_queue *q = 338 struct request_queue *q =
352 osd_request_queue(exofs_ios_od(ios, per_dev->dev)); 339 osd_request_queue(_ios_od(ios, per_dev->dev));
353 340
354 per_dev->length += cur_len; 341 per_dev->length += cur_len;
355 342
@@ -361,7 +348,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
361 348
362 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); 349 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
363 if (unlikely(!per_dev->bio)) { 350 if (unlikely(!per_dev->bio)) {
364 EXOFS_DBGMSG("Failed to allocate BIO size=%u\n", 351 ORE_DBGMSG("Failed to allocate BIO size=%u\n",
365 bio_size); 352 bio_size);
366 return -ENOMEM; 353 return -ENOMEM;
367 } 354 }
@@ -387,7 +374,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
387 return 0; 374 return 0;
388} 375}
389 376
390static int _prepare_one_group(struct exofs_io_state *ios, u64 length, 377static int _prepare_one_group(struct ore_io_state *ios, u64 length,
391 struct _striping_info *si) 378 struct _striping_info *si)
392{ 379{
393 unsigned stripe_unit = ios->layout->stripe_unit; 380 unsigned stripe_unit = ios->layout->stripe_unit;
@@ -400,7 +387,7 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
400 int ret = 0; 387 int ret = 0;
401 388
402 while (length) { 389 while (length) {
403 struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; 390 struct ore_per_dev_state *per_dev = &ios->per_dev[dev];
404 unsigned cur_len, page_off = 0; 391 unsigned cur_len, page_off = 0;
405 392
406 if (!per_dev->length) { 393 if (!per_dev->length) {
@@ -443,7 +430,7 @@ out:
443 return ret; 430 return ret;
444} 431}
445 432
446static int _prepare_for_striping(struct exofs_io_state *ios) 433static int _prepare_for_striping(struct ore_io_state *ios)
447{ 434{
448 u64 length = ios->length; 435 u64 length = ios->length;
449 u64 offset = ios->offset; 436 u64 offset = ios->offset;
@@ -452,9 +439,9 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
452 439
453 if (!ios->pages) { 440 if (!ios->pages) {
454 if (ios->kern_buff) { 441 if (ios->kern_buff) {
455 struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; 442 struct ore_per_dev_state *per_dev = &ios->per_dev[0];
456 443
457 _calc_stripe_info(ios, ios->offset, &si); 444 _calc_stripe_info(ios->layout, ios->offset, &si);
458 per_dev->offset = si.obj_offset; 445 per_dev->offset = si.obj_offset;
459 per_dev->dev = si.dev; 446 per_dev->dev = si.dev;
460 447
@@ -468,7 +455,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
468 } 455 }
469 456
470 while (length) { 457 while (length) {
471 _calc_stripe_info(ios, offset, &si); 458 _calc_stripe_info(ios->layout, offset, &si);
472 459
473 if (length < si.group_length) 460 if (length < si.group_length)
474 si.group_length = length; 461 si.group_length = length;
@@ -485,57 +472,59 @@ out:
485 return ret; 472 return ret;
486} 473}
487 474
488int exofs_sbi_create(struct exofs_io_state *ios) 475int ore_create(struct ore_io_state *ios)
489{ 476{
490 int i, ret; 477 int i, ret;
491 478
492 for (i = 0; i < ios->layout->s_numdevs; i++) { 479 for (i = 0; i < ios->comps->numdevs; i++) {
493 struct osd_request *or; 480 struct osd_request *or;
494 481
495 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); 482 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
496 if (unlikely(!or)) { 483 if (unlikely(!or)) {
497 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 484 ORE_ERR("%s: osd_start_request failed\n", __func__);
498 ret = -ENOMEM; 485 ret = -ENOMEM;
499 goto out; 486 goto out;
500 } 487 }
501 ios->per_dev[i].or = or; 488 ios->per_dev[i].or = or;
502 ios->numdevs++; 489 ios->numdevs++;
503 490
504 osd_req_create_object(or, &ios->obj); 491 osd_req_create_object(or, _ios_obj(ios, i));
505 } 492 }
506 ret = exofs_io_execute(ios); 493 ret = ore_io_execute(ios);
507 494
508out: 495out:
509 return ret; 496 return ret;
510} 497}
498EXPORT_SYMBOL(ore_create);
511 499
512int exofs_sbi_remove(struct exofs_io_state *ios) 500int ore_remove(struct ore_io_state *ios)
513{ 501{
514 int i, ret; 502 int i, ret;
515 503
516 for (i = 0; i < ios->layout->s_numdevs; i++) { 504 for (i = 0; i < ios->comps->numdevs; i++) {
517 struct osd_request *or; 505 struct osd_request *or;
518 506
519 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); 507 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
520 if (unlikely(!or)) { 508 if (unlikely(!or)) {
521 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 509 ORE_ERR("%s: osd_start_request failed\n", __func__);
522 ret = -ENOMEM; 510 ret = -ENOMEM;
523 goto out; 511 goto out;
524 } 512 }
525 ios->per_dev[i].or = or; 513 ios->per_dev[i].or = or;
526 ios->numdevs++; 514 ios->numdevs++;
527 515
528 osd_req_remove_object(or, &ios->obj); 516 osd_req_remove_object(or, _ios_obj(ios, i));
529 } 517 }
530 ret = exofs_io_execute(ios); 518 ret = ore_io_execute(ios);
531 519
532out: 520out:
533 return ret; 521 return ret;
534} 522}
523EXPORT_SYMBOL(ore_remove);
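
ore_create() and ore_remove() fan a single OSD CREATE or REMOVE out to every device in the component table, so object lifetime management reduces to preparing an io_state. A hedged sketch of creating an object on all components:

	struct ore_io_state *ios;
	int ret;

	ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
	if (unlikely(ret))
		return ret;

	ret = ore_create(ios);	/* CREATE on all comps->numdevs devices */
	ore_put_io_state(ios);
	return ret;
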
535 524
536static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) 525static int _write_mirror(struct ore_io_state *ios, int cur_comp)
537{ 526{
538 struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp]; 527 struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
539 unsigned dev = ios->per_dev[cur_comp].dev; 528 unsigned dev = ios->per_dev[cur_comp].dev;
540 unsigned last_comp = cur_comp + ios->layout->mirrors_p1; 529 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
541 int ret = 0; 530 int ret = 0;
@@ -544,12 +533,12 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 544 return 0; /* Just an empty slot */ 533 return 0; /* Just an empty slot */
545 534
546 for (; cur_comp < last_comp; ++cur_comp, ++dev) { 535 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
547 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 536 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
548 struct osd_request *or; 537 struct osd_request *or;
549 538
550 or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); 539 or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
551 if (unlikely(!or)) { 540 if (unlikely(!or)) {
552 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 541 ORE_ERR("%s: osd_start_request failed\n", __func__);
553 ret = -ENOMEM; 542 ret = -ENOMEM;
554 goto out; 543 goto out;
555 } 544 }
@@ -563,7 +552,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
563 bio = bio_kmalloc(GFP_KERNEL, 552 bio = bio_kmalloc(GFP_KERNEL,
564 master_dev->bio->bi_max_vecs); 553 master_dev->bio->bi_max_vecs);
565 if (unlikely(!bio)) { 554 if (unlikely(!bio)) {
566 EXOFS_DBGMSG( 555 ORE_DBGMSG(
567 "Failed to allocate BIO size=%u\n", 556 "Failed to allocate BIO size=%u\n",
568 master_dev->bio->bi_max_vecs); 557 master_dev->bio->bi_max_vecs);
569 ret = -ENOMEM; 558 ret = -ENOMEM;
@@ -582,25 +571,29 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
582 bio->bi_rw |= REQ_WRITE; 571 bio->bi_rw |= REQ_WRITE;
583 } 572 }
584 573
585 osd_req_write(or, &ios->obj, per_dev->offset, bio, 574 osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
586 per_dev->length); 575 bio, per_dev->length);
587 EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " 576 ORE_DBGMSG("write(0x%llx) offset=0x%llx "
588 "length=0x%llx dev=%d\n", 577 "length=0x%llx dev=%d\n",
589 _LLU(ios->obj.id), _LLU(per_dev->offset), 578 _LLU(_ios_obj(ios, dev)->id),
579 _LLU(per_dev->offset),
590 _LLU(per_dev->length), dev); 580 _LLU(per_dev->length), dev);
591 } else if (ios->kern_buff) { 581 } else if (ios->kern_buff) {
592 ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, 582 ret = osd_req_write_kern(or, _ios_obj(ios, dev),
593 ios->kern_buff, ios->length); 583 per_dev->offset,
584 ios->kern_buff, ios->length);
594 if (unlikely(ret)) 585 if (unlikely(ret))
595 goto out; 586 goto out;
596 EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " 587 ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
597 "length=0x%llx dev=%d\n", 588 "length=0x%llx dev=%d\n",
598 _LLU(ios->obj.id), _LLU(per_dev->offset), 589 _LLU(_ios_obj(ios, dev)->id),
590 _LLU(per_dev->offset),
599 _LLU(ios->length), dev); 591 _LLU(ios->length), dev);
600 } else { 592 } else {
601 osd_req_set_attributes(or, &ios->obj); 593 osd_req_set_attributes(or, _ios_obj(ios, dev));
602 EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", 594 ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
603 _LLU(ios->obj.id), ios->out_attr_len, dev); 595 _LLU(_ios_obj(ios, dev)->id),
596 ios->out_attr_len, dev);
604 } 597 }
605 598
606 if (ios->out_attr) 599 if (ios->out_attr)
@@ -616,7 +609,7 @@ out:
616 return ret; 609 return ret;
617} 610}
618 611
619int exofs_sbi_write(struct exofs_io_state *ios) 612int ore_write(struct ore_io_state *ios)
620{ 613{
621 int i; 614 int i;
622 int ret; 615 int ret;
@@ -626,52 +619,55 @@ int exofs_sbi_write(struct exofs_io_state *ios)
626 return ret; 619 return ret;
627 620
628 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 621 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
629 ret = _sbi_write_mirror(ios, i); 622 ret = _write_mirror(ios, i);
630 if (unlikely(ret)) 623 if (unlikely(ret))
631 return ret; 624 return ret;
632 } 625 }
633 626
634 ret = exofs_io_execute(ios); 627 ret = ore_io_execute(ios);
635 return ret; 628 return ret;
636} 629}
630EXPORT_SYMBOL(ore_write);
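
ore_write() visits the per-device slots with a stride of mirrors_p1, handing only the first ("master") slot of each mirror set to _write_mirror(), which then clones the master BIO onto the remaining mirrors. A tiny demo of which slots the loop treats as masters (illustrative geometry):

	#include <stdio.h>

	int main(void)
	{
		unsigned numdevs = 8, mirrors_p1 = 2, i;

		/* prints: masters: 0 2 4 6 -- slots 1,3,5,7 get cloned BIOs */
		printf("masters:");
		for (i = 0; i < numdevs; i += mirrors_p1)
			printf(" %u", i);
		printf("\n");
		return 0;
	}
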
637 631
638static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) 632static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp)
639{ 633{
640 struct osd_request *or; 634 struct osd_request *or;
641 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 635 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
642 unsigned first_dev = (unsigned)ios->obj.id; 636 struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
637 unsigned first_dev = (unsigned)obj->id;
643 638
644 if (ios->pages && !per_dev->length) 639 if (ios->pages && !per_dev->length)
645 return 0; /* Just an empty slot */ 640 return 0; /* Just an empty slot */
646 641
647 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; 642 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
648 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); 643 or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
649 if (unlikely(!or)) { 644 if (unlikely(!or)) {
650 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 645 ORE_ERR("%s: osd_start_request failed\n", __func__);
651 return -ENOMEM; 646 return -ENOMEM;
652 } 647 }
653 per_dev->or = or; 648 per_dev->or = or;
654 649
655 if (ios->pages) { 650 if (ios->pages) {
656 osd_req_read(or, &ios->obj, per_dev->offset, 651 osd_req_read(or, obj, per_dev->offset,
657 per_dev->bio, per_dev->length); 652 per_dev->bio, per_dev->length);
658 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" 653 ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
659 " dev=%d\n", _LLU(ios->obj.id), 654 " dev=%d\n", _LLU(obj->id),
660 _LLU(per_dev->offset), _LLU(per_dev->length), 655 _LLU(per_dev->offset), _LLU(per_dev->length),
661 first_dev); 656 first_dev);
662 } else if (ios->kern_buff) { 657 } else if (ios->kern_buff) {
663 int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, 658 int ret = osd_req_read_kern(or, obj, per_dev->offset,
664 ios->kern_buff, ios->length); 659 ios->kern_buff, ios->length);
665 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " 660 ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
666 "length=0x%llx dev=%d ret=>%d\n", 661 "length=0x%llx dev=%d ret=>%d\n",
667 _LLU(ios->obj.id), _LLU(per_dev->offset), 662 _LLU(obj->id), _LLU(per_dev->offset),
668 _LLU(ios->length), first_dev, ret); 663 _LLU(ios->length), first_dev, ret);
669 if (unlikely(ret)) 664 if (unlikely(ret))
670 return ret; 665 return ret;
671 } else { 666 } else {
672 osd_req_get_attributes(or, &ios->obj); 667 osd_req_get_attributes(or, obj);
673 EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", 668 ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
674 _LLU(ios->obj.id), ios->in_attr_len, first_dev); 669 _LLU(obj->id),
670 ios->in_attr_len, first_dev);
675 } 671 }
676 if (ios->out_attr) 672 if (ios->out_attr)
677 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); 673 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
@@ -682,7 +678,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
682 return 0; 678 return 0;
683} 679}
684 680
685int exofs_sbi_read(struct exofs_io_state *ios) 681int ore_read(struct ore_io_state *ios)
686{ 682{
687 int i; 683 int i;
688 int ret; 684 int ret;
@@ -692,16 +688,17 @@ int exofs_sbi_read(struct exofs_io_state *ios)
692 return ret; 688 return ret;
693 689
694 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 690 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
695 ret = _sbi_read_mirror(ios, i); 691 ret = _read_mirror(ios, i);
696 if (unlikely(ret)) 692 if (unlikely(ret))
697 return ret; 693 return ret;
698 } 694 }
699 695
700 ret = exofs_io_execute(ios); 696 ret = ore_io_execute(ios);
701 return ret; 697 return ret;
702} 698}
699EXPORT_SYMBOL(ore_read);
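
Reads, by contrast, touch only one device per mirror set: _read_mirror() offsets into the set by obj->id % mirrors_p1, so different objects spread their reads across the mirrors statically. A small demo of the selection (illustrative values):

	#include <stdio.h>

	int main(void)
	{
		unsigned mirrors_p1 = 3, master = 6;	/* mirror set {6, 7, 8} */
		unsigned long long id;

		/* prints: obj 0 -> dev 6, 1 -> 7, 2 -> 8, 3 -> 6 */
		for (id = 0; id < 4; id++)
			printf("obj %llu -> dev %llu\n", id,
			       master + id % mirrors_p1);
		return 0;
	}
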
703 700
704int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) 701int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
705{ 702{
706 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 703 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
707 void *iter = NULL; 704 void *iter = NULL;
@@ -721,83 +718,118 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
721 718
722 return -EIO; 719 return -EIO;
723} 720}
721EXPORT_SYMBOL(extract_attr_from_ios);
724 722
725static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, 723static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
726 struct osd_attr *attr) 724 struct osd_attr *attr)
727{ 725{
728 int last_comp = cur_comp + ios->layout->mirrors_p1; 726 int last_comp = cur_comp + ios->layout->mirrors_p1;
729 727
730 for (; cur_comp < last_comp; ++cur_comp) { 728 for (; cur_comp < last_comp; ++cur_comp) {
731 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 729 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
732 struct osd_request *or; 730 struct osd_request *or;
733 731
734 or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); 732 or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
735 if (unlikely(!or)) { 733 if (unlikely(!or)) {
736 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 734 ORE_ERR("%s: osd_start_request failed\n", __func__);
737 return -ENOMEM; 735 return -ENOMEM;
738 } 736 }
739 per_dev->or = or; 737 per_dev->or = or;
740 738
741 osd_req_set_attributes(or, &ios->obj); 739 osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
742 osd_req_add_set_attr_list(or, attr, 1); 740 osd_req_add_set_attr_list(or, attr, 1);
743 } 741 }
744 742
745 return 0; 743 return 0;
746} 744}
747 745
748int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) 746struct _trunc_info {
747 struct _striping_info si;
748 u64 prev_group_obj_off;
749 u64 next_group_obj_off;
750
751 unsigned first_group_dev;
752 unsigned nex_group_dev;
753 unsigned max_devs;
754};
755
756void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
757 struct _trunc_info *ti)
758{
759 unsigned stripe_unit = layout->stripe_unit;
760
761 _calc_stripe_info(layout, file_offset, &ti->si);
762
763 ti->prev_group_obj_off = ti->si.M * stripe_unit;
764 ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;
765
766 ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
767 ti->nex_group_dev = ti->first_group_dev + layout->group_width;
768 ti->max_devs = layout->group_width * layout->group_count;
769}
770
771int ore_truncate(struct ore_layout *layout, struct ore_components *comps,
772 u64 size)
749{ 773{
750 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; 774 struct ore_io_state *ios;
751 struct exofs_io_state *ios;
752 struct exofs_trunc_attr { 775 struct exofs_trunc_attr {
753 struct osd_attr attr; 776 struct osd_attr attr;
754 __be64 newsize; 777 __be64 newsize;
755 } *size_attrs; 778 } *size_attrs;
756 struct _striping_info si; 779 struct _trunc_info ti;
757 int i, ret; 780 int i, ret;
758 781
759 ret = exofs_get_io_state(&sbi->layout, &ios); 782 ret = ore_get_io_state(layout, comps, &ios);
760 if (unlikely(ret)) 783 if (unlikely(ret))
761 return ret; 784 return ret;
762 785
763 size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), 786 _calc_trunk_info(ios->layout, size, &ti);
787
788 size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs),
764 GFP_KERNEL); 789 GFP_KERNEL);
765 if (unlikely(!size_attrs)) { 790 if (unlikely(!size_attrs)) {
766 ret = -ENOMEM; 791 ret = -ENOMEM;
767 goto out; 792 goto out;
768 } 793 }
769 794
770 ios->obj.id = exofs_oi_objno(oi); 795 ios->numdevs = ios->comps->numdevs;
771 ios->cred = oi->i_cred;
772 796
773 ios->numdevs = ios->layout->s_numdevs; 797 for (i = 0; i < ti.max_devs; ++i) {
774 _calc_stripe_info(ios, size, &si);
775
776 for (i = 0; i < ios->layout->group_width; ++i) {
777 struct exofs_trunc_attr *size_attr = &size_attrs[i]; 798 struct exofs_trunc_attr *size_attr = &size_attrs[i];
778 u64 obj_size; 799 u64 obj_size;
779 800
780 if (i < si.dev) 801 if (i < ti.first_group_dev)
781 obj_size = si.obj_offset + 802 obj_size = ti.prev_group_obj_off;
782 ios->layout->stripe_unit - si.unit_off; 803 else if (i >= ti.nex_group_dev)
783 else if (i == si.dev) 804 obj_size = ti.next_group_obj_off;
784 obj_size = si.obj_offset; 805 else if (i < ti.si.dev) /* dev within this group */
785 else /* i > si.dev */ 806 obj_size = ti.si.obj_offset +
786 obj_size = si.obj_offset - si.unit_off; 807 ios->layout->stripe_unit - ti.si.unit_off;
808 else if (i == ti.si.dev)
809 obj_size = ti.si.obj_offset;
810 else /* i > ti.dev */
811 obj_size = ti.si.obj_offset - ti.si.unit_off;
787 812
788 size_attr->newsize = cpu_to_be64(obj_size); 813 size_attr->newsize = cpu_to_be64(obj_size);
789 size_attr->attr = g_attr_logical_length; 814 size_attr->attr = g_attr_logical_length;
790 size_attr->attr.val_ptr = &size_attr->newsize; 815 size_attr->attr.val_ptr = &size_attr->newsize;
791 816
817 ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
818 _LLU(comps->comps->obj.id), _LLU(obj_size), i);
792 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, 819 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
793 &size_attr->attr); 820 &size_attr->attr);
794 if (unlikely(ret)) 821 if (unlikely(ret))
795 goto out; 822 goto out;
796 } 823 }
797 ret = exofs_io_execute(ios); 824 ret = ore_io_execute(ios);
798 825
799out: 826out:
800 kfree(size_attrs); 827 kfree(size_attrs);
801 exofs_put_io_state(ios); 828 ore_put_io_state(ios);
802 return ret; 829 return ret;
803} 830}
831EXPORT_SYMBOL(ore_truncate);
832
833const struct osd_attr g_attr_logical_length = ATTR_DEF(
834 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
835EXPORT_SYMBOL(g_attr_logical_length);
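
The per-device size ladder inside ore_truncate() is easy to lose in the diff: devices in groups before the one holding the new size keep a full previous cycle of data, devices in later groups are already shorter, and inside the affected group the cut falls at ti.si.dev. A hedged restatement as a stand-alone helper (field names mirror _trunc_info/_striping_info above; not part of the patch):

	#include <stdint.h>

	struct cut {
		uint64_t obj_offset, prev_group_obj_off, next_group_obj_off;
		uint32_t unit_off, dev, first_group_dev, nex_group_dev;
	};

	/* logical length to set on device i, as in the ore_truncate() loop */
	static uint64_t obj_size_for_dev(const struct cut *ti,
					 uint32_t stripe_unit, unsigned i)
	{
		if (i < ti->first_group_dev)		/* earlier group */
			return ti->prev_group_obj_off;
		if (i >= ti->nex_group_dev)		/* later group */
			return ti->next_group_obj_off;
		if (i < ti->dev)	/* this group, already a full unit */
			return ti->obj_offset + stripe_unit - ti->unit_off;
		if (i == ti->dev)	/* device holding the cut */
			return ti->obj_offset;
		return ti->obj_offset - ti->unit_off;	/* i > ti->dev */
	}
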
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
deleted file mode 100644
index c52e9888b8a..00000000000
--- a/fs/exofs/pnfs.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/*
2 * Copyright (C) 2008, 2009
3 * Boaz Harrosh <bharrosh@panasas.com>
4 *
5 * This file is part of exofs.
6 *
7 * exofs is free software; you can redistribute it and/or modify it under the
8 * terms of the GNU General Public License version 2 as published by the Free
9 * Software Foundation.
10 *
11 */
12
13/* FIXME: Remove this file once pnfs hits mainline */
14
15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__
17
18#if ! defined(__PNFS_OSD_XDR_H__)
19
20enum pnfs_iomode {
21 IOMODE_READ = 1,
22 IOMODE_RW = 2,
23 IOMODE_ANY = 3,
24};
25
26/* Layout Structure */
27enum pnfs_osd_raid_algorithm4 {
28 PNFS_OSD_RAID_0 = 1,
29 PNFS_OSD_RAID_4 = 2,
30 PNFS_OSD_RAID_5 = 3,
31 PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
32};
33
34struct pnfs_osd_data_map {
35 u32 odm_num_comps;
36 u64 odm_stripe_unit;
37 u32 odm_group_width;
38 u32 odm_group_depth;
39 u32 odm_mirror_cnt;
40 u32 odm_raid_algorithm;
41};
42
43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
44
45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index c57beddcc21..274894053b0 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -40,6 +40,8 @@
40 40
 41#include "exofs.h" 41#include "exofs.h"
42 42
43#define EXOFS_DBGMSG2(M...) do {} while (0)
44
43/****************************************************************************** 45/******************************************************************************
44 * MOUNT OPTIONS 46 * MOUNT OPTIONS
45 *****************************************************************************/ 47 *****************************************************************************/
@@ -208,10 +210,48 @@ static void destroy_inodecache(void)
208} 210}
209 211
210/****************************************************************************** 212/******************************************************************************
211 * SUPERBLOCK FUNCTIONS 213 * Some osd helpers
212 *****************************************************************************/ 214 *****************************************************************************/
213static const struct super_operations exofs_sops; 215void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
214static const struct export_operations exofs_export_ops; 216{
217 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
218}
219
220static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
221 u64 offset, void *p, unsigned length)
222{
223 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
224/* struct osd_sense_info osi = {.key = 0};*/
225 int ret;
226
227 if (unlikely(!or)) {
228 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
229 return -ENOMEM;
230 }
231 ret = osd_req_read_kern(or, obj, offset, p, length);
232 if (unlikely(ret)) {
233 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
234 goto out;
235 }
236
237 ret = osd_finalize_request(or, 0, cred, NULL);
238 if (unlikely(ret)) {
239 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
240 goto out;
241 }
242
243 ret = osd_execute_request(or);
244 if (unlikely(ret))
245 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
246 /* osd_req_decode_sense(or, ret); */
247
248out:
249 osd_end_request(or);
250 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
251 "length=0x%llx dev=%p ret=>%d\n",
252 _LLU(obj->id), _LLU(offset), _LLU(length), od, ret);
253 return ret;
254}
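
exofs_read_kern() is the bootstrap path used before any ore_io_state machinery exists; the mount code below relies on it to pull in the on-disk superblock and the device table. A sketch of the superblock read it serves, matching the call made in exofs_fill_super() later in this file:

	struct ore_comp comp;
	struct exofs_fscb fscb;
	int ret;

	comp.obj.partition = opts->pid;		/* from mount options */
	comp.obj.id = EXOFS_SUPER_ID;
	exofs_make_credential(comp.cred, &comp.obj);

	ret = exofs_read_kern(od, comp.cred, &comp.obj, 0,
			      &fscb, sizeof(fscb));
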
215 255
216static const struct osd_attr g_attr_sb_stats = ATTR_DEF( 256static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
217 EXOFS_APAGE_SB_DATA, 257 EXOFS_APAGE_SB_DATA,
@@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
223 struct osd_attr attrs[] = { 263 struct osd_attr attrs[] = {
224 [0] = g_attr_sb_stats, 264 [0] = g_attr_sb_stats,
225 }; 265 };
226 struct exofs_io_state *ios; 266 struct ore_io_state *ios;
227 int ret; 267 int ret;
228 268
229 ret = exofs_get_io_state(&sbi->layout, &ios); 269 ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
230 if (unlikely(ret)) { 270 if (unlikely(ret)) {
231 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 271 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
232 return ret; 272 return ret;
233 } 273 }
234 274
235 ios->cred = sbi->s_cred;
236
237 ios->in_attr = attrs; 275 ios->in_attr = attrs;
238 ios->in_attr_len = ARRAY_SIZE(attrs); 276 ios->in_attr_len = ARRAY_SIZE(attrs);
239 277
240 ret = exofs_sbi_read(ios); 278 ret = ore_read(ios);
241 if (unlikely(ret)) { 279 if (unlikely(ret)) {
242 EXOFS_ERR("Error reading super_block stats => %d\n", ret); 280 EXOFS_ERR("Error reading super_block stats => %d\n", ret);
243 goto out; 281 goto out;
@@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
264 } 302 }
265 303
266out: 304out:
267 exofs_put_io_state(ios); 305 ore_put_io_state(ios);
268 return ret; 306 return ret;
269} 307}
270 308
271static void stats_done(struct exofs_io_state *ios, void *p) 309static void stats_done(struct ore_io_state *ios, void *p)
272{ 310{
273 exofs_put_io_state(ios); 311 ore_put_io_state(ios);
274 /* Good thanks nothing to do anymore */ 312 /* Good thanks nothing to do anymore */
275} 313}
276 314
@@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
280 struct osd_attr attrs[] = { 318 struct osd_attr attrs[] = {
281 [0] = g_attr_sb_stats, 319 [0] = g_attr_sb_stats,
282 }; 320 };
283 struct exofs_io_state *ios; 321 struct ore_io_state *ios;
284 int ret; 322 int ret;
285 323
286 ret = exofs_get_io_state(&sbi->layout, &ios); 324 ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
287 if (unlikely(ret)) { 325 if (unlikely(ret)) {
288 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 326 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
289 return ret; 327 return ret;
290 } 328 }
291 329
@@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); 331 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
294 attrs[0].val_ptr = &sbi->s_ess; 332 attrs[0].val_ptr = &sbi->s_ess;
295 333
296 ios->cred = sbi->s_cred; 334
297 ios->done = stats_done; 335 ios->done = stats_done;
298 ios->private = sbi; 336 ios->private = sbi;
299 ios->out_attr = attrs; 337 ios->out_attr = attrs;
300 ios->out_attr_len = ARRAY_SIZE(attrs); 338 ios->out_attr_len = ARRAY_SIZE(attrs);
301 339
302 ret = exofs_sbi_write(ios); 340 ret = ore_write(ios);
303 if (unlikely(ret)) { 341 if (unlikely(ret)) {
304 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 342 EXOFS_ERR("%s: ore_write failed.\n", __func__);
305 exofs_put_io_state(ios); 343 ore_put_io_state(ios);
306 } 344 }
307 345
308 return ret; 346 return ret;
309} 347}
310 348
349/******************************************************************************
350 * SUPERBLOCK FUNCTIONS
351 *****************************************************************************/
352static const struct super_operations exofs_sops;
353static const struct export_operations exofs_export_ops;
354
311/* 355/*
312 * Write the superblock to the OSD 356 * Write the superblock to the OSD
313 */ 357 */
@@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait)
315{ 359{
316 struct exofs_sb_info *sbi; 360 struct exofs_sb_info *sbi;
317 struct exofs_fscb *fscb; 361 struct exofs_fscb *fscb;
318 struct exofs_io_state *ios; 362 struct ore_comp one_comp;
363 struct ore_components comps;
364 struct ore_io_state *ios;
319 int ret = -ENOMEM; 365 int ret = -ENOMEM;
320 366
321 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); 367 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
@@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
331 * version). Otherwise the exofs_fscb is read-only from mkfs time. All 377 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
332 * the writeable info is set in exofs_sbi_write_stats() above. 378 * the writeable info is set in exofs_sbi_write_stats() above.
333 */ 379 */
334 ret = exofs_get_io_state(&sbi->layout, &ios); 380
381 exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID);
382
383 ret = ore_get_io_state(&sbi->layout, &comps, &ios);
335 if (unlikely(ret)) 384 if (unlikely(ret))
336 goto out; 385 goto out;
337 386
@@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait)
345 fscb->s_newfs = 0; 394 fscb->s_newfs = 0;
346 fscb->s_version = EXOFS_FSCB_VER; 395 fscb->s_version = EXOFS_FSCB_VER;
347 396
348 ios->obj.id = EXOFS_SUPER_ID;
349 ios->offset = 0; 397 ios->offset = 0;
350 ios->kern_buff = fscb; 398 ios->kern_buff = fscb;
351 ios->cred = sbi->s_cred;
352 399
353 ret = exofs_sbi_write(ios); 400 ret = ore_write(ios);
354 if (unlikely(ret)) 401 if (unlikely(ret))
355 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 402 EXOFS_ERR("%s: ore_write failed.\n", __func__);
356 else 403 else
357 sb->s_dirt = 0; 404 sb->s_dirt = 0;
358 405
@@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
360 unlock_super(sb); 407 unlock_super(sb);
361out: 408out:
362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 409 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
363 exofs_put_io_state(ios); 410 ore_put_io_state(ios);
364 kfree(fscb); 411 kfree(fscb);
365 return ret; 412 return ret;
366} 413}
@@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
384 431
385void exofs_free_sbi(struct exofs_sb_info *sbi) 432void exofs_free_sbi(struct exofs_sb_info *sbi)
386{ 433{
387 while (sbi->layout.s_numdevs) { 434 while (sbi->comps.numdevs) {
388 int i = --sbi->layout.s_numdevs; 435 int i = --sbi->comps.numdevs;
389 struct osd_dev *od = sbi->layout.s_ods[i]; 436 struct osd_dev *od = sbi->comps.ods[i];
390 437
391 if (od) { 438 if (od) {
392 sbi->layout.s_ods[i] = NULL; 439 sbi->comps.ods[i] = NULL;
393 osduld_put_device(od); 440 osduld_put_device(od);
394 } 441 }
395 } 442 }
443 if (sbi->comps.ods != sbi->_min_one_dev)
444 kfree(sbi->comps.ods);
396 kfree(sbi); 445 kfree(sbi);
397} 446}
398 447
@@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb)
419 msecs_to_jiffies(100)); 468 msecs_to_jiffies(100));
420 } 469 }
421 470
422 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 471 _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0],
423 sbi->layout.s_pid); 472 sbi->one_comp.obj.partition);
424 473
425 bdi_destroy(&sbi->bdi); 474 bdi_destroy(&sbi->bdi);
426 exofs_free_sbi(sbi); 475 exofs_free_sbi(sbi);
@@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
501 return -EINVAL; 550 return -EINVAL;
502 } 551 }
503 552
553 EXOFS_DBGMSG("exofs: layout: "
554 "num_comps=%u stripe_unit=0x%x group_width=%u "
555 "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
556 numdevs,
557 sbi->layout.stripe_unit,
558 sbi->layout.group_width,
559 _LLU(sbi->layout.group_depth),
560 sbi->layout.mirrors_p1,
561 sbi->data_map.odm_raid_algorithm);
504 return 0; 562 return 0;
505} 563}
506 564
507static unsigned __ra_pages(struct exofs_layout *layout) 565static unsigned __ra_pages(struct ore_layout *layout)
508{ 566{
509 const unsigned _MIN_RA = 32; /* min 128K read-ahead */ 567 const unsigned _MIN_RA = 32; /* min 128K read-ahead */
510 unsigned ra_pages = layout->group_width * layout->stripe_unit / 568 unsigned ra_pages = layout->group_width * layout->stripe_unit /
@@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
547 return !(odi->systemid_len || odi->osdname_len); 605 return !(odi->systemid_len || odi->osdname_len);
548} 606}
549 607
550static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, 608static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
609 struct osd_dev *fscb_od,
551 unsigned table_count) 610 unsigned table_count)
552{ 611{
553 struct exofs_sb_info *sbi = *psbi; 612 struct ore_comp comp;
554 struct osd_dev *fscb_od;
555 struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
556 .id = EXOFS_DEVTABLE_ID};
557 struct exofs_device_table *dt; 613 struct exofs_device_table *dt;
558 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 614 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
559 sizeof(*dt); 615 sizeof(*dt);
@@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
567 return -ENOMEM; 623 return -ENOMEM;
568 } 624 }
569 625
570 fscb_od = sbi->layout.s_ods[0]; 626 sbi->comps.numdevs = 0;
571 sbi->layout.s_ods[0] = NULL; 627
572 sbi->layout.s_numdevs = 0; 628 comp.obj.partition = sbi->one_comp.obj.partition;
573 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); 629 comp.obj.id = EXOFS_DEVTABLE_ID;
630 exofs_make_credential(comp.cred, &comp.obj);
631
632 ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
633 table_bytes);
574 if (unlikely(ret)) { 634 if (unlikely(ret)) {
575 EXOFS_ERR("ERROR: reading device table\n"); 635 EXOFS_ERR("ERROR: reading device table\n");
576 goto out; 636 goto out;
@@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
588 goto out; 648 goto out;
589 649
590 if (likely(numdevs > 1)) { 650 if (likely(numdevs > 1)) {
591 unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); 651 unsigned size = numdevs * sizeof(sbi->comps.ods[0]);
592 652
593 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); 653 /* Twice bigger table: See exofs_init_comps() and below
594 if (unlikely(!sbi)) { 654 * comment
655 */
656 sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL);
657 if (unlikely(!sbi->comps.ods)) {
658 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
659 numdevs);
595 ret = -ENOMEM; 660 ret = -ENOMEM;
596 goto out; 661 goto out;
597 } 662 }
598 memset(&sbi->layout.s_ods[1], 0,
599 size - sizeof(sbi->layout.s_ods[0]));
600 *psbi = sbi;
601 } 663 }
602 664
603 for (i = 0; i < numdevs; i++) { 665 for (i = 0; i < numdevs; i++) {
@@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
619 * line. We always keep them in device-table order. 681 * line. We always keep them in device-table order.
620 */ 682 */
621 if (fscb_od && osduld_device_same(fscb_od, &odi)) { 683 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
622 sbi->layout.s_ods[i] = fscb_od; 684 sbi->comps.ods[i] = fscb_od;
623 ++sbi->layout.s_numdevs; 685 ++sbi->comps.numdevs;
624 fscb_od = NULL; 686 fscb_od = NULL;
625 continue; 687 continue;
626 } 688 }
@@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
633 goto out; 695 goto out;
634 } 696 }
635 697
636 sbi->layout.s_ods[i] = od; 698 sbi->comps.ods[i] = od;
637 ++sbi->layout.s_numdevs; 699 ++sbi->comps.numdevs;
638 700
639 /* Read the fscb of the other devices to make sure the FS 701 /* Read the fscb of the other devices to make sure the FS
640 * partition is there. 702 * partition is there.
641 */ 703 */
642 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, 704 ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
643 sizeof(fscb)); 705 sizeof(fscb));
644 if (unlikely(ret)) { 706 if (unlikely(ret)) {
645 EXOFS_ERR("ERROR: Malformed participating device " 707 EXOFS_ERR("ERROR: Malformed participating device "
@@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
656 718
657out: 719out:
658 kfree(dt); 720 kfree(dt);
659 if (unlikely(!ret && fscb_od)) { 721 if (likely(!ret)) {
660 EXOFS_ERR( 722 unsigned numdevs = sbi->comps.numdevs;
661 "ERROR: Bad device-table container device not present\n");
662 osduld_put_device(fscb_od);
663 ret = -EINVAL;
664 }
665 723
724 if (unlikely(fscb_od)) {
725 EXOFS_ERR("ERROR: Bad device-table container device not present\n");
726 osduld_put_device(fscb_od);
727 return -EINVAL;
728 }
729 /* exofs round-robins the device table view according to inode
 730 * number. We hold a twice-bigger table, hence inodes can point
 731 * to any device and have a sequential view of the table
 732 * starting at this device. See exofs_init_comps().
733 */
734 for (i = 0; i < numdevs - 1; ++i)
735 sbi->comps.ods[i + numdevs] = sbi->comps.ods[i];
736 }
666 return ret; 737 return ret;
667} 738}
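
The table is allocated at twice its size (minus one slot) so that a component whose view starts at device d can read numdevs consecutive entries without taking a modulo on every access. A user-space demo of the wrap-free indexing:

	#include <stdio.h>

	int main(void)
	{
		enum { NUMDEVS = 4 };
		/* 2*NUMDEVS-1 slots; the second half mirrors the first,
		 * as in exofs_read_lookup_dev_table() above */
		int ods[2 * NUMDEVS - 1] = { 10, 11, 12, 13 };
		unsigned i, d = 2;	/* this inode's first device */

		for (i = 0; i < NUMDEVS - 1; i++)
			ods[i + NUMDEVS] = ods[i];

		/* prints: 12 13 10 11 -- a rotated view, plain indexing */
		for (i = 0; i < NUMDEVS; i++)
			printf("%d ", ods[d + i]);
		printf("\n");
		return 0;
	}
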
668 739
@@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
676 struct exofs_sb_info *sbi; /*extended info */ 747 struct exofs_sb_info *sbi; /*extended info */
677 struct osd_dev *od; /* Master device */ 748 struct osd_dev *od; /* Master device */
678 struct exofs_fscb fscb; /*on-disk superblock info */ 749 struct exofs_fscb fscb; /*on-disk superblock info */
679 struct osd_obj_id obj; 750 struct ore_comp comp;
680 unsigned table_count; 751 unsigned table_count;
681 int ret; 752 int ret;
682 753
@@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
684 if (!sbi) 755 if (!sbi)
685 return -ENOMEM; 756 return -ENOMEM;
686 757
687 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
688 if (ret)
689 goto free_bdi;
690
691 /* use mount options to fill superblock */ 758 /* use mount options to fill superblock */
692 if (opts->is_osdname) { 759 if (opts->is_osdname) {
693 struct osd_dev_info odi = {.systemid_len = 0}; 760 struct osd_dev_info odi = {.systemid_len = 0};
@@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
695 odi.osdname_len = strlen(opts->dev_name); 762 odi.osdname_len = strlen(opts->dev_name);
696 odi.osdname = (u8 *)opts->dev_name; 763 odi.osdname = (u8 *)opts->dev_name;
697 od = osduld_info_lookup(&odi); 764 od = osduld_info_lookup(&odi);
765 kfree(opts->dev_name);
766 opts->dev_name = NULL;
698 } else { 767 } else {
699 od = osduld_path_lookup(opts->dev_name); 768 od = osduld_path_lookup(opts->dev_name);
700 } 769 }
@@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
709 sbi->layout.group_width = 1; 778 sbi->layout.group_width = 1;
710 sbi->layout.group_depth = -1; 779 sbi->layout.group_depth = -1;
711 sbi->layout.group_count = 1; 780 sbi->layout.group_count = 1;
712 sbi->layout.s_ods[0] = od;
713 sbi->layout.s_numdevs = 1;
714 sbi->layout.s_pid = opts->pid;
715 sbi->s_timeout = opts->timeout; 781 sbi->s_timeout = opts->timeout;
716 782
783 sbi->one_comp.obj.partition = opts->pid;
784 sbi->one_comp.obj.id = 0;
785 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
786 sbi->comps.numdevs = 1;
787 sbi->comps.single_comp = EC_SINGLE_COMP;
788 sbi->comps.comps = &sbi->one_comp;
789 sbi->comps.ods = sbi->_min_one_dev;
790
717 /* fill in some other data by hand */ 791 /* fill in some other data by hand */
718 memset(sb->s_id, 0, sizeof(sb->s_id)); 792 memset(sb->s_id, 0, sizeof(sb->s_id));
719 strcpy(sb->s_id, "exofs"); 793 strcpy(sb->s_id, "exofs");
@@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
724 sb->s_bdev = NULL; 798 sb->s_bdev = NULL;
725 sb->s_dev = 0; 799 sb->s_dev = 0;
726 800
727 obj.partition = sbi->layout.s_pid; 801 comp.obj.partition = sbi->one_comp.obj.partition;
728 obj.id = EXOFS_SUPER_ID; 802 comp.obj.id = EXOFS_SUPER_ID;
729 exofs_make_credential(sbi->s_cred, &obj); 803 exofs_make_credential(comp.cred, &comp.obj);
730 804
731 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); 805 ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
732 if (unlikely(ret)) 806 if (unlikely(ret))
733 goto free_sbi; 807 goto free_sbi;
734 808
@@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
757 831
758 table_count = le64_to_cpu(fscb.s_dev_table_count); 832 table_count = le64_to_cpu(fscb.s_dev_table_count);
759 if (table_count) { 833 if (table_count) {
760 ret = exofs_read_lookup_dev_table(&sbi, table_count); 834 ret = exofs_read_lookup_dev_table(sbi, od, table_count);
761 if (unlikely(ret)) 835 if (unlikely(ret))
762 goto free_sbi; 836 goto free_sbi;
837 } else {
838 sbi->comps.ods[0] = od;
763 } 839 }
764 840
765 __sbi_read_stats(sbi); 841 __sbi_read_stats(sbi);
@@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
793 goto free_sbi; 869 goto free_sbi;
794 } 870 }
795 871
796 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 872 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
797 sbi->layout.s_pid); 873 if (ret) {
798 if (opts->is_osdname) 874 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
799 kfree(opts->dev_name); 875 goto free_sbi;
876 }
877
878 _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0],
879 sbi->one_comp.obj.partition);
800 return 0; 880 return 0;
801 881
802free_sbi: 882free_sbi:
803 bdi_destroy(&sbi->bdi);
804free_bdi:
805 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 883 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
806 opts->dev_name, sbi->layout.s_pid, ret); 884 opts->dev_name, sbi->one_comp.obj.partition, ret);
807 exofs_free_sbi(sbi); 885 exofs_free_sbi(sbi);
808 if (opts->is_osdname)
809 kfree(opts->dev_name);
810 return ret; 886 return ret;
811} 887}
812 888
@@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
837{ 913{
838 struct super_block *sb = dentry->d_sb; 914 struct super_block *sb = dentry->d_sb;
839 struct exofs_sb_info *sbi = sb->s_fs_info; 915 struct exofs_sb_info *sbi = sb->s_fs_info;
840 struct exofs_io_state *ios; 916 struct ore_io_state *ios;
841 struct osd_attr attrs[] = { 917 struct osd_attr attrs[] = {
842 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 918 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
843 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), 919 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
846 }; 922 };
847 uint64_t capacity = ULLONG_MAX; 923 uint64_t capacity = ULLONG_MAX;
848 uint64_t used = ULLONG_MAX; 924 uint64_t used = ULLONG_MAX;
849 uint8_t cred_a[OSD_CAP_LEN];
850 int ret; 925 int ret;
851 926
852 ret = exofs_get_io_state(&sbi->layout, &ios); 927 ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
853 if (ret) { 928 if (ret) {
854 EXOFS_DBGMSG("exofs_get_io_state failed.\n"); 929 EXOFS_DBGMSG("ore_get_io_state failed.\n");
855 return ret; 930 return ret;
856 } 931 }
857 932
858 exofs_make_credential(cred_a, &ios->obj);
859 ios->cred = sbi->s_cred;
860 ios->in_attr = attrs; 933 ios->in_attr = attrs;
861 ios->in_attr_len = ARRAY_SIZE(attrs); 934 ios->in_attr_len = ARRAY_SIZE(attrs);
862 935
863 ret = exofs_sbi_read(ios); 936 ret = ore_read(ios);
864 if (unlikely(ret)) 937 if (unlikely(ret))
865 goto out; 938 goto out;
866 939
@@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
889 buf->f_namelen = EXOFS_NAME_LEN; 962 buf->f_namelen = EXOFS_NAME_LEN;
890 963
891out: 964out:
892 exofs_put_io_state(ios); 965 ore_put_io_state(ios);
893 return ret; 966 return ret;
894} 967}
895 968
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 52c05376394..35d6a3cfd9f 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -194,12 +194,10 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
194 case ACL_TYPE_ACCESS: 194 case ACL_TYPE_ACCESS:
195 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; 195 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
196 if (acl) { 196 if (acl) {
197 mode_t mode = inode->i_mode; 197 error = posix_acl_equiv_mode(acl, &inode->i_mode);
198 error = posix_acl_equiv_mode(acl, &mode);
199 if (error < 0) 198 if (error < 0)
200 return error; 199 return error;
201 else { 200 else {
202 inode->i_mode = mode;
203 inode->i_ctime = CURRENT_TIME_SEC; 201 inode->i_ctime = CURRENT_TIME_SEC;
204 mark_inode_dirty(inode); 202 mark_inode_dirty(inode);
205 if (error == 0) 203 if (error == 0)
@@ -253,16 +251,14 @@ ext2_init_acl(struct inode *inode, struct inode *dir)
253 inode->i_mode &= ~current_umask(); 251 inode->i_mode &= ~current_umask();
254 } 252 }
255 if (test_opt(inode->i_sb, POSIX_ACL) && acl) { 253 if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
256 mode_t mode = inode->i_mode;
257 if (S_ISDIR(inode->i_mode)) { 254 if (S_ISDIR(inode->i_mode)) {
258 error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); 255 error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
259 if (error) 256 if (error)
260 goto cleanup; 257 goto cleanup;
261 } 258 }
262 error = posix_acl_create(&acl, GFP_KERNEL, &mode); 259 error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
263 if (error < 0) 260 if (error < 0)
264 return error; 261 return error;
265 inode->i_mode = mode;
266 if (error > 0) { 262 if (error > 0) {
267 /* This is an extended ACL */ 263 /* This is an extended ACL */
268 error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); 264 error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl);
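
The ext2, ext3 and ext4 hunks here are the same mechanical conversion: posix_acl_equiv_mode() and posix_acl_create() now operate on &inode->i_mode directly instead of a local mode_t that was copied back by hand. The resulting pattern, common to all three filesystems:

	/* was: mode_t mode = inode->i_mode;
	 *      error = posix_acl_equiv_mode(acl, &mode);
	 *      ... inode->i_mode = mode; */
	error = posix_acl_equiv_mode(acl, &inode->i_mode);
	if (error < 0)
		return error;
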
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 6c29bf0df04..3091f62e55b 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
199 case ACL_TYPE_ACCESS: 199 case ACL_TYPE_ACCESS:
200 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; 200 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
201 if (acl) { 201 if (acl) {
202 mode_t mode = inode->i_mode; 202 error = posix_acl_equiv_mode(acl, &inode->i_mode);
203 error = posix_acl_equiv_mode(acl, &mode);
204 if (error < 0) 203 if (error < 0)
205 return error; 204 return error;
206 else { 205 else {
207 inode->i_mode = mode;
208 inode->i_ctime = CURRENT_TIME_SEC; 206 inode->i_ctime = CURRENT_TIME_SEC;
209 ext3_mark_inode_dirty(handle, inode); 207 ext3_mark_inode_dirty(handle, inode);
210 if (error == 0) 208 if (error == 0)
@@ -261,19 +259,16 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
261 inode->i_mode &= ~current_umask(); 259 inode->i_mode &= ~current_umask();
262 } 260 }
263 if (test_opt(inode->i_sb, POSIX_ACL) && acl) { 261 if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
264 mode_t mode = inode->i_mode;
265
266 if (S_ISDIR(inode->i_mode)) { 262 if (S_ISDIR(inode->i_mode)) {
267 error = ext3_set_acl(handle, inode, 263 error = ext3_set_acl(handle, inode,
268 ACL_TYPE_DEFAULT, acl); 264 ACL_TYPE_DEFAULT, acl);
269 if (error) 265 if (error)
270 goto cleanup; 266 goto cleanup;
271 } 267 }
272 error = posix_acl_create(&acl, GFP_NOFS, &mode); 268 error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
273 if (error < 0) 269 if (error < 0)
274 return error; 270 return error;
275 271
276 inode->i_mode = mode;
277 if (error > 0) { 272 if (error > 0) {
278 /* This is an extended ACL */ 273 /* This is an extended ACL */
279 error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); 274 error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 04109460ba9..56fd8f86593 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ 7ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
10 mmp.o 10 mmp.o indirect.o
11 11
12ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 12ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index dca2d1ded93..a5c29bb3b83 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -198,12 +198,10 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
198 case ACL_TYPE_ACCESS: 198 case ACL_TYPE_ACCESS:
199 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 199 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
200 if (acl) { 200 if (acl) {
201 mode_t mode = inode->i_mode; 201 error = posix_acl_equiv_mode(acl, &inode->i_mode);
202 error = posix_acl_equiv_mode(acl, &mode);
203 if (error < 0) 202 if (error < 0)
204 return error; 203 return error;
205 else { 204 else {
206 inode->i_mode = mode;
207 inode->i_ctime = ext4_current_time(inode); 205 inode->i_ctime = ext4_current_time(inode);
208 ext4_mark_inode_dirty(handle, inode); 206 ext4_mark_inode_dirty(handle, inode);
209 if (error == 0) 207 if (error == 0)
@@ -259,19 +257,16 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
259 inode->i_mode &= ~current_umask(); 257 inode->i_mode &= ~current_umask();
260 } 258 }
261 if (test_opt(inode->i_sb, POSIX_ACL) && acl) { 259 if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
262 mode_t mode = inode->i_mode;
263
264 if (S_ISDIR(inode->i_mode)) { 260 if (S_ISDIR(inode->i_mode)) {
265 error = ext4_set_acl(handle, inode, 261 error = ext4_set_acl(handle, inode,
266 ACL_TYPE_DEFAULT, acl); 262 ACL_TYPE_DEFAULT, acl);
267 if (error) 263 if (error)
268 goto cleanup; 264 goto cleanup;
269 } 265 }
270 error = posix_acl_create(&acl, GFP_NOFS, &mode); 266 error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
271 if (error < 0) 267 if (error < 0)
272 return error; 268 return error;
273 269
274 inode->i_mode = mode;
275 if (error > 0) { 270 if (error > 0) {
276 /* This is an extended ACL */ 271 /* This is an extended ACL */
277 error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); 272 error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 264f6949511..f8224adf496 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -620,3 +620,51 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
620 620
621} 621}
622 622
623/**
624 * ext4_inode_to_goal_block - return a hint for block allocation
625 * @inode: inode for block allocation
626 *
627 * Return the ideal location to start allocating blocks for a
628 * newly created inode.
629 */
630ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
631{
632 struct ext4_inode_info *ei = EXT4_I(inode);
633 ext4_group_t block_group;
634 ext4_grpblk_t colour;
635 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
636 ext4_fsblk_t bg_start;
637 ext4_fsblk_t last_block;
638
639 block_group = ei->i_block_group;
640 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
641 /*
642 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
643 * block groups per flexgroup, reserve the first block
644 * group for directories and special files. Regular
645 * files will start at the second block group. This
646 * tends to speed up directory access and improves
647 * fsck times.
648 */
649 block_group &= ~(flex_size-1);
650 if (S_ISREG(inode->i_mode))
651 block_group++;
652 }
653 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
654 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
655
656 /*
657 * If we are doing delayed allocation, we don't need take
658 * colour into account.
659 */
660 if (test_opt(inode->i_sb, DELALLOC))
661 return bg_start;
662
663 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
664 colour = (current->pid % 16) *
665 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
666 else
667 colour = (current->pid % 16) * ((last_block - bg_start) / 16);
668 return bg_start + colour;
669}
670
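
The goal computation rounds the inode's block group down to its flex-group boundary, skips one group for regular files, then adds a per-process colour unless delalloc will pick the placement anyway. The group/colour arithmetic in isolation (illustrative geometry, runnable in user space):

	#include <stdio.h>

	int main(void)
	{
		unsigned flex_size = 16, blocks_per_group = 32768;
		unsigned block_group = 37, pid = 1234;
		int is_regular_file = 1;

		if (flex_size >= 4) {			/* DIR_ALLOC_SCHEME */
			block_group &= ~(flex_size - 1);/* 37 -> 32 */
			if (is_regular_file)
				block_group++;		/* 32 -> 33 */
		}
		/* prints: group=33 colour=4096  (1234 % 16 == 2, 2 * 2048) */
		printf("group=%u colour=%u\n", block_group,
		       (pid % 16) * (blocks_per_group / 16));
		return 0;
	}
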
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index fac90f3fba8..8efb2f0a344 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -246,3 +246,24 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
246 return 1; 246 return 1;
247} 247}
248 248
249int ext4_check_blockref(const char *function, unsigned int line,
250 struct inode *inode, __le32 *p, unsigned int max)
251{
252 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
253 __le32 *bref = p;
254 unsigned int blk;
255
256 while (bref < p+max) {
257 blk = le32_to_cpu(*bref++);
258 if (blk &&
259 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
260 blk, 1))) {
261 es->s_last_error_block = cpu_to_le64(blk);
262 ext4_error_inode(inode, function, line, blk,
263 "invalid block");
264 return -EIO;
265 }
266 }
267 return 0;
268}
269
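
ext4_check_blockref() moves into block_validity.c so the new indirect.c can share it; it walks an array of little-endian block pointers and rejects any that fall outside the filesystem's valid range. A hedged caller sketch for one indirect block, modeled on the ext4_check_indirect_blockref() usage:

	/* bh holds an indirect block just read from disk */
	int err = ext4_check_blockref(__func__, __LINE__, inode,
				      (__le32 *)bh->b_data,
				      EXT4_ADDR_PER_BLOCK(inode->i_sb));
	if (err)
		return err;
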
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fa44df87971..e717dfd2f2b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -526,6 +526,7 @@ struct ext4_new_group_data {
526#define EXT4_FREE_BLOCKS_METADATA 0x0001 526#define EXT4_FREE_BLOCKS_METADATA 0x0001
527#define EXT4_FREE_BLOCKS_FORGET 0x0002 527#define EXT4_FREE_BLOCKS_FORGET 0x0002
528#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 528#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
529#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
529 530
530/* 531/*
531 * ioctl commands 532 * ioctl commands
@@ -939,6 +940,8 @@ struct ext4_inode_info {
939#define ext4_find_next_zero_bit find_next_zero_bit_le 940#define ext4_find_next_zero_bit find_next_zero_bit_le
940#define ext4_find_next_bit find_next_bit_le 941#define ext4_find_next_bit find_next_bit_le
941 942
943extern void ext4_set_bits(void *bm, int cur, int len);
944
942/* 945/*
943 * Maximal mount counts between two filesystem checks 946 * Maximal mount counts between two filesystem checks
944 */ 947 */
@@ -1126,7 +1129,8 @@ struct ext4_sb_info {
1129	struct journal_s *s_journal;
1130	struct list_head s_orphan;
1131	struct mutex s_orphan_lock;
1129	struct mutex s_resize_lock;
1132	unsigned long s_resize_flags;		/* Flags indicating if there
1133						   is a resizer */
1134	unsigned long s_commit_interval;
1135	u32 s_max_batch_time;
1136	u32 s_min_batch_time;
@@ -1214,6 +1218,9 @@ struct ext4_sb_info {
1218
1219	/* Kernel thread for multiple mount protection */
1220	struct task_struct *s_mmp_tsk;
1221
1222	/* record the last minlen when FITRIM is called. */
1223	atomic_t s_last_trim_minblks;
1224};
1225
1226static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1743,6 +1750,7 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
1750						 struct ext4_group_desc *desc);
1751#define ext4_free_blocks_after_init(sb, group, desc)	\
1752	ext4_init_block_bitmap(sb, NULL, group, desc)
1753ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
1754
1755/* dir.c */
1756extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
@@ -1793,7 +1801,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1801			     unsigned long count, int flags);
1802extern int ext4_mb_add_groupinfo(struct super_block *sb,
1803		ext4_group_t i, struct ext4_group_desc *desc);
1796extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1804extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
1805				ext4_fsblk_t block, unsigned long count);
1806extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
1807
@@ -1834,6 +1842,17 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1842extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1843extern void ext4_da_update_reserve_space(struct inode *inode,
1844					int used, int quota_claim);
1845
1846/* indirect.c */
1847extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
1848 struct ext4_map_blocks *map, int flags);
1849extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
1850 const struct iovec *iov, loff_t offset,
1851 unsigned long nr_segs);
1852extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
1853extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
1854extern void ext4_ind_truncate(struct inode *inode);
1855
1856/* ioctl.c */
1857extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1858extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
@@ -1855,6 +1874,9 @@ extern int ext4_group_extend(struct super_block *sb,
1874				ext4_fsblk_t n_blocks_count);
1875
1876/* super.c */
1877extern void *ext4_kvmalloc(size_t size, gfp_t flags);
1878extern void *ext4_kvzalloc(size_t size, gfp_t flags);
1879extern void ext4_kvfree(void *ptr);
1880extern void __ext4_error(struct super_block *, const char *, unsigned int,
1881			 const char *, ...)
1882	__attribute__ ((format (printf, 4, 5)));
@@ -2067,11 +2089,19 @@ struct ext4_group_info {
2089	 * 5 free 8-block regions. */
2090};
2091
2092#define EXT4_GROUP_INFO_NEED_INIT_BIT		0
2093#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT		1
2094
2095#define EXT4_MB_GRP_NEED_INIT(grp)	\
2096	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
2097
2098#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
2099 (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
2100#define EXT4_MB_GRP_SET_TRIMMED(grp) \
2101 (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
2102#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
2103 (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
2104
2105#define EXT4_MAX_CONTENTION		8
2106#define EXT4_CONTENTION_THRESHOLD	2
2107
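The WAS_TRIMMED flag added above is a plain bit in bb_state. A minimal sketch of the set/test/clear pattern, using ordinary bit operations instead of the kernel's atomic test_bit()/set_bit(); not part of the patch:

#include <stdio.h>

#define WAS_TRIMMED (1UL << 1) /* mirrors EXT4_GROUP_INFO_WAS_TRIMMED_BIT */

int main(void)
{
	unsigned long bb_state = 0;

	bb_state |= WAS_TRIMMED;	/* EXT4_MB_GRP_SET_TRIMMED   */
	if (bb_state & WAS_TRIMMED)	/* EXT4_MB_GRP_WAS_TRIMMED   */
		printf("group already trimmed since last mount, skip\n");
	bb_state &= ~WAS_TRIMMED;	/* EXT4_MB_GRP_CLEAR_TRIMMED */
	return 0;
}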
@@ -2123,6 +2153,19 @@ static inline void ext4_mark_super_dirty(struct super_block *sb)
2153}
2154
2155/*
2156 * Block validity checking
2157 */
2158#define ext4_check_indirect_blockref(inode, bh) \
2159 ext4_check_blockref(__func__, __LINE__, inode, \
2160 (__le32 *)(bh)->b_data, \
2161 EXT4_ADDR_PER_BLOCK((inode)->i_sb))
2162
2163#define ext4_ind_check_inode(inode) \
2164 ext4_check_blockref(__func__, __LINE__, inode, \
2165 EXT4_I(inode)->i_data, \
2166 EXT4_NDIR_BLOCKS)
2167
2168/*
2169 * Inodes and files operations
2170 */
2171
@@ -2151,6 +2194,8 @@ extern void ext4_exit_system_zone(void);
2194extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
2195				 ext4_fsblk_t start_blk,
2196				 unsigned int count);
2197extern int ext4_check_blockref(const char *, unsigned int,
2198 struct inode *, __le32 *, unsigned int);
2199
2200/* extents.c */
2201extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
@@ -2230,6 +2275,10 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2275extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2276extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2277
2278#define EXT4_RESIZING 0
2279extern int ext4_resize_begin(struct super_block *sb);
2280extern void ext4_resize_end(struct super_block *sb);
2281
2282#endif	/* __KERNEL__ */
2283
2284#endif	/* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f815cc81e7a..57cf568a98a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -114,12 +114,6 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
114			      struct ext4_ext_path *path,
115			      ext4_lblk_t block)
116{
117 struct ext4_inode_info *ei = EXT4_I(inode);
118 ext4_fsblk_t bg_start;
119 ext4_fsblk_t last_block;
120 ext4_grpblk_t colour;
121 ext4_group_t block_group;
122 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
117	int depth;
118
119	if (path) {
@@ -161,36 +155,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
155	}
156
157	/* OK. use inode's group */
164	block_group = ei->i_block_group;
165 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
166 /*
167 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
168 * block groups per flexgroup, reserve the first block
169 * group for directories and special files. Regular
170 * files will start at the second block group. This
171 * tends to speed up directory access and improves
172 * fsck times.
173 */
174 block_group &= ~(flex_size-1);
175 if (S_ISREG(inode->i_mode))
176 block_group++;
177 }
178 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
179 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
180
181 /*
182 * If we are doing delayed allocation, we don't need take
183 * colour into account.
184 */
185 if (test_opt(inode->i_sb, DELALLOC))
186 return bg_start;
187
188 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
189 colour = (current->pid % 16) *
190 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
191 else
192 colour = (current->pid % 16) * ((last_block - bg_start) / 16);
193 return bg_start + colour + block;
158	return ext4_inode_to_goal_block(inode);
159}
160
161/*
@@ -776,6 +741,16 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
741				 logical, le32_to_cpu(curp->p_idx->ei_block));
742		return -EIO;
743	}
744
745 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
746 >= le16_to_cpu(curp->p_hdr->eh_max))) {
747 EXT4_ERROR_INODE(inode,
748 "eh_entries %d >= eh_max %d!",
749 le16_to_cpu(curp->p_hdr->eh_entries),
750 le16_to_cpu(curp->p_hdr->eh_max));
751 return -EIO;
752 }
753
754	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
755	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
756		/* insert after */
@@ -805,13 +780,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
780	ext4_idx_store_pblock(ix, ptr);
781	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
782
808 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
809 > le16_to_cpu(curp->p_hdr->eh_max))) {
810 EXT4_ERROR_INODE(inode,
811 "logical %d == ei_block %d!",
812 logical, le32_to_cpu(curp->p_idx->ei_block));
813 return -EIO;
814 }
783	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
784		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
785		return -EIO;
@@ -1446,8 +1414,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1414 * ext4_ext_next_leaf_block:
1415 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
1416 */
1449static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
1450					struct ext4_ext_path *path)
1417static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
1418{
1419	int depth;
1420
@@ -1757,7 +1724,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1724			goto merge;
1725	}
1726
1760repeat:
1727	depth = ext_depth(inode);
1728	eh = path[depth].p_hdr;
1729	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
@@ -1765,9 +1731,10 @@ repeat:
1731
1732	/* probably next leaf has space for us? */
1733	fex = EXT_LAST_EXTENT(eh);
1768	next = ext4_ext_next_leaf_block(inode, path);
1769	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
1770	    && next != EXT_MAX_BLOCKS) {
1734	next = EXT_MAX_BLOCKS;
1735	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
1736		next = ext4_ext_next_leaf_block(path);
1737	if (next != EXT_MAX_BLOCKS) {
1738		ext_debug("next leaf block - %d\n", next);
1739		BUG_ON(npath != NULL);
1740		npath = ext4_ext_find_extent(inode, next, NULL);
@@ -1779,7 +1746,7 @@ repeat:
1746			ext_debug("next leaf isn't full(%d)\n",
1747				  le16_to_cpu(eh->eh_entries));
1748			path = npath;
1782			goto repeat;
1749			goto has_space;
1750		}
1751		ext_debug("next leaf has no free space(%d,%d)\n",
1752			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
@@ -1839,7 +1806,7 @@ has_space:
1806				ext4_ext_pblock(newext),
1807				ext4_ext_is_uninitialized(newext),
1808				ext4_ext_get_actual_len(newext),
1842				nearex, len, nearex + 1, nearex + 2);
1809				nearex, len, nearex, nearex + 1);
1810		memmove(nearex + 1, nearex, len);
1811		path[depth].p_ext = nearex;
1812	}
@@ -2052,7 +2019,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2019}
2020
2021/*
2055 * ext4_ext_in_cache()
2022 * ext4_ext_check_cache()
2023 * Checks to see if the given block is in the cache.
2024 * If it is, the cached extent is stored in the given
2025 * cache extent pointer.  If the cached extent is a hole,
@@ -2134,8 +2101,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2101/*
2102 * ext4_ext_rm_idx:
2103 * removes index from the index block.
2137 * It's used in truncate case only, thus all requests are for
2138 * last index in the block only.
2104 */
2105static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2106			struct ext4_ext_path *path)
@@ -2153,6 +2118,13 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2118	err = ext4_ext_get_access(handle, inode, path);
2119	if (err)
2120		return err;
2121
2122 if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
2123 int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
2124 len *= sizeof(struct ext4_extent_idx);
2125 memmove(path->p_idx, path->p_idx + 1, len);
2126 }
2127
2128	le16_add_cpu(&path->p_hdr->eh_entries, -1);
2129	err = ext4_ext_dirty(handle, inode, path);
2130	if (err)
@@ -2534,8 +2506,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
2506	return 1;
2507}
2508
2537static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2538				ext4_lblk_t end)
2509static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2510{
2511	struct super_block *sb = inode->i_sb;
2512	int depth = ext_depth(inode);
@@ -2575,7 +2546,7 @@ again:
2546		if (i == depth) {
2547			/* this is leaf block */
2548			err = ext4_ext_rm_leaf(handle, inode, path,
2578					start, end);
2549					start, EXT_MAX_BLOCKS - 1);
2550			/* root level has p_bh == NULL, brelse() eats this */
2551			brelse(path[i].p_bh);
2552			path[i].p_bh = NULL;
@@ -3107,12 +3078,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3078					struct ext4_ext_path *path)
3079{
3080	struct ext4_extent *ex;
3110	struct ext4_extent_header *eh;
3081	int depth;
3082	int err = 0;
3083
3084	depth = ext_depth(inode);
3115	eh = path[depth].p_hdr;
3085	ex = path[depth].p_ext;
3086
3087	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
@@ -3357,8 +3326,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3326	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3327
3328	/* check in cache */
3360	if (ext4_ext_in_cache(inode, map->m_lblk, &newex) &&
3361	    ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) {
3329	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
3330	    ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3331		if (!newex.ee_start_lo && !newex.ee_start_hi) {
3332			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3333				/*
@@ -3497,8 +3466,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3466
3467		ext4_ext_mark_uninitialized(ex);
3468
3500		err = ext4_ext_remove_space(inode, map->m_lblk,
3501			map->m_lblk + punched_out);
3469		ext4_ext_invalidate_cache(inode);
3470
3471 err = ext4_ext_rm_leaf(handle, inode, path,
3472 map->m_lblk, map->m_lblk + punched_out);
3473
3474 if (!err && path->p_hdr->eh_entries == 0) {
3475 /*
3476 * Punch hole freed all of this sub tree,
3477 * so we need to correct eh_depth
3478 */
3479 err = ext4_ext_get_access(handle, inode, path);
3480 if (err == 0) {
3481 ext_inode_hdr(inode)->eh_depth = 0;
3482 ext_inode_hdr(inode)->eh_max =
3483 cpu_to_le16(ext4_ext_space_root(
3484 inode, 0));
3485
3486 err = ext4_ext_dirty(
3487 handle, inode, path);
3488 }
3489 }
3490
3491		goto out2;
3492	}
@@ -3596,17 +3584,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3584	}
3585
3586	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
3599	if (err)
3600		goto out2;
3601
3602	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3587	if (!err)
3588		err = ext4_ext_insert_extent(handle, inode, path,
3589					     &newex, flags);
3590	if (err) {
3591 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
3592 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
3593		/* free data blocks we just allocated */
3594		/* not a good idea to call discard here directly,
3595		 * but otherwise we'd need to call it every free() */
3596		ext4_discard_preallocations(inode);
3597		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
3609				 ext4_ext_get_actual_len(&newex), 0);
3598				 ext4_ext_get_actual_len(&newex), fb_flags);
3599		goto out2;
3600	}
3612 3601
@@ -3699,7 +3688,7 @@ void ext4_ext_truncate(struct inode *inode)
3699 3688
3700 last_block = (inode->i_size + sb->s_blocksize - 1) 3689 last_block = (inode->i_size + sb->s_blocksize - 1)
3701 >> EXT4_BLOCK_SIZE_BITS(sb); 3690 >> EXT4_BLOCK_SIZE_BITS(sb);
3702 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); 3691 err = ext4_ext_remove_space(inode, last_block);
3703 3692
3704 /* In a multi-transaction truncate, we only make the final 3693 /* In a multi-transaction truncate, we only make the final
3705 * transaction synchronous. 3694 * transaction synchronous.
@@ -3835,7 +3824,7 @@ retry:
3824						blkbits) >> blkbits))
3825			new_size = offset + len;
3826		else
3838			new_size = (map.m_lblk + ret) << blkbits;
3827			new_size = ((loff_t) map.m_lblk + ret) << blkbits;
3828
3829		ext4_falloc_update_inode(inode, mode, new_size,
3830					 (map.m_flags & EXT4_MAP_NEW));
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index da3bed3e0c2..036f78f7a1e 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -129,15 +129,30 @@ static int ext4_sync_parent(struct inode *inode)
129{
130	struct writeback_control wbc;
131	struct dentry *dentry = NULL;
132	struct inode *next;
133	int ret = 0;
134
134	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
135	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
136		return 0;
137	inode = igrab(inode);
138	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
139		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
136		dentry = list_entry(inode->i_dentry.next,
137				    struct dentry, d_alias);
138		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
140		dentry = NULL;
141		spin_lock(&inode->i_lock);
142		if (!list_empty(&inode->i_dentry)) {
143			dentry = list_first_entry(&inode->i_dentry,
144						  struct dentry, d_alias);
145			dget(dentry);
146		}
147		spin_unlock(&inode->i_lock);
148		if (!dentry)
149			break;
140		inode = dentry->d_parent->d_inode;
150		next = igrab(dentry->d_parent->d_inode);
151		dput(dentry);
152		if (!next)
153			break;
154		iput(inode);
155		inode = next;
156		ret = sync_mapping_buffers(inode->i_mapping);
157		if (ret)
158			break;
@@ -148,6 +163,7 @@ static int ext4_sync_parent(struct inode *inode)
163		if (ret)
164			break;
165	}
166	iput(inode);
167	return ret;
168}
169
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 21bb2f61e50..9c63f273b55 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1287,7 +1287,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1287			   group, used_blks,
1288			   ext4_itable_unused_count(sb, gdp));
1289		ret = 1;
1290		goto out;
1290		goto err_out;
1291	}
1292
1293	blk = ext4_inode_table(sb, gdp) + used_blks;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
new file mode 100644
index 00000000000..b8602cde5b5
--- /dev/null
+++ b/fs/ext4/indirect.c
@@ -0,0 +1,1482 @@
1/*
2 * linux/fs/ext4/indirect.c
3 *
4 * from
5 *
6 * linux/fs/ext4/inode.c
7 *
8 * Copyright (C) 1992, 1993, 1994, 1995
9 * Remy Card (card@masi.ibp.fr)
10 * Laboratoire MASI - Institut Blaise Pascal
11 * Universite Pierre et Marie Curie (Paris VI)
12 *
13 * from
14 *
15 * linux/fs/minix/inode.c
16 *
17 * Copyright (C) 1991, 1992 Linus Torvalds
18 *
19 * Goal-directed block allocation by Stephen Tweedie
20 * (sct@redhat.com), 1993, 1998
21 */
22
23#include <linux/module.h>
24#include "ext4_jbd2.h"
25#include "truncate.h"
26
27#include <trace/events/ext4.h>
28
29typedef struct {
30 __le32 *p;
31 __le32 key;
32 struct buffer_head *bh;
33} Indirect;
34
35static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
36{
37 p->key = *(p->p = v);
38 p->bh = bh;
39}
40
41/**
42 * ext4_block_to_path - parse the block number into array of offsets
43 * @inode: inode in question (we are only interested in its superblock)
44 * @i_block: block number to be parsed
45 * @offsets: array to store the offsets in
46 * @boundary: set this non-zero if the referred-to block is likely to be
47 * followed (on disk) by an indirect block.
48 *
49 * To store the locations of file's data ext4 uses a data structure common
50 * for UNIX filesystems - tree of pointers anchored in the inode, with
51 * data blocks at leaves and indirect blocks in intermediate nodes.
52 * This function translates the block number into path in that tree -
53 * return value is the path length and @offsets[n] is the offset of
54 * pointer to (n+1)th node in the nth one. If @block is out of range
55 * (negative or too large) warning is printed and zero returned.
56 *
57 * Note: function doesn't find node addresses, so no IO is needed. All
58 * we need to know is the capacity of indirect blocks (taken from the
59 * inode->i_sb).
60 */
61
62/*
63 * Portability note: the last comparison (check that we fit into triple
64 * indirect block) is spelled differently, because otherwise on an
65 * architecture with 32-bit longs and 8Kb pages we might get into trouble
66 * if our filesystem had 8Kb blocks. We might use long long, but that would
67 * kill us on x86. Oh, well, at least the sign propagation does not matter -
68 * i_block would have to be negative in the very beginning, so we would not
69 * get there at all.
70 */
71
72static int ext4_block_to_path(struct inode *inode,
73 ext4_lblk_t i_block,
74 ext4_lblk_t offsets[4], int *boundary)
75{
76 int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
77 int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
78 const long direct_blocks = EXT4_NDIR_BLOCKS,
79 indirect_blocks = ptrs,
80 double_blocks = (1 << (ptrs_bits * 2));
81 int n = 0;
82 int final = 0;
83
84 if (i_block < direct_blocks) {
85 offsets[n++] = i_block;
86 final = direct_blocks;
87 } else if ((i_block -= direct_blocks) < indirect_blocks) {
88 offsets[n++] = EXT4_IND_BLOCK;
89 offsets[n++] = i_block;
90 final = ptrs;
91 } else if ((i_block -= indirect_blocks) < double_blocks) {
92 offsets[n++] = EXT4_DIND_BLOCK;
93 offsets[n++] = i_block >> ptrs_bits;
94 offsets[n++] = i_block & (ptrs - 1);
95 final = ptrs;
96 } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
97 offsets[n++] = EXT4_TIND_BLOCK;
98 offsets[n++] = i_block >> (ptrs_bits * 2);
99 offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
100 offsets[n++] = i_block & (ptrs - 1);
101 final = ptrs;
102 } else {
103 ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
104 i_block + direct_blocks +
105 indirect_blocks + double_blocks, inode->i_ino);
106 }
107 if (boundary)
108 *boundary = final - 1 - (i_block & (ptrs - 1));
109 return n;
110}
111
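/*
 * The offset arithmetic above is easier to follow with a worked example.
 * For 4K blocks there are 1024 pointers per map block, so logical block
 * 5000 resolves through the double-indirect tree. A self-contained,
 * hedged sketch (constants hypothetical, mirroring the function above;
 * not part of the patch):
 */

#include <stdio.h>

#define PTRS      1024 /* hypothetical: 4K block / 4-byte pointers */
#define PTRS_BITS 10
#define NDIR      12

static int block_to_path(long i_block, long offsets[4])
{
	int n = 0;

	if (i_block < NDIR) {
		offsets[n++] = i_block;
	} else if ((i_block -= NDIR) < PTRS) {
		offsets[n++] = 12;               /* EXT4_IND_BLOCK  */
		offsets[n++] = i_block;
	} else if ((i_block -= PTRS) < (1L << (PTRS_BITS * 2))) {
		offsets[n++] = 13;               /* EXT4_DIND_BLOCK */
		offsets[n++] = i_block >> PTRS_BITS;
		offsets[n++] = i_block & (PTRS - 1);
	} else {
		i_block -= 1L << (PTRS_BITS * 2);
		offsets[n++] = 14;               /* EXT4_TIND_BLOCK */
		offsets[n++] = i_block >> (PTRS_BITS * 2);
		offsets[n++] = (i_block >> PTRS_BITS) & (PTRS - 1);
		offsets[n++] = i_block & (PTRS - 1);
	}
	return n;
}

int main(void)
{
	long off[4];
	/* Block 5000: 5000 - 12 - 1024 = 3964, so the path is
	 * {13, 3964 >> 10, 3964 & 1023} = {13, 3, 892}, depth 3. */
	int n = block_to_path(5000, off);
	printf("depth %d: %ld %ld %ld\n", n, off[0], off[1], off[2]);
	return 0;
}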
112/**
113 * ext4_get_branch - read the chain of indirect blocks leading to data
114 * @inode: inode in question
115 * @depth: depth of the chain (1 - direct pointer, etc.)
116 * @offsets: offsets of pointers in inode/indirect blocks
117 * @chain: place to store the result
118 * @err: here we store the error value
119 *
120 * Function fills the array of triples <key, p, bh> and returns %NULL
121 * if everything went OK or the pointer to the last filled triple
122 * (incomplete one) otherwise. Upon the return chain[i].key contains
123 * the number of (i+1)-th block in the chain (as it is stored in memory,
124 * i.e. little-endian 32-bit), chain[i].p contains the address of that
125 * number (it points into struct inode for i==0 and into the bh->b_data
126 * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
127 * block for i>0 and NULL for i==0. In other words, it holds the block
128 * numbers of the chain, addresses they were taken from (and where we can
129 * verify that chain did not change) and buffer_heads hosting these
130 * numbers.
131 *
132 * Function stops when it stumbles upon zero pointer (absent block)
133 * (pointer to last triple returned, *@err == 0)
134 * or when it gets an IO error reading an indirect block
135 * (ditto, *@err == -EIO)
136 * or when it reads all @depth-1 indirect blocks successfully and finds
137 * the whole chain, all way to the data (returns %NULL, *err == 0).
138 *
139 * Need to be called with
140 * down_read(&EXT4_I(inode)->i_data_sem)
141 */
142static Indirect *ext4_get_branch(struct inode *inode, int depth,
143 ext4_lblk_t *offsets,
144 Indirect chain[4], int *err)
145{
146 struct super_block *sb = inode->i_sb;
147 Indirect *p = chain;
148 struct buffer_head *bh;
149
150 *err = 0;
151 /* i_data is not going away, no lock needed */
152 add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
153 if (!p->key)
154 goto no_block;
155 while (--depth) {
156 bh = sb_getblk(sb, le32_to_cpu(p->key));
157 if (unlikely(!bh))
158 goto failure;
159
160 if (!bh_uptodate_or_lock(bh)) {
161 if (bh_submit_read(bh) < 0) {
162 put_bh(bh);
163 goto failure;
164 }
165 /* validate block references */
166 if (ext4_check_indirect_blockref(inode, bh)) {
167 put_bh(bh);
168 goto failure;
169 }
170 }
171
172 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
173 /* Reader: end */
174 if (!p->key)
175 goto no_block;
176 }
177 return NULL;
178
179failure:
180 *err = -EIO;
181no_block:
182 return p;
183}
184
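/*
 * The <p, key, bh> triple convention above is the whole interface of
 * the chain walk: NULL means the chain fully resolved; otherwise the
 * returned triple is the first hole (key == 0) and allocation starts
 * there. A hedged, contrived user-space sketch of how a caller reads
 * the result (buffer_head handling elided; not part of the patch):
 */

#include <stdint.h>
#include <stdio.h>

typedef struct {
	uint32_t *p;   /* where the block number is stored   */
	uint32_t  key; /* the block number itself (0 = hole) */
} Ind;

int main(void)
{
	/* Simulate a depth-2 walk that stops at a hole in the indirect
	 * block: chain[0] resolved, chain[1].key == 0. */
	uint32_t inode_slot = 9000, ind_slot = 0;
	Ind chain[4] = { { &inode_slot, inode_slot },
			 { &ind_slot,  ind_slot  } };
	Ind *partial = &chain[1]; /* what ext4_get_branch() would return */

	if (!partial)
		printf("fully mapped\n");
	else
		printf("hole at depth %ld; allocation starts there\n",
		       (long)(partial - chain) + 1);
	return 0;
}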
185/**
186 * ext4_find_near - find a place for allocation with sufficient locality
187 * @inode: owner
188 * @ind: descriptor of indirect block.
189 *
190 * This function returns the preferred place for block allocation.
191 * It is used when heuristic for sequential allocation fails.
192 * Rules are:
193 * + if there is a block to the left of our position - allocate near it.
194 * + if pointer will live in indirect block - allocate near that block.
195 * + if pointer will live in inode - allocate in the same
196 * cylinder group.
197 *
198 * In the latter case we colour the starting block by the callers PID to
199 * prevent it from clashing with concurrent allocations for a different inode
200 * in the same block group. The PID is used here so that functionally related
201 * files will be close-by on-disk.
202 *
203 * Caller must make sure that @ind is valid and will stay that way.
204 */
205static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
206{
207 struct ext4_inode_info *ei = EXT4_I(inode);
208 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
209 __le32 *p;
210
211 /* Try to find previous block */
212 for (p = ind->p - 1; p >= start; p--) {
213 if (*p)
214 return le32_to_cpu(*p);
215 }
216
217 /* No such thing, so let's try location of indirect block */
218 if (ind->bh)
219 return ind->bh->b_blocknr;
220
221 /*
222 * It is going to be referred to from the inode itself? OK, just put it
223 * into the same cylinder group then.
224 */
225 return ext4_inode_to_goal_block(inode);
226}
227
228/**
229 * ext4_find_goal - find a preferred place for allocation.
230 * @inode: owner
231 * @block: block we want
232 * @partial: pointer to the last triple within a chain
233 *
234 * Normally this function finds the preferred place for block allocation
235 * and returns it.
236 * Because this is only used for non-extent files, we limit the block nr
237 * to 32 bits.
238 */
239static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
240 Indirect *partial)
241{
242 ext4_fsblk_t goal;
243
244 /*
245 * XXX need to get goal block from mballoc's data structures
246 */
247
248 goal = ext4_find_near(inode, partial);
249 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
250 return goal;
251}
252
253/**
254 * ext4_blks_to_allocate - Look up the block map and count the number
255 * of direct blocks that need to be allocated for the given branch.
256 *
257 * @branch: chain of indirect blocks
258 * @k: number of blocks needed for indirect blocks
259 * @blks: number of data blocks to be mapped.
260 * @blocks_to_boundary: the offset in the indirect block
261 *
262 * return the total number of blocks to be allocated, including the
263 * direct and indirect blocks.
264 */
265static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
266 int blocks_to_boundary)
267{
268 unsigned int count = 0;
269
270 /*
271	 * Simple case: the [t,d]Indirect block(s) have not been allocated yet,
272	 * so clearly no blocks on that path have been allocated
273 */
274 if (k > 0) {
275 /* right now we don't handle cross boundary allocation */
276 if (blks < blocks_to_boundary + 1)
277 count += blks;
278 else
279 count += blocks_to_boundary + 1;
280 return count;
281 }
282
283 count++;
284 while (count < blks && count <= blocks_to_boundary &&
285 le32_to_cpu(*(branch[0].p + count)) == 0) {
286 count++;
287 }
288 return count;
289}
290
291/**
292 * ext4_alloc_blocks: multiple allocate blocks needed for a branch
293 * @handle: handle for this transaction
294 * @inode: inode which needs allocated blocks
295 * @iblock: the logical block to start allocated at
296 * @goal: preferred physical block of allocation
297 * @indirect_blks: the number of blocks need to allocate for indirect
298 * blocks
299 * @blks: number of desired blocks
300 * @new_blocks: on return it will store the new block numbers for
301 * the indirect blocks(if needed) and the first direct block,
302 * @err: on return it will store the error code
303 *
304 * This function will return the number of blocks allocated as
305 * requested by the passed-in parameters.
306 */
307static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
308 ext4_lblk_t iblock, ext4_fsblk_t goal,
309 int indirect_blks, int blks,
310 ext4_fsblk_t new_blocks[4], int *err)
311{
312 struct ext4_allocation_request ar;
313 int target, i;
314 unsigned long count = 0, blk_allocated = 0;
315 int index = 0;
316 ext4_fsblk_t current_block = 0;
317 int ret = 0;
318
319 /*
320 * Here we try to allocate the requested multiple blocks at once,
321 * on a best-effort basis.
322 * To build a branch, we should allocate blocks for
323 * the indirect blocks(if not allocated yet), and at least
324 * the first direct block of this branch. That's the
325	 * minimum number of blocks we need to allocate (required)
326 */
327 /* first we try to allocate the indirect blocks */
328 target = indirect_blks;
329 while (target > 0) {
330 count = target;
331 /* allocating blocks for indirect blocks and direct blocks */
332 current_block = ext4_new_meta_blocks(handle, inode, goal,
333 0, &count, err);
334 if (*err)
335 goto failed_out;
336
337 if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
338 EXT4_ERROR_INODE(inode,
339 "current_block %llu + count %lu > %d!",
340 current_block, count,
341 EXT4_MAX_BLOCK_FILE_PHYS);
342 *err = -EIO;
343 goto failed_out;
344 }
345
346 target -= count;
347 /* allocate blocks for indirect blocks */
348 while (index < indirect_blks && count) {
349 new_blocks[index++] = current_block++;
350 count--;
351 }
352 if (count > 0) {
353 /*
354 * save the new block number
355 * for the first direct block
356 */
357 new_blocks[index] = current_block;
358 printk(KERN_INFO "%s returned more blocks than "
359 "requested\n", __func__);
360 WARN_ON(1);
361 break;
362 }
363 }
364
365	target = blks - count;
366 blk_allocated = count;
367 if (!target)
368 goto allocated;
369 /* Now allocate data blocks */
370 memset(&ar, 0, sizeof(ar));
371 ar.inode = inode;
372 ar.goal = goal;
373 ar.len = target;
374 ar.logical = iblock;
375 if (S_ISREG(inode->i_mode))
376 /* enable in-core preallocation only for regular files */
377 ar.flags = EXT4_MB_HINT_DATA;
378
379 current_block = ext4_mb_new_blocks(handle, &ar, err);
380 if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
381 EXT4_ERROR_INODE(inode,
382 "current_block %llu + ar.len %d > %d!",
383 current_block, ar.len,
384 EXT4_MAX_BLOCK_FILE_PHYS);
385 *err = -EIO;
386 goto failed_out;
387 }
388
389 if (*err && (target == blks)) {
390 /*
391 * if the allocation failed and we didn't allocate
392 * any blocks before
393 */
394 goto failed_out;
395 }
396 if (!*err) {
397 if (target == blks) {
398 /*
399 * save the new block number
400 * for the first direct block
401 */
402 new_blocks[index] = current_block;
403 }
404 blk_allocated += ar.len;
405 }
406allocated:
407 /* total number of blocks allocated for direct blocks */
408 ret = blk_allocated;
409 *err = 0;
410 return ret;
411failed_out:
412 for (i = 0; i < index; i++)
413 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
414 return ret;
415}
416
417/**
418 * ext4_alloc_branch - allocate and set up a chain of blocks.
419 * @handle: handle for this transaction
420 * @inode: owner
421 * @indirect_blks: number of allocated indirect blocks
422 * @blks: number of allocated direct blocks
423 * @goal: preferred place for allocation
424 * @offsets: offsets (in the blocks) to store the pointers to next.
425 * @branch: place to store the chain in.
426 *
427 * This function allocates blocks, zeroes out all but the last one,
428 * links them into chain and (if we are synchronous) writes them to disk.
429 * In other words, it prepares a branch that can be spliced onto the
430 * inode. It stores the information about that chain in the branch[], in
431 * the same format as ext4_get_branch() would do. We are calling it after
432 * we had read the existing part of chain and partial points to the last
433 * triple of that (one with zero ->key). Upon the exit we have the same
434 * picture as after the successful ext4_get_block(), except that in one
435 * place chain is disconnected - *branch->p is still zero (we did not
436 * set the last link), but branch->key contains the number that should
437 * be placed into *branch->p to fill that gap.
438 *
439 * If allocation fails we free all blocks we've allocated (and forget
440 * their buffer_heads) and return the error value from the failed
441 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
442 * as described above and return 0.
443 */
444static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
445 ext4_lblk_t iblock, int indirect_blks,
446 int *blks, ext4_fsblk_t goal,
447 ext4_lblk_t *offsets, Indirect *branch)
448{
449 int blocksize = inode->i_sb->s_blocksize;
450 int i, n = 0;
451 int err = 0;
452 struct buffer_head *bh;
453 int num;
454 ext4_fsblk_t new_blocks[4];
455 ext4_fsblk_t current_block;
456
457 num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
458 *blks, new_blocks, &err);
459 if (err)
460 return err;
461
462 branch[0].key = cpu_to_le32(new_blocks[0]);
463 /*
464 * metadata blocks and data blocks are allocated.
465 */
466 for (n = 1; n <= indirect_blks; n++) {
467 /*
468 * Get buffer_head for parent block, zero it out
469 * and set the pointer to new one, then send
470 * parent to disk.
471 */
472 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
473 if (unlikely(!bh)) {
474 err = -EIO;
475 goto failed;
476 }
477
478 branch[n].bh = bh;
479 lock_buffer(bh);
480 BUFFER_TRACE(bh, "call get_create_access");
481 err = ext4_journal_get_create_access(handle, bh);
482 if (err) {
483 /* Don't brelse(bh) here; it's done in
484 * ext4_journal_forget() below */
485 unlock_buffer(bh);
486 goto failed;
487 }
488
489 memset(bh->b_data, 0, blocksize);
490 branch[n].p = (__le32 *) bh->b_data + offsets[n];
491 branch[n].key = cpu_to_le32(new_blocks[n]);
492 *branch[n].p = branch[n].key;
493 if (n == indirect_blks) {
494 current_block = new_blocks[n];
495 /*
496 * End of chain, update the last new metablock of
497 * the chain to point to the new allocated
498 * data blocks numbers
499 */
500 for (i = 1; i < num; i++)
501 *(branch[n].p + i) = cpu_to_le32(++current_block);
502 }
503 BUFFER_TRACE(bh, "marking uptodate");
504 set_buffer_uptodate(bh);
505 unlock_buffer(bh);
506
507 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
508 err = ext4_handle_dirty_metadata(handle, inode, bh);
509 if (err)
510 goto failed;
511 }
512 *blks = num;
513 return err;
514failed:
515 /* Allocation failed, free what we already allocated */
516 ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
517 for (i = 1; i <= n ; i++) {
518 /*
519 * branch[i].bh is newly allocated, so there is no
520 * need to revoke the block, which is why we don't
521 * need to set EXT4_FREE_BLOCKS_METADATA.
522 */
523 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
524 EXT4_FREE_BLOCKS_FORGET);
525 }
526 for (i = n+1; i < indirect_blks; i++)
527 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
528
529 ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
530
531 return err;
532}
533
534/**
535 * ext4_splice_branch - splice the allocated branch onto inode.
536 * @handle: handle for this transaction
537 * @inode: owner
538 * @block: (logical) number of block we are adding
539 * @chain: chain of indirect blocks (with a missing link - see
540 * ext4_alloc_branch)
541 * @where: location of missing link
542 * @num: number of indirect blocks we are adding
543 * @blks: number of direct blocks we are adding
544 *
545 * This function fills the missing link and does all housekeeping needed in
546 * inode (->i_blocks, etc.). In case of success we end up with the full
547 * chain to new block and return 0.
548 */
549static int ext4_splice_branch(handle_t *handle, struct inode *inode,
550 ext4_lblk_t block, Indirect *where, int num,
551 int blks)
552{
553 int i;
554 int err = 0;
555 ext4_fsblk_t current_block;
556
557 /*
558 * If we're splicing into a [td]indirect block (as opposed to the
559 * inode) then we need to get write access to the [td]indirect block
560 * before the splice.
561 */
562 if (where->bh) {
563 BUFFER_TRACE(where->bh, "get_write_access");
564 err = ext4_journal_get_write_access(handle, where->bh);
565 if (err)
566 goto err_out;
567 }
568 /* That's it */
569
570 *where->p = where->key;
571
572 /*
573	 * Update the host buffer_head or inode to point to the just-allocated
574	 * direct blocks
575 */
576 if (num == 0 && blks > 1) {
577 current_block = le32_to_cpu(where->key) + 1;
578 for (i = 1; i < blks; i++)
579 *(where->p + i) = cpu_to_le32(current_block++);
580 }
581
582 /* We are done with atomic stuff, now do the rest of housekeeping */
583 /* had we spliced it onto indirect block? */
584 if (where->bh) {
585 /*
586 * If we spliced it onto an indirect block, we haven't
587 * altered the inode. Note however that if it is being spliced
588 * onto an indirect block at the very end of the file (the
589 * file is growing) then we *will* alter the inode to reflect
590 * the new i_size. But that is not done here - it is done in
591 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
592 */
593 jbd_debug(5, "splicing indirect only\n");
594 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
595 err = ext4_handle_dirty_metadata(handle, inode, where->bh);
596 if (err)
597 goto err_out;
598 } else {
599 /*
600 * OK, we spliced it into the inode itself on a direct block.
601 */
602 ext4_mark_inode_dirty(handle, inode);
603 jbd_debug(5, "splicing direct\n");
604 }
605 return err;
606
607err_out:
608 for (i = 1; i <= num; i++) {
609 /*
610 * branch[i].bh is newly allocated, so there is no
611 * need to revoke the block, which is why we don't
612 * need to set EXT4_FREE_BLOCKS_METADATA.
613 */
614 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
615 EXT4_FREE_BLOCKS_FORGET);
616 }
617 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
618 blks, 0);
619
620 return err;
621}
622
623/*
624 * The ext4_ind_map_blocks() function handles non-extents inodes
625 * (i.e., using the traditional indirect/double-indirect i_blocks
626 * scheme) for ext4_map_blocks().
627 *
628 * Allocation strategy is simple: if we have to allocate something, we will
629 * have to go the whole way to leaf. So let's do it before attaching anything
630 * to tree, set linkage between the newborn blocks, write them if sync is
631 * required, recheck the path, free and repeat if check fails, otherwise
632 * set the last missing link (that will protect us from any truncate-generated
633 * removals - all blocks on the path are immune now) and possibly force the
634 * write on the parent block.
635 * That has a nice additional property: no special recovery from the failed
636 * allocations is needed - we simply release blocks and do not touch anything
637 * reachable from inode.
638 *
639 * `handle' can be NULL if create == 0.
640 *
641 * return > 0, # of blocks mapped or allocated.
642 * return = 0, if plain lookup failed.
643 * return < 0, error case.
644 *
645 * The ext4_ind_map_blocks() function should be called with
646 * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
647 * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
648 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
649 * blocks.
650 */
651int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
652 struct ext4_map_blocks *map,
653 int flags)
654{
655 int err = -EIO;
656 ext4_lblk_t offsets[4];
657 Indirect chain[4];
658 Indirect *partial;
659 ext4_fsblk_t goal;
660 int indirect_blks;
661 int blocks_to_boundary = 0;
662 int depth;
663 int count = 0;
664 ext4_fsblk_t first_block = 0;
665
666 trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
667 J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
668 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
669 depth = ext4_block_to_path(inode, map->m_lblk, offsets,
670 &blocks_to_boundary);
671
672 if (depth == 0)
673 goto out;
674
675 partial = ext4_get_branch(inode, depth, offsets, chain, &err);
676
677 /* Simplest case - block found, no allocation needed */
678 if (!partial) {
679 first_block = le32_to_cpu(chain[depth - 1].key);
680 count++;
681 /*map more blocks*/
682 while (count < map->m_len && count <= blocks_to_boundary) {
683 ext4_fsblk_t blk;
684
685 blk = le32_to_cpu(*(chain[depth-1].p + count));
686
687 if (blk == first_block + count)
688 count++;
689 else
690 break;
691 }
692 goto got_it;
693 }
694
695 /* Next simple case - plain lookup or failed read of indirect block */
696 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
697 goto cleanup;
698
699 /*
700 * Okay, we need to do block allocation.
701 */
702 goal = ext4_find_goal(inode, map->m_lblk, partial);
703
704	/* the number of blocks we need to allocate for [d,t]indirect blocks */
705 indirect_blks = (chain + depth) - partial - 1;
706
707 /*
708	 * Next look up the indirect map to count the total number of
709 * direct blocks to allocate for this branch.
710 */
711 count = ext4_blks_to_allocate(partial, indirect_blks,
712 map->m_len, blocks_to_boundary);
713 /*
714 * Block out ext4_truncate while we alter the tree
715 */
716 err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
717 &count, goal,
718 offsets + (partial - chain), partial);
719
720 /*
721 * The ext4_splice_branch call will free and forget any buffers
722 * on the new chain if there is a failure, but that risks using
723 * up transaction credits, especially for bitmaps where the
724 * credits cannot be returned. Can we handle this somehow? We
725 * may need to return -EAGAIN upwards in the worst case. --sct
726 */
727 if (!err)
728 err = ext4_splice_branch(handle, inode, map->m_lblk,
729 partial, indirect_blks, count);
730 if (err)
731 goto cleanup;
732
733 map->m_flags |= EXT4_MAP_NEW;
734
735 ext4_update_inode_fsync_trans(handle, inode, 1);
736got_it:
737 map->m_flags |= EXT4_MAP_MAPPED;
738 map->m_pblk = le32_to_cpu(chain[depth-1].key);
739 map->m_len = count;
740 if (count > blocks_to_boundary)
741 map->m_flags |= EXT4_MAP_BOUNDARY;
742 err = count;
743 /* Clean up and exit */
744 partial = chain + depth - 1; /* the whole chain */
745cleanup:
746 while (partial > chain) {
747 BUFFER_TRACE(partial->bh, "call brelse");
748 brelse(partial->bh);
749 partial--;
750 }
751out:
752 trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
753 map->m_pblk, map->m_len, err);
754 return err;
755}
756
757/*
758 * O_DIRECT for ext3 (or indirect map) based files
759 *
760 * If the O_DIRECT write will extend the file then add this inode to the
761 * orphan list. So recovery will truncate it back to the original size
762 * if the machine crashes during the write.
763 *
764 * If the O_DIRECT write is instantiating holes inside i_size and the machine
765 * crashes then stale disk data _may_ be exposed inside the file. But current
766 * VFS code falls back into buffered path in that case so we are safe.
767 */
768ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
769 const struct iovec *iov, loff_t offset,
770 unsigned long nr_segs)
771{
772 struct file *file = iocb->ki_filp;
773 struct inode *inode = file->f_mapping->host;
774 struct ext4_inode_info *ei = EXT4_I(inode);
775 handle_t *handle;
776 ssize_t ret;
777 int orphan = 0;
778 size_t count = iov_length(iov, nr_segs);
779 int retries = 0;
780
781 if (rw == WRITE) {
782 loff_t final_size = offset + count;
783
784 if (final_size > inode->i_size) {
785 /* Credits for sb + inode write */
786 handle = ext4_journal_start(inode, 2);
787 if (IS_ERR(handle)) {
788 ret = PTR_ERR(handle);
789 goto out;
790 }
791 ret = ext4_orphan_add(handle, inode);
792 if (ret) {
793 ext4_journal_stop(handle);
794 goto out;
795 }
796 orphan = 1;
797 ei->i_disksize = inode->i_size;
798 ext4_journal_stop(handle);
799 }
800 }
801
802retry:
803 if (rw == READ && ext4_should_dioread_nolock(inode))
804 ret = __blockdev_direct_IO(rw, iocb, inode,
805 inode->i_sb->s_bdev, iov,
806 offset, nr_segs,
807 ext4_get_block, NULL, NULL, 0);
808 else {
809 ret = blockdev_direct_IO(rw, iocb, inode, iov,
810 offset, nr_segs, ext4_get_block);
811
812 if (unlikely((rw & WRITE) && ret < 0)) {
813 loff_t isize = i_size_read(inode);
814 loff_t end = offset + iov_length(iov, nr_segs);
815
816 if (end > isize)
817 ext4_truncate_failed_write(inode);
818 }
819 }
820 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
821 goto retry;
822
823 if (orphan) {
824 int err;
825
826 /* Credits for sb + inode write */
827 handle = ext4_journal_start(inode, 2);
828 if (IS_ERR(handle)) {
829 /* This is really bad luck. We've written the data
830 * but cannot extend i_size. Bail out and pretend
831 * the write failed... */
832 ret = PTR_ERR(handle);
833 if (inode->i_nlink)
834 ext4_orphan_del(NULL, inode);
835
836 goto out;
837 }
838 if (inode->i_nlink)
839 ext4_orphan_del(handle, inode);
840 if (ret > 0) {
841 loff_t end = offset + ret;
842 if (end > inode->i_size) {
843 ei->i_disksize = end;
844 i_size_write(inode, end);
845 /*
846 * We're going to return a positive `ret'
847 * here due to non-zero-length I/O, so there's
848 * no way of reporting error returns from
849 * ext4_mark_inode_dirty() to userspace. So
850 * ignore it.
851 */
852 ext4_mark_inode_dirty(handle, inode);
853 }
854 }
855 err = ext4_journal_stop(handle);
856 if (ret == 0)
857 ret = err;
858 }
859out:
860 return ret;
861}
862
863/*
864 * Calculate the number of metadata blocks that need to be reserved
865 * to allocate a new block at @lblock for a non-extent-based file
866 */
867int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
868{
869 struct ext4_inode_info *ei = EXT4_I(inode);
870 sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
871 int blk_bits;
872
873 if (lblock < EXT4_NDIR_BLOCKS)
874 return 0;
875
876 lblock -= EXT4_NDIR_BLOCKS;
877
878 if (ei->i_da_metadata_calc_len &&
879 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
880 ei->i_da_metadata_calc_len++;
881 return 0;
882 }
883 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
884 ei->i_da_metadata_calc_len = 1;
885 blk_bits = order_base_2(lblock);
886 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
887}
888
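/*
 * A quick check of the worst-case estimate above, with the per-inode
 * i_da_metadata_calc_* caching stripped out. For 4K blocks (10 address
 * bits per map block), logical block 5000 takes the double-indirect
 * path, so the estimate is 2. A hedged sketch, constants hypothetical;
 * not part of the patch:
 */

#include <stdio.h>

#define ADDR_BITS 10 /* hypothetical: 1024 pointers per 4K map block */
#define NDIR      12

static int metadata_amount(unsigned long lblock)
{
	int order = 0;

	if (lblock < NDIR)
		return 0;
	lblock -= NDIR;
	while ((1UL << order) < lblock) /* open-coded order_base_2() */
		order++;
	return order / ADDR_BITS + 1;
}

int main(void)
{
	printf("%d\n", metadata_amount(5000)); /* prints 2 */
	return 0;
}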
889int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
890{
891 int indirects;
892
893 /* if nrblocks are contiguous */
894 if (chunk) {
895 /*
896 * With N contiguous data blocks, we need at most
897 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
898 * 2 dindirect blocks, and 1 tindirect block
899 */
900 return DIV_ROUND_UP(nrblocks,
901 EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
902 }
903 /*
904	 * if nrblocks are not contiguous, worst case, each block touches
905	 * an indirect block, and each indirect block touches a double indirect
906 * block, plus a triple indirect block
907 */
908 indirects = nrblocks * 2 + 1;
909 return indirects;
910}
911
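/*
 * The two bounds above in concrete numbers, again assuming 1024
 * pointers per map block: mapping 2048 contiguous blocks touches at
 * most 3 indirect, 2 double-indirect and 1 triple-indirect block
 * (6 total); scattered, the bound degrades to two map blocks per data
 * block plus one shared tindirect. A tiny sketch, not part of the
 * patch:
 */

#include <stdio.h>

#define ADDR_PER_BLOCK 1024 /* hypothetical 4K-block geometry */

static int div_round_up(int a, int b)
{
	return (a + b - 1) / b;
}

int main(void)
{
	int nrblocks = 2048;

	/* contiguous run: N/1024 + 1 indirects + 2 dindirects + 1 tindirect */
	printf("contiguous: %d\n", div_round_up(nrblocks, ADDR_PER_BLOCK) + 4);
	/* scattered worst case: an indirect and a dindirect per block,
	 * plus one shared tindirect */
	printf("scattered: %d\n", nrblocks * 2 + 1);
	return 0;
}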
912/*
913 * Truncate transactions can be complex and absolutely huge. So we need to
914 * be able to restart the transaction at a convenient checkpoint to make
915 * sure we don't overflow the journal.
916 *
917 * start_transaction gets us a new handle for a truncate transaction,
918 * and extend_transaction tries to extend the existing one a bit. If
919 * extend fails, we need to propagate the failure up and restart the
920 * transaction in the top-level truncate loop. --sct
921 */
922static handle_t *start_transaction(struct inode *inode)
923{
924 handle_t *result;
925
926 result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
927 if (!IS_ERR(result))
928 return result;
929
930 ext4_std_error(inode->i_sb, PTR_ERR(result));
931 return result;
932}
933
934/*
935 * Try to extend this transaction for the purposes of truncation.
936 *
937 * Returns 0 if we managed to create more room. If we can't create more
938 * room, and the transaction must be restarted we return 1.
939 */
940static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
941{
942 if (!ext4_handle_valid(handle))
943 return 0;
944 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
945 return 0;
946 if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
947 return 0;
948 return 1;
949}
950
951/*
952 * Probably it should be a library function... search for first non-zero word
953 * or memcmp with zero_page, whatever is better for particular architecture.
954 * Linus?
955 */
956static inline int all_zeroes(__le32 *p, __le32 *q)
957{
958 while (p < q)
959 if (*p++)
960 return 0;
961 return 1;
962}
963
964/**
965 * ext4_find_shared - find the indirect blocks for partial truncation.
966 * @inode: inode in question
967 * @depth: depth of the affected branch
968 * @offsets: offsets of pointers in that branch (see ext4_block_to_path)
969 * @chain: place to store the pointers to partial indirect blocks
970 * @top: place to the (detached) top of branch
971 *
972 * This is a helper function used by ext4_truncate().
973 *
974 * When we do truncate() we may have to clean the ends of several
975 * indirect blocks but leave the blocks themselves alive. Block is
976 * partially truncated if some data below the new i_size is referred
977 * from it (and it is on the path to the first completely truncated
978 * data block, indeed). We have to free the top of that path along
979 * with everything to the right of the path. Since no allocation
980 * past the truncation point is possible until ext4_truncate()
981 * finishes, we may safely do the latter, but top of branch may
982 * require special attention - pageout below the truncation point
983 * might try to populate it.
984 *
985 * We atomically detach the top of branch from the tree, store the
986 * block number of its root in *@top, pointers to buffer_heads of
987 * partially truncated blocks - in @chain[].bh and pointers to
988 * their last elements that should not be removed - in
989 * @chain[].p. Return value is the pointer to last filled element
990 * of @chain.
991 *
992 * The work left to caller to do the actual freeing of subtrees:
993 * a) free the subtree starting from *@top
994 * b) free the subtrees whose roots are stored in
995 * (@chain[i].p+1 .. end of @chain[i].bh->b_data)
996 * c) free the subtrees growing from the inode past the @chain[0].
997 * (no partially truncated stuff there). */
998
999static Indirect *ext4_find_shared(struct inode *inode, int depth,
1000 ext4_lblk_t offsets[4], Indirect chain[4],
1001 __le32 *top)
1002{
1003 Indirect *partial, *p;
1004 int k, err;
1005
1006 *top = 0;
1007 /* Make k index the deepest non-null offset + 1 */
1008 for (k = depth; k > 1 && !offsets[k-1]; k--)
1009 ;
1010 partial = ext4_get_branch(inode, k, offsets, chain, &err);
1011 /* Writer: pointers */
1012 if (!partial)
1013 partial = chain + k-1;
1014 /*
1015 * If the branch acquired continuation since we've looked at it -
1016 * fine, it should all survive and (new) top doesn't belong to us.
1017 */
1018 if (!partial->key && *partial->p)
1019 /* Writer: end */
1020 goto no_top;
1021 for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
1022 ;
1023 /*
1024 * OK, we've found the last block that must survive. The rest of our
1025 * branch should be detached before unlocking. However, if that rest
1026 * of branch is all ours and does not grow immediately from the inode
1027 * it's easier to cheat and just decrement partial->p.
1028 */
1029 if (p == chain + k - 1 && p > chain) {
1030 p->p--;
1031 } else {
1032 *top = *p->p;
1033 /* Nope, don't do this in ext4. Must leave the tree intact */
1034#if 0
1035 *p->p = 0;
1036#endif
1037 }
1038 /* Writer: end */
1039
1040 while (partial > p) {
1041 brelse(partial->bh);
1042 partial--;
1043 }
1044no_top:
1045 return partial;
1046}
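/*
 * Editor's illustration (not part of the kernel source): the survivor
 * test that drives the scan above, as a standalone userspace sketch.
 * A branch node may be detached whole only when every pointer to the
 * left of the cut position is zero; otherwise the cut lands inside it.
 * All names and values here are hypothetical.
 */
#include <stdio.h>

static int all_zeroes_demo(const unsigned int *p, const unsigned int *q)
{
	while (p < q)
		if (*p++)
			return 0;
	return 1;
}

int main(void)
{
	unsigned int ind[8] = { 0, 0, 0, 500, 501, 0, 0, 0 };

	/* cut at slot 3: slots 0-2 are all holes, nothing survives */
	printf("detach whole block? %s\n",
	       all_zeroes_demo(ind, ind + 3) ? "yes" : "no");
	return 0;
}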
1047
1048/*
1049 * Zero a number of block pointers in either an inode or an indirect block.
1050 * If we restart the transaction we must again get write access to the
1051 * indirect block for further modification.
1052 *
1053 * We release `count' blocks on disk, but (last - first) may be greater
1054 * than `count' because there can be holes in there.
1055 *
1056 * Return 0 on success, 1 on invalid block range
1057 * and < 0 on fatal error.
1058 */
1059static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
1060 struct buffer_head *bh,
1061 ext4_fsblk_t block_to_free,
1062 unsigned long count, __le32 *first,
1063 __le32 *last)
1064{
1065 __le32 *p;
1066 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
1067 int err;
1068
1069 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1070 flags |= EXT4_FREE_BLOCKS_METADATA;
1071
1072 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
1073 count)) {
1074 EXT4_ERROR_INODE(inode, "attempt to clear invalid "
1075 "blocks %llu len %lu",
1076 (unsigned long long) block_to_free, count);
1077 return 1;
1078 }
1079
1080 if (try_to_extend_transaction(handle, inode)) {
1081 if (bh) {
1082 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1083 err = ext4_handle_dirty_metadata(handle, inode, bh);
1084 if (unlikely(err))
1085 goto out_err;
1086 }
1087 err = ext4_mark_inode_dirty(handle, inode);
1088 if (unlikely(err))
1089 goto out_err;
1090 err = ext4_truncate_restart_trans(handle, inode,
1091 ext4_blocks_for_truncate(inode));
1092 if (unlikely(err))
1093 goto out_err;
1094 if (bh) {
1095 BUFFER_TRACE(bh, "retaking write access");
1096 err = ext4_journal_get_write_access(handle, bh);
1097 if (unlikely(err))
1098 goto out_err;
1099 }
1100 }
1101
1102 for (p = first; p < last; p++)
1103 *p = 0;
1104
1105 ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
1106 return 0;
1107out_err:
1108 ext4_std_error(inode->i_sb, err);
1109 return err;
1110}
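/*
 * Editor's illustration of the `count' vs. (last - first) remark
 * above, as a userspace sketch: the pointer span being zeroed may
 * contain holes, so more pointers can be cleared than on-disk blocks
 * actually freed. Hypothetical data, not kernel code.
 */
#include <stdio.h>

int main(void)
{
	/* a hole inside an otherwise contiguous run: 300, 301, _, 302 */
	unsigned int ptrs[] = { 300, 301, 0, 302 };
	unsigned int *p;
	unsigned long freed = 0, cleared = 0;

	for (p = ptrs; p < ptrs + 4; p++, cleared++) {
		if (*p)
			freed++;	/* only real blocks hit the bitmap */
		*p = 0;
	}
	printf("cleared %lu pointers, freed %lu blocks\n", cleared, freed);
	return 0;
}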
1111
1112/**
1113 * ext4_free_data - free a list of data blocks
1114 * @handle: handle for this transaction
1115 * @inode: inode we are dealing with
1116 * @this_bh: indirect buffer_head which contains *@first and *@last
1117 * @first: array of block numbers
1118 * @last: points immediately past the end of array
1119 *
1120 * We are freeing all blocks referred from that array (numbers are stored as
1121 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
1122 *
1123 * We accumulate contiguous runs of blocks to free. Conveniently, if these
1124 * blocks are contiguous then releasing them at one time will only affect one
1125 * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
1126 * actually use a lot of journal space.
1127 *
1128 * @this_bh will be %NULL if @first and @last point into the inode's direct
1129 * block pointers.
1130 */
1131static void ext4_free_data(handle_t *handle, struct inode *inode,
1132 struct buffer_head *this_bh,
1133 __le32 *first, __le32 *last)
1134{
1135 ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */
1136 unsigned long count = 0; /* Number of blocks in the run */
1137 __le32 *block_to_free_p = NULL; /* Pointer into inode/ind
1138 corresponding to
1139 block_to_free */
1140 ext4_fsblk_t nr; /* Current block # */
1141 __le32 *p; /* Pointer into inode/ind
1142 for current block */
1143 int err = 0;
1144
1145 if (this_bh) { /* For indirect block */
1146 BUFFER_TRACE(this_bh, "get_write_access");
1147 err = ext4_journal_get_write_access(handle, this_bh);
1148 /* Important: if we can't update the indirect pointers
1149 * to the blocks, we can't free them. */
1150 if (err)
1151 return;
1152 }
1153
1154 for (p = first; p < last; p++) {
1155 nr = le32_to_cpu(*p);
1156 if (nr) {
1157 /* accumulate blocks to free if they're contiguous */
1158 if (count == 0) {
1159 block_to_free = nr;
1160 block_to_free_p = p;
1161 count = 1;
1162 } else if (nr == block_to_free + count) {
1163 count++;
1164 } else {
1165 err = ext4_clear_blocks(handle, inode, this_bh,
1166 block_to_free, count,
1167 block_to_free_p, p);
1168 if (err)
1169 break;
1170 block_to_free = nr;
1171 block_to_free_p = p;
1172 count = 1;
1173 }
1174 }
1175 }
1176
1177 if (!err && count > 0)
1178 err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
1179 count, block_to_free_p, p);
1180 if (err < 0)
1181 /* fatal error */
1182 return;
1183
1184 if (this_bh) {
1185 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
1186
1187 /*
1188 * The buffer head should have an attached journal head at this
1189 * point. However, if the data is corrupted and an indirect
1190 * block pointed to itself, it would have been detached when
1191 * the block was cleared. Check for this instead of OOPSing.
1192 */
1193 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
1194 ext4_handle_dirty_metadata(handle, inode, this_bh);
1195 else
1196 EXT4_ERROR_INODE(inode,
1197 "circular indirect block detected at "
1198 "block %llu",
1199 (unsigned long long) this_bh->b_blocknr);
1200 }
1201}
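/*
 * Editor's illustration (userspace, hypothetical names): the run
 * accumulation described above, reduced to its core. Scan an array of
 * block numbers, skip holes, and emit maximal contiguous (start,
 * count) runs so each run can be freed with a single call.
 */
#include <stdio.h>

static void emit_runs(const unsigned int *first, const unsigned int *last)
{
	const unsigned int *p;
	unsigned long start = 0, count = 0;

	for (p = first; p < last; p++) {
		if (!*p)
			continue;		/* a hole */
		if (count && *p == start + count) {
			count++;		/* run continues */
		} else {
			if (count)
				printf("free %lu..%lu\n", start,
				       start + count - 1);
			start = *p;		/* new run begins */
			count = 1;
		}
	}
	if (count)
		printf("free %lu..%lu\n", start, start + count - 1);
}

int main(void)
{
	/* expect two runs: 100..102 and 200..200 */
	unsigned int blocks[] = { 100, 101, 102, 0, 200 };

	emit_runs(blocks, blocks + 5);
	return 0;
}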
1202
1203/**
1204 * ext4_free_branches - free an array of branches
1205 * @handle: JBD handle for this transaction
1206 * @inode: inode we are dealing with
1207 * @parent_bh: the buffer_head which contains *@first and *@last
1208 * @first: array of block numbers
1209 * @last: pointer immediately past the end of array
1210 * @depth: depth of the branches to free
1211 *
1212 * We are freeing all blocks referred from these branches (numbers are
1213 * stored as little-endian 32-bit) and updating @inode->i_blocks
1214 * appropriately.
1215 */
1216static void ext4_free_branches(handle_t *handle, struct inode *inode,
1217 struct buffer_head *parent_bh,
1218 __le32 *first, __le32 *last, int depth)
1219{
1220 ext4_fsblk_t nr;
1221 __le32 *p;
1222
1223 if (ext4_handle_is_aborted(handle))
1224 return;
1225
1226 if (depth--) {
1227 struct buffer_head *bh;
1228 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1229 p = last;
1230 while (--p >= first) {
1231 nr = le32_to_cpu(*p);
1232 if (!nr)
1233 continue; /* A hole */
1234
1235 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
1236 nr, 1)) {
1237 EXT4_ERROR_INODE(inode,
1238 "invalid indirect mapped "
1239 "block %lu (level %d)",
1240 (unsigned long) nr, depth);
1241 break;
1242 }
1243
1244 /* Go read the buffer for the next level down */
1245 bh = sb_bread(inode->i_sb, nr);
1246
1247 /*
1248 * A read failure? Report error and clear slot
1249 * (should be rare).
1250 */
1251 if (!bh) {
1252 EXT4_ERROR_INODE_BLOCK(inode, nr,
1253 "Read failure");
1254 continue;
1255 }
1256
1257 /* This zaps the entire block. Bottom up. */
1258 BUFFER_TRACE(bh, "free child branches");
1259 ext4_free_branches(handle, inode, bh,
1260 (__le32 *) bh->b_data,
1261 (__le32 *) bh->b_data + addr_per_block,
1262 depth);
1263 brelse(bh);
1264
1265 /*
1266 * Everything below this pointer has been
1267 * released. Now let this top-of-subtree go.
1268 *
1269 * We want the freeing of this indirect block to be
1270 * atomic in the journal with the updating of the
1271 * bitmap block which owns it. So make some room in
1272 * the journal.
1273 *
1274 * We zero the parent pointer *after* freeing its
1275 * pointee in the bitmaps, so if extend_transaction()
1276 * for some reason fails to put the bitmap changes and
1277 * the release into the same transaction, recovery
1278 * will merely complain about releasing a free block,
1279 * rather than leaking blocks.
1280 */
1281 if (ext4_handle_is_aborted(handle))
1282 return;
1283 if (try_to_extend_transaction(handle, inode)) {
1284 ext4_mark_inode_dirty(handle, inode);
1285 ext4_truncate_restart_trans(handle, inode,
1286 ext4_blocks_for_truncate(inode));
1287 }
1288
1289 /*
1290 * The forget flag here is critical because if
1291 * we are journaling (and not doing data
1292 * journaling), we have to make sure a revoke
1293 * record is written to prevent the journal
1294 * replay from overwriting the (former)
1295 * indirect block if it gets reallocated as a
1296 * data block. This must happen in the same
1297 * transaction where the data blocks are
1298 * actually freed.
1299 */
1300 ext4_free_blocks(handle, inode, NULL, nr, 1,
1301 EXT4_FREE_BLOCKS_METADATA|
1302 EXT4_FREE_BLOCKS_FORGET);
1303
1304 if (parent_bh) {
1305 /*
1306 * The block which we have just freed is
1307 * pointed to by an indirect block: journal it
1308 */
1309 BUFFER_TRACE(parent_bh, "get_write_access");
1310 if (!ext4_journal_get_write_access(handle,
1311 parent_bh)){
1312 *p = 0;
1313 BUFFER_TRACE(parent_bh,
1314 "call ext4_handle_dirty_metadata");
1315 ext4_handle_dirty_metadata(handle,
1316 inode,
1317 parent_bh);
1318 }
1319 }
1320 }
1321 } else {
1322 /* We have reached the bottom of the tree. */
1323 BUFFER_TRACE(parent_bh, "free data blocks");
1324 ext4_free_data(handle, inode, parent_bh, first, last);
1325 }
1326}
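/*
 * Editor's illustration (userspace toy, hypothetical types): the
 * bottom-up recursion above. Children are released before their
 * parent, and each parent slot is zeroed only after its subtree is
 * gone, so an interrupted run never leaves a pointer to freed blocks.
 */
#include <stdio.h>
#include <stdlib.h>

struct tnode {
	int nr;
	struct tnode **child;	/* NULL entries model holes */
};

static void free_branch_demo(struct tnode *n, int depth)
{
	int i;

	if (depth)
		for (i = n->nr - 1; i >= 0; i--) {	/* right to left */
			if (!n->child[i])
				continue;		/* a hole */
			free_branch_demo(n->child[i], depth - 1);
			n->child[i] = NULL;	/* zero the slot last */
		}
	printf("releasing node %p\n", (void *)n);
	free(n->child);
	free(n);
}

int main(void)
{
	struct tnode *leaf = calloc(1, sizeof(*leaf));
	struct tnode *root = calloc(1, sizeof(*root));

	root->nr = 2;
	root->child = calloc(2, sizeof(*root->child));
	root->child[0] = leaf;		/* child[1] stays a hole */
	free_branch_demo(root, 1);	/* leaf is released first */
	return 0;
}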
1327
1328void ext4_ind_truncate(struct inode *inode)
1329{
1330 handle_t *handle;
1331 struct ext4_inode_info *ei = EXT4_I(inode);
1332 __le32 *i_data = ei->i_data;
1333 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1334 struct address_space *mapping = inode->i_mapping;
1335 ext4_lblk_t offsets[4];
1336 Indirect chain[4];
1337 Indirect *partial;
1338 __le32 nr = 0;
1339 int n = 0;
1340 ext4_lblk_t last_block, max_block;
1341 unsigned blocksize = inode->i_sb->s_blocksize;
1342
1343 handle = start_transaction(inode);
1344 if (IS_ERR(handle))
1345 return; /* AKPM: return what? */
1346
1347 last_block = (inode->i_size + blocksize-1)
1348 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1349 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1350 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1351
1352 if (inode->i_size & (blocksize - 1))
1353 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
1354 goto out_stop;
1355
1356 if (last_block != max_block) {
1357 n = ext4_block_to_path(inode, last_block, offsets, NULL);
1358 if (n == 0)
1359 goto out_stop; /* error */
1360 }
1361
1362 /*
1363 * OK. This truncate is going to happen. We add the inode to the
1364 * orphan list, so that if this truncate spans multiple transactions,
1365 * and we crash, we will resume the truncate when the filesystem
1366 * recovers. It also marks the inode dirty, to catch the new size.
1367 *
1368 * Implication: the file must always be in a sane, consistent
1369 * truncatable state while each transaction commits.
1370 */
1371 if (ext4_orphan_add(handle, inode))
1372 goto out_stop;
1373
1374 /*
1375 * From here we block out all ext4_get_block() callers who want to
1376 * modify the block allocation tree.
1377 */
1378 down_write(&ei->i_data_sem);
1379
1380 ext4_discard_preallocations(inode);
1381
1382 /*
1383 * The orphan list entry will now protect us from any crash which
1384 * occurs before the truncate completes, so it is now safe to propagate
1385 * the new, shorter inode size (held for now in i_size) into the
1386 * on-disk inode. We do this via i_disksize, which is the value which
1387 * ext4 *really* writes onto the disk inode.
1388 */
1389 ei->i_disksize = inode->i_size;
1390
1391 if (last_block == max_block) {
1392 /*
1393 * It is unnecessary to free any data blocks if last_block is
1394 * equal to the indirect block limit.
1395 */
1396 goto out_unlock;
1397 } else if (n == 1) { /* direct blocks */
1398 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
1399 i_data + EXT4_NDIR_BLOCKS);
1400 goto do_indirects;
1401 }
1402
1403 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1404 /* Kill the top of shared branch (not detached) */
1405 if (nr) {
1406 if (partial == chain) {
1407 /* Shared branch grows from the inode */
1408 ext4_free_branches(handle, inode, NULL,
1409 &nr, &nr+1, (chain+n-1) - partial);
1410 *partial->p = 0;
1411 /*
1412 * We mark the inode dirty prior to restart,
1413 * and prior to stop. No need for it here.
1414 */
1415 } else {
1416 /* Shared branch grows from an indirect block */
1417 BUFFER_TRACE(partial->bh, "get_write_access");
1418 ext4_free_branches(handle, inode, partial->bh,
1419 partial->p,
1420 partial->p+1, (chain+n-1) - partial);
1421 }
1422 }
1423 /* Clear the ends of indirect blocks on the shared branch */
1424 while (partial > chain) {
1425 ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
1426 (__le32*)partial->bh->b_data+addr_per_block,
1427 (chain+n-1) - partial);
1428 BUFFER_TRACE(partial->bh, "call brelse");
1429 brelse(partial->bh);
1430 partial--;
1431 }
1432do_indirects:
1433 /* Kill the remaining (whole) subtrees */
1434 switch (offsets[0]) {
1435 default:
1436 nr = i_data[EXT4_IND_BLOCK];
1437 if (nr) {
1438 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1439 i_data[EXT4_IND_BLOCK] = 0;
1440 }
1441 case EXT4_IND_BLOCK:
1442 nr = i_data[EXT4_DIND_BLOCK];
1443 if (nr) {
1444 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1445 i_data[EXT4_DIND_BLOCK] = 0;
1446 }
1447 case EXT4_DIND_BLOCK:
1448 nr = i_data[EXT4_TIND_BLOCK];
1449 if (nr) {
1450 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1451 i_data[EXT4_TIND_BLOCK] = 0;
1452 }
1453 case EXT4_TIND_BLOCK:
1454 ;
1455 }
1456
1457out_unlock:
1458 up_write(&ei->i_data_sem);
1459 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
1460 ext4_mark_inode_dirty(handle, inode);
1461
1462 /*
1463 * In a multi-transaction truncate, we only make the final transaction
1464 * synchronous
1465 */
1466 if (IS_SYNC(inode))
1467 ext4_handle_sync(handle);
1468out_stop:
1469 /*
1470 * If this was a simple ftruncate(), and the file will remain alive
1471 * then we need to clear up the orphan record which we created above.
1472 * However, if this was a real unlink then we were called by
1473 * ext4_delete_inode(), and we allow that function to clean up the
1474 * orphan info for us.
1475 */
1476 if (inode->i_nlink)
1477 ext4_orphan_del(handle, inode);
1478
1479 ext4_journal_stop(handle);
1480 trace_ext4_truncate_exit(inode);
1481}
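/*
 * Editor's note on the arithmetic above, as a standalone sketch: the
 * first block past i_size is computed by rounding the size up to a
 * whole block. Assumes a 4KB block size; illustration only.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long i_size = 10000;	/* new size in bytes */
	unsigned int bits = 12;			/* log2(4096) */
	unsigned long long last_block = (i_size + (1ULL << bits) - 1) >> bits;

	printf("first block to free: %llu\n", last_block);	/* 3 */
	return 0;
}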
1482
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3e5191f9f39..d47264cafee 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -12,10 +12,6 @@
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * Goal-directed block allocation by Stephen Tweedie
16 * (sct@redhat.com), 1993, 1998
17 * Big-endian to little-endian byte-swapping/bitmaps by
18 * David S. Miller (davem@caip.rutgers.edu), 1995
19 * 64-bit file support on 64-bit platforms by Jakub Jelinek
20 * (jj@sunsite.ms.mff.cuni.cz)
21 *
@@ -47,6 +43,7 @@
47#include "xattr.h"
48#include "acl.h"
49#include "ext4_extents.h"
46#include "truncate.h"
50
51#include <trace/events/ext4.h>
52
@@ -89,72 +86,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
89}
90
91/*
92 * Work out how many blocks we need to proceed with the next chunk of a
93 * truncate transaction.
94 */
95static unsigned long blocks_for_truncate(struct inode *inode)
96{
97 ext4_lblk_t needed;
98
99 needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
100
101 /* Give ourselves just enough room to cope with inodes in which
102 * i_blocks is corrupt: we've seen disk corruptions in the past
103 * which resulted in random data in an inode which looked enough
104 * like a regular file for ext4 to try to delete it. Things
105 * will go a bit crazy if that happens, but at least we should
106 * try not to panic the whole kernel. */
107 if (needed < 2)
108 needed = 2;
109
110 /* But we need to bound the transaction so we don't overflow the
111 * journal. */
112 if (needed > EXT4_MAX_TRANS_DATA)
113 needed = EXT4_MAX_TRANS_DATA;
114
115 return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
116}
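/*
 * Editor's illustration (userspace, assumed constants): the clamp
 * logic above. The two bounds below stand in for the kernel's
 * EXT4_MAX_TRANS_DATA and EXT4_DATA_TRANS_BLOCKS() and are purely
 * illustrative values.
 */
#include <stdio.h>

#define DEMO_MAX_TRANS_DATA 64		/* assumed journal bound */
#define DEMO_DATA_TRANS_BLOCKS 62	/* assumed fixed overhead */

static unsigned long demo_blocks_for_truncate(unsigned long long i_blocks,
					      unsigned int blocksize_bits)
{
	/* i_blocks counts 512-byte units; convert to fs blocks */
	unsigned long needed = i_blocks >> (blocksize_bits - 9);

	if (needed < 2)			/* guard against corrupt i_blocks */
		needed = 2;
	if (needed > DEMO_MAX_TRANS_DATA)	/* bound the transaction */
		needed = DEMO_MAX_TRANS_DATA;
	return DEMO_DATA_TRANS_BLOCKS + needed;
}

int main(void)
{
	/* an 8MB file on a 4KB filesystem: 16384 sectors */
	printf("%lu credits\n", demo_blocks_for_truncate(16384, 12));
	return 0;
}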
117
118/*
119 * Truncate transactions can be complex and absolutely huge. So we need to
120 * be able to restart the transaction at a convenient checkpoint to make
121 * sure we don't overflow the journal.
122 *
123 * start_transaction gets us a new handle for a truncate transaction,
124 * and extend_transaction tries to extend the existing one a bit. If
125 * extend fails, we need to propagate the failure up and restart the
126 * transaction in the top-level truncate loop. --sct
127 */
128static handle_t *start_transaction(struct inode *inode)
129{
130 handle_t *result;
131
132 result = ext4_journal_start(inode, blocks_for_truncate(inode));
133 if (!IS_ERR(result))
134 return result;
135
136 ext4_std_error(inode->i_sb, PTR_ERR(result));
137 return result;
138}
139
140/*
141 * Try to extend this transaction for the purposes of truncation.
142 *
143 * Returns 0 if we managed to create more room. If we can't create more
144 * room, the transaction must be restarted and we return 1.
145 */
146static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
147{
148 if (!ext4_handle_valid(handle))
149 return 0;
150 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
151 return 0;
152 if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
153 return 0;
154 return 1;
155}
156
157/*
158 * Restart the transaction associated with *handle. This does a commit,
159 * so before we call here everything must be consistently dirtied against
160 * this transaction.
@@ -190,6 +121,33 @@ void ext4_evict_inode(struct inode *inode)
190
191 trace_ext4_evict_inode(inode);
192 if (inode->i_nlink) {
124 /*
125 * When journalling data dirty buffers are tracked only in the
126 * journal. So although mm thinks everything is clean and
127 * ready for reaping the inode might still have some pages to
128 * write in the running transaction or waiting to be
129 * checkpointed. Thus calling jbd2_journal_invalidatepage()
130 * (via truncate_inode_pages()) to discard these buffers can
131 * cause data loss. Also even if we did not discard these
132 * buffers, we would have no way to find them after the inode
133 * is reaped and thus user could see stale data if he tries to
134 * read them before the transaction is checkpointed. So be
135 * careful and force everything to disk here... We use
136 * ei->i_datasync_tid to store the newest transaction
137 * containing inode's data.
138 *
139 * Note that directories do not have this problem because they
140 * don't use page cache.
141 */
142 if (ext4_should_journal_data(inode) &&
143 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
144 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
145 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
146
147 jbd2_log_start_commit(journal, commit_tid);
148 jbd2_log_wait_commit(journal, commit_tid);
149 filemap_write_and_wait(&inode->i_data);
150 }
193 truncate_inode_pages(&inode->i_data, 0);
194 goto no_delete;
195 }
@@ -204,7 +162,7 @@ void ext4_evict_inode(struct inode *inode)
204 if (is_bad_inode(inode))
205 goto no_delete;
206
207 handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
165 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
208 if (IS_ERR(handle)) {
209 ext4_std_error(inode->i_sb, PTR_ERR(handle));
210 /*
@@ -277,793 +235,6 @@ no_delete:
277 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
278}
279
280typedef struct {
281 __le32 *p;
282 __le32 key;
283 struct buffer_head *bh;
284} Indirect;
285
286static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
287{
288 p->key = *(p->p = v);
289 p->bh = bh;
290}
291
292/**
293 * ext4_block_to_path - parse the block number into array of offsets
294 * @inode: inode in question (we are only interested in its superblock)
295 * @i_block: block number to be parsed
296 * @offsets: array to store the offsets in
297 * @boundary: set this non-zero if the referred-to block is likely to be
298 * followed (on disk) by an indirect block.
299 *
300 * To store the locations of file's data ext4 uses a data structure common
301 * for UNIX filesystems - tree of pointers anchored in the inode, with
302 * data blocks at leaves and indirect blocks in intermediate nodes.
303 * This function translates the block number into path in that tree -
304 * return value is the path length and @offsets[n] is the offset of
305 * pointer to (n+1)th node in the nth one. If @block is out of range
306 * (negative or too large) warning is printed and zero returned.
307 *
308 * Note: function doesn't find node addresses, so no IO is needed. All
309 * we need to know is the capacity of indirect blocks (taken from the
310 * inode->i_sb).
311 */
312
313/*
314 * Portability note: the last comparison (check that we fit into triple
315 * indirect block) is spelled differently, because otherwise on an
316 * architecture with 32-bit longs and 8Kb pages we might get into trouble
317 * if our filesystem had 8Kb blocks. We might use long long, but that would
318 * kill us on x86. Oh, well, at least the sign propagation does not matter -
319 * i_block would have to be negative in the very beginning, so we would not
320 * get there at all.
321 */
322
323static int ext4_block_to_path(struct inode *inode,
324 ext4_lblk_t i_block,
325 ext4_lblk_t offsets[4], int *boundary)
326{
327 int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
328 int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
329 const long direct_blocks = EXT4_NDIR_BLOCKS,
330 indirect_blocks = ptrs,
331 double_blocks = (1 << (ptrs_bits * 2));
332 int n = 0;
333 int final = 0;
334
335 if (i_block < direct_blocks) {
336 offsets[n++] = i_block;
337 final = direct_blocks;
338 } else if ((i_block -= direct_blocks) < indirect_blocks) {
339 offsets[n++] = EXT4_IND_BLOCK;
340 offsets[n++] = i_block;
341 final = ptrs;
342 } else if ((i_block -= indirect_blocks) < double_blocks) {
343 offsets[n++] = EXT4_DIND_BLOCK;
344 offsets[n++] = i_block >> ptrs_bits;
345 offsets[n++] = i_block & (ptrs - 1);
346 final = ptrs;
347 } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
348 offsets[n++] = EXT4_TIND_BLOCK;
349 offsets[n++] = i_block >> (ptrs_bits * 2);
350 offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
351 offsets[n++] = i_block & (ptrs - 1);
352 final = ptrs;
353 } else {
354 ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
355 i_block + direct_blocks +
356 indirect_blocks + double_blocks, inode->i_ino);
357 }
358 if (boundary)
359 *boundary = final - 1 - (i_block & (ptrs - 1));
360 return n;
361}
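/*
 * Editor's illustration (userspace, not the kernel function): the same
 * path computation for the common 4KB-block case, where an indirect
 * block holds 1024 (2^10) pointers. Constants and names are assumed
 * for the example.
 */
#include <stdio.h>

#define DEMO_NDIR 12		/* direct pointers, as in ext4 */
#define DEMO_PTRS_BITS 10	/* log2(pointers per indirect block) */
#define DEMO_PTRS (1L << DEMO_PTRS_BITS)

static int demo_block_to_path(long i_block, long offsets[4])
{
	int n = 0;

	if (i_block < DEMO_NDIR) {
		offsets[n++] = i_block;
	} else if ((i_block -= DEMO_NDIR) < DEMO_PTRS) {
		offsets[n++] = DEMO_NDIR;		/* IND slot */
		offsets[n++] = i_block;
	} else if ((i_block -= DEMO_PTRS) < (1L << (DEMO_PTRS_BITS * 2))) {
		offsets[n++] = DEMO_NDIR + 1;		/* DIND slot */
		offsets[n++] = i_block >> DEMO_PTRS_BITS;
		offsets[n++] = i_block & (DEMO_PTRS - 1);
	} else {
		i_block -= 1L << (DEMO_PTRS_BITS * 2);
		offsets[n++] = DEMO_NDIR + 2;		/* TIND slot */
		offsets[n++] = i_block >> (DEMO_PTRS_BITS * 2);
		offsets[n++] = (i_block >> DEMO_PTRS_BITS) & (DEMO_PTRS - 1);
		offsets[n++] = i_block & (DEMO_PTRS - 1);
	}
	return n;	/* depth of the chain */
}

int main(void)
{
	long offsets[4];
	int i, depth = demo_block_to_path(5000, offsets);

	/* 5000 is past 12 + 1024, so it takes the DIND path {13, 3, 892} */
	printf("depth %d:", depth);
	for (i = 0; i < depth; i++)
		printf(" %ld", offsets[i]);
	printf("\n");
	return 0;
}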
362
363static int __ext4_check_blockref(const char *function, unsigned int line,
364 struct inode *inode,
365 __le32 *p, unsigned int max)
366{
367 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
368 __le32 *bref = p;
369 unsigned int blk;
370
371 while (bref < p+max) {
372 blk = le32_to_cpu(*bref++);
373 if (blk &&
374 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
375 blk, 1))) {
376 es->s_last_error_block = cpu_to_le64(blk);
377 ext4_error_inode(inode, function, line, blk,
378 "invalid block");
379 return -EIO;
380 }
381 }
382 return 0;
383}
384
385
386#define ext4_check_indirect_blockref(inode, bh) \
387 __ext4_check_blockref(__func__, __LINE__, inode, \
388 (__le32 *)(bh)->b_data, \
389 EXT4_ADDR_PER_BLOCK((inode)->i_sb))
390
391#define ext4_check_inode_blockref(inode) \
392 __ext4_check_blockref(__func__, __LINE__, inode, \
393 EXT4_I(inode)->i_data, \
394 EXT4_NDIR_BLOCKS)
395
396/**
397 * ext4_get_branch - read the chain of indirect blocks leading to data
398 * @inode: inode in question
399 * @depth: depth of the chain (1 - direct pointer, etc.)
400 * @offsets: offsets of pointers in inode/indirect blocks
401 * @chain: place to store the result
402 * @err: here we store the error value
403 *
404 * Function fills the array of triples <key, p, bh> and returns %NULL
405 * if everything went OK or the pointer to the last filled triple
406 * (incomplete one) otherwise. Upon the return chain[i].key contains
407 * the number of (i+1)-th block in the chain (as it is stored in memory,
408 * i.e. little-endian 32-bit), chain[i].p contains the address of that
409 * number (it points into struct inode for i==0 and into the bh->b_data
410 * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
411 * block for i>0 and NULL for i==0. In other words, it holds the block
412 * numbers of the chain, addresses they were taken from (and where we can
413 * verify that chain did not change) and buffer_heads hosting these
414 * numbers.
415 *
416 * Function stops when it stumbles upon zero pointer (absent block)
417 * (pointer to last triple returned, *@err == 0)
418 * or when it gets an IO error reading an indirect block
419 * (ditto, *@err == -EIO)
420 * or when it reads all @depth-1 indirect blocks successfully and finds
421 * the whole chain, all the way to the data (returns %NULL, *err == 0).
422 *
423 * Need to be called with
424 * down_read(&EXT4_I(inode)->i_data_sem)
425 */
426static Indirect *ext4_get_branch(struct inode *inode, int depth,
427 ext4_lblk_t *offsets,
428 Indirect chain[4], int *err)
429{
430 struct super_block *sb = inode->i_sb;
431 Indirect *p = chain;
432 struct buffer_head *bh;
433
434 *err = 0;
435 /* i_data is not going away, no lock needed */
436 add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
437 if (!p->key)
438 goto no_block;
439 while (--depth) {
440 bh = sb_getblk(sb, le32_to_cpu(p->key));
441 if (unlikely(!bh))
442 goto failure;
443
444 if (!bh_uptodate_or_lock(bh)) {
445 if (bh_submit_read(bh) < 0) {
446 put_bh(bh);
447 goto failure;
448 }
449 /* validate block references */
450 if (ext4_check_indirect_blockref(inode, bh)) {
451 put_bh(bh);
452 goto failure;
453 }
454 }
455
456 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
457 /* Reader: end */
458 if (!p->key)
459 goto no_block;
460 }
461 return NULL;
462
463failure:
464 *err = -EIO;
465no_block:
466 return p;
467}
468
469/**
470 * ext4_find_near - find a place for allocation with sufficient locality
471 * @inode: owner
472 * @ind: descriptor of indirect block.
473 *
474 * This function returns the preferred place for block allocation.
475 * It is used when heuristic for sequential allocation fails.
476 * Rules are:
477 * + if there is a block to the left of our position - allocate near it.
478 * + if pointer will live in indirect block - allocate near that block.
479 * + if pointer will live in inode - allocate in the same
480 * cylinder group.
481 *
482 * In the latter case we colour the starting block by the caller's PID to
483 * prevent it from clashing with concurrent allocations for a different inode
484 * in the same block group. The PID is used here so that functionally related
485 * files will be close-by on-disk.
486 *
487 * Caller must make sure that @ind is valid and will stay that way.
488 */
489static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
490{
491 struct ext4_inode_info *ei = EXT4_I(inode);
492 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
493 __le32 *p;
494 ext4_fsblk_t bg_start;
495 ext4_fsblk_t last_block;
496 ext4_grpblk_t colour;
497 ext4_group_t block_group;
498 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
499
500 /* Try to find previous block */
501 for (p = ind->p - 1; p >= start; p--) {
502 if (*p)
503 return le32_to_cpu(*p);
504 }
505
506 /* No such thing, so let's try location of indirect block */
507 if (ind->bh)
508 return ind->bh->b_blocknr;
509
510 /*
511 * It is going to be referred to from the inode itself? OK, just put it
512 * into the same cylinder group then.
513 */
514 block_group = ei->i_block_group;
515 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
516 block_group &= ~(flex_size-1);
517 if (S_ISREG(inode->i_mode))
518 block_group++;
519 }
520 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
521 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
522
523 /*
524 * If we are doing delayed allocation, we don't need to take
525 * colour into account.
526 */
527 if (test_opt(inode->i_sb, DELALLOC))
528 return bg_start;
529
530 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
531 colour = (current->pid % 16) *
532 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
533 else
534 colour = (current->pid % 16) * ((last_block - bg_start) / 16);
535 return bg_start + colour;
536}
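/*
 * Editor's illustration of the colouring arithmetic above: with an
 * assumed 32768 blocks per group, the PID selects one of 16 slices of
 * 2048 blocks, so allocations from the same process cluster together.
 * Userspace sketch with assumed values.
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long blocks_per_group = 32768;	/* assumed */
	unsigned long bg_start = 1024000;	/* assumed group start */
	unsigned long colour = (getpid() % 16) * (blocks_per_group / 16);

	printf("goal block: %lu\n", bg_start + colour);
	return 0;
}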
537
538/**
539 * ext4_find_goal - find a preferred place for allocation.
540 * @inode: owner
541 * @block: block we want
542 * @partial: pointer to the last triple within a chain
543 *
544 * Normally this function finds the preferred place for block allocation
545 * and returns it.
546 * Because this is only used for non-extent files, we limit the block nr
547 * to 32 bits.
548 */
549static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
550 Indirect *partial)
551{
552 ext4_fsblk_t goal;
553
554 /*
555 * XXX need to get goal block from mballoc's data structures
556 */
557
558 goal = ext4_find_near(inode, partial);
559 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
560 return goal;
561}
562
563/**
564 * ext4_blks_to_allocate - Look up the block map and count the number
565 * of direct blocks need to be allocated for the given branch.
566 *
567 * @branch: chain of indirect blocks
568 * @k: number of blocks need for indirect blocks
569 * @blks: number of data blocks to be mapped.
570 * @blocks_to_boundary: the offset in the indirect block
571 *
572 * Return the total number of blocks to be allocated, including the
573 * direct and indirect blocks.
574 */
575static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
576 int blocks_to_boundary)
577{
578 unsigned int count = 0;
579
580 /*
581 * Simple case: the [t,d]Indirect block(s) have not been allocated yet,
582 * so clearly no blocks on that path have been allocated either
583 */
584 if (k > 0) {
585 /* right now we don't handle cross boundary allocation */
586 if (blks < blocks_to_boundary + 1)
587 count += blks;
588 else
589 count += blocks_to_boundary + 1;
590 return count;
591 }
592
593 count++;
594 while (count < blks && count <= blocks_to_boundary &&
595 le32_to_cpu(*(branch[0].p + count)) == 0) {
596 count++;
597 }
598 return count;
599}
600
601/**
602 * ext4_alloc_blocks: multiple allocate blocks needed for a branch
603 * @handle: handle for this transaction
604 * @inode: inode which needs allocated blocks
605 * @iblock: the logical block to start allocated at
606 * @goal: preferred physical block of allocation
607 * @indirect_blks: the number of blocks that need to be allocated for
608 * indirect blocks
609 * @blks: number of desired blocks
610 * @new_blocks: on return it will store the new block numbers for
611 * the indirect blocks(if needed) and the first direct block,
612 * @err: on return it will store the error code
613 *
614 * This function will return the number of blocks allocated as
615 * requested by the passed-in parameters.
616 */
617static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
618 ext4_lblk_t iblock, ext4_fsblk_t goal,
619 int indirect_blks, int blks,
620 ext4_fsblk_t new_blocks[4], int *err)
621{
622 struct ext4_allocation_request ar;
623 int target, i;
624 unsigned long count = 0, blk_allocated = 0;
625 int index = 0;
626 ext4_fsblk_t current_block = 0;
627 int ret = 0;
628
629 /*
630 * Here we try to allocate the requested multiple blocks at once,
631 * on a best-effort basis.
632 * To build a branch, we should allocate blocks for
633 * the indirect blocks (if not allocated yet), and at least
634 * the first direct block of this branch. That's the
635 * minimum number of blocks we need to allocate (required).
636 */
637 /* first we try to allocate the indirect blocks */
638 target = indirect_blks;
639 while (target > 0) {
640 count = target;
641 /* allocating blocks for indirect blocks and direct blocks */
642 current_block = ext4_new_meta_blocks(handle, inode, goal,
643 0, &count, err);
644 if (*err)
645 goto failed_out;
646
647 if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
648 EXT4_ERROR_INODE(inode,
649 "current_block %llu + count %lu > %d!",
650 current_block, count,
651 EXT4_MAX_BLOCK_FILE_PHYS);
652 *err = -EIO;
653 goto failed_out;
654 }
655
656 target -= count;
657 /* allocate blocks for indirect blocks */
658 while (index < indirect_blks && count) {
659 new_blocks[index++] = current_block++;
660 count--;
661 }
662 if (count > 0) {
663 /*
664 * save the new block number
665 * for the first direct block
666 */
667 new_blocks[index] = current_block;
668 printk(KERN_INFO "%s returned more blocks than "
669 "requested\n", __func__);
670 WARN_ON(1);
671 break;
672 }
673 }
674
675 target = blks - count ;
676 blk_allocated = count;
677 if (!target)
678 goto allocated;
679 /* Now allocate data blocks */
680 memset(&ar, 0, sizeof(ar));
681 ar.inode = inode;
682 ar.goal = goal;
683 ar.len = target;
684 ar.logical = iblock;
685 if (S_ISREG(inode->i_mode))
686 /* enable in-core preallocation only for regular files */
687 ar.flags = EXT4_MB_HINT_DATA;
688
689 current_block = ext4_mb_new_blocks(handle, &ar, err);
690 if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
691 EXT4_ERROR_INODE(inode,
692 "current_block %llu + ar.len %d > %d!",
693 current_block, ar.len,
694 EXT4_MAX_BLOCK_FILE_PHYS);
695 *err = -EIO;
696 goto failed_out;
697 }
698
699 if (*err && (target == blks)) {
700 /*
701 * if the allocation failed and we didn't allocate
702 * any blocks before
703 */
704 goto failed_out;
705 }
706 if (!*err) {
707 if (target == blks) {
708 /*
709 * save the new block number
710 * for the first direct block
711 */
712 new_blocks[index] = current_block;
713 }
714 blk_allocated += ar.len;
715 }
716allocated:
717 /* total number of blocks allocated for direct blocks */
718 ret = blk_allocated;
719 *err = 0;
720 return ret;
721failed_out:
722 for (i = 0; i < index; i++)
723 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
724 return ret;
725}
726
727/**
728 * ext4_alloc_branch - allocate and set up a chain of blocks.
729 * @handle: handle for this transaction
730 * @inode: owner
731 * @indirect_blks: number of allocated indirect blocks
732 * @blks: number of allocated direct blocks
733 * @goal: preferred place for allocation
734 * @offsets: offsets (in the blocks) to store the pointers to next.
735 * @branch: place to store the chain in.
736 *
737 * This function allocates blocks, zeroes out all but the last one,
738 * links them into chain and (if we are synchronous) writes them to disk.
739 * In other words, it prepares a branch that can be spliced onto the
740 * inode. It stores the information about that chain in the branch[], in
741 * the same format as ext4_get_branch() would do. We are calling it after
742 * we had read the existing part of chain and partial points to the last
743 * triple of that (one with zero ->key). Upon the exit we have the same
744 * picture as after the successful ext4_get_block(), except that in one
745 * place chain is disconnected - *branch->p is still zero (we did not
746 * set the last link), but branch->key contains the number that should
747 * be placed into *branch->p to fill that gap.
748 *
749 * If allocation fails we free all blocks we've allocated (and forget
750 * their buffer_heads) and return the error value from the failed
751 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
752 * as described above and return 0.
753 */
754static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
755 ext4_lblk_t iblock, int indirect_blks,
756 int *blks, ext4_fsblk_t goal,
757 ext4_lblk_t *offsets, Indirect *branch)
758{
759 int blocksize = inode->i_sb->s_blocksize;
760 int i, n = 0;
761 int err = 0;
762 struct buffer_head *bh;
763 int num;
764 ext4_fsblk_t new_blocks[4];
765 ext4_fsblk_t current_block;
766
767 num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
768 *blks, new_blocks, &err);
769 if (err)
770 return err;
771
772 branch[0].key = cpu_to_le32(new_blocks[0]);
773 /*
774 * metadata blocks and data blocks are allocated.
775 */
776 for (n = 1; n <= indirect_blks; n++) {
777 /*
778 * Get buffer_head for parent block, zero it out
779 * and set the pointer to new one, then send
780 * parent to disk.
781 */
782 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
783 if (unlikely(!bh)) {
784 err = -EIO;
785 goto failed;
786 }
787
788 branch[n].bh = bh;
789 lock_buffer(bh);
790 BUFFER_TRACE(bh, "call get_create_access");
791 err = ext4_journal_get_create_access(handle, bh);
792 if (err) {
793 /* Don't brelse(bh) here; it's done in
794 * ext4_journal_forget() below */
795 unlock_buffer(bh);
796 goto failed;
797 }
798
799 memset(bh->b_data, 0, blocksize);
800 branch[n].p = (__le32 *) bh->b_data + offsets[n];
801 branch[n].key = cpu_to_le32(new_blocks[n]);
802 *branch[n].p = branch[n].key;
803 if (n == indirect_blks) {
804 current_block = new_blocks[n];
805 /*
806 * End of chain, update the last new metablock of
807 * the chain to point to the new allocated
808 * data blocks numbers
809 */
810 for (i = 1; i < num; i++)
811 *(branch[n].p + i) = cpu_to_le32(++current_block);
812 }
813 BUFFER_TRACE(bh, "marking uptodate");
814 set_buffer_uptodate(bh);
815 unlock_buffer(bh);
816
817 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
818 err = ext4_handle_dirty_metadata(handle, inode, bh);
819 if (err)
820 goto failed;
821 }
822 *blks = num;
823 return err;
824failed:
825 /* Allocation failed, free what we already allocated */
826 ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
827 for (i = 1; i <= n ; i++) {
828 /*
829 * branch[i].bh is newly allocated, so there is no
830 * need to revoke the block, which is why we don't
831 * need to set EXT4_FREE_BLOCKS_METADATA.
832 */
833 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
834 EXT4_FREE_BLOCKS_FORGET);
835 }
836 for (i = n+1; i < indirect_blks; i++)
837 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
838
839 ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
840
841 return err;
842}
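/*
 * Editor's illustration (userspace, assumed sizes): the linking step
 * above in miniature. A freshly allocated indirect block starts as
 * all holes; the new data block numbers are then written as one
 * contiguous run starting at the branch offset.
 */
#include <stdio.h>
#include <string.h>

#define DEMO_PTRS 1024		/* pointers per 4KB indirect block */

int main(void)
{
	unsigned int ind[DEMO_PTRS];
	unsigned int new_blocks[] = { 7000, 7001, 7002 };
	int offset = 5, i;

	memset(ind, 0, sizeof(ind));	/* zero-fill: every slot a hole */
	for (i = 0; i < 3; i++)		/* splice in the run of new blocks */
		ind[offset + i] = new_blocks[i];
	printf("ind[%d..%d] = %u..%u\n", offset, offset + 2,
	       ind[offset], ind[offset + 2]);
	return 0;
}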
843
844/**
845 * ext4_splice_branch - splice the allocated branch onto inode.
846 * @handle: handle for this transaction
847 * @inode: owner
848 * @block: (logical) number of block we are adding
849 * @chain: chain of indirect blocks (with a missing link - see
850 * ext4_alloc_branch)
851 * @where: location of missing link
852 * @num: number of indirect blocks we are adding
853 * @blks: number of direct blocks we are adding
854 *
855 * This function fills the missing link and does all housekeeping needed in
856 * inode (->i_blocks, etc.). In case of success we end up with the full
857 * chain to new block and return 0.
858 */
859static int ext4_splice_branch(handle_t *handle, struct inode *inode,
860 ext4_lblk_t block, Indirect *where, int num,
861 int blks)
862{
863 int i;
864 int err = 0;
865 ext4_fsblk_t current_block;
866
867 /*
868 * If we're splicing into a [td]indirect block (as opposed to the
869 * inode) then we need to get write access to the [td]indirect block
870 * before the splice.
871 */
872 if (where->bh) {
873 BUFFER_TRACE(where->bh, "get_write_access");
874 err = ext4_journal_get_write_access(handle, where->bh);
875 if (err)
876 goto err_out;
877 }
878 /* That's it */
879
880 *where->p = where->key;
881
882 /*
883 * Update the host buffer_head or inode to point to the just-allocated
884 * direct blocks
885 */
886 if (num == 0 && blks > 1) {
887 current_block = le32_to_cpu(where->key) + 1;
888 for (i = 1; i < blks; i++)
889 *(where->p + i) = cpu_to_le32(current_block++);
890 }
891
892 /* We are done with atomic stuff, now do the rest of housekeeping */
893 /* had we spliced it onto indirect block? */
894 if (where->bh) {
895 /*
896 * If we spliced it onto an indirect block, we haven't
897 * altered the inode. Note however that if it is being spliced
898 * onto an indirect block at the very end of the file (the
899 * file is growing) then we *will* alter the inode to reflect
900 * the new i_size. But that is not done here - it is done in
901 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
902 */
903 jbd_debug(5, "splicing indirect only\n");
904 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
905 err = ext4_handle_dirty_metadata(handle, inode, where->bh);
906 if (err)
907 goto err_out;
908 } else {
909 /*
910 * OK, we spliced it into the inode itself on a direct block.
911 */
912 ext4_mark_inode_dirty(handle, inode);
913 jbd_debug(5, "splicing direct\n");
914 }
915 return err;
916
917err_out:
918 for (i = 1; i <= num; i++) {
919 /*
920 * branch[i].bh is newly allocated, so there is no
921 * need to revoke the block, which is why we don't
922 * need to set EXT4_FREE_BLOCKS_METADATA.
923 */
924 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
925 EXT4_FREE_BLOCKS_FORGET);
926 }
927 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
928 blks, 0);
929
930 return err;
931}
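/*
 * Editor's note, as a sketch: the branch is built in full while still
 * unreachable, and becomes visible through one final store into the
 * parent slot, so a reader never observes a half-built chain.
 * Hypothetical userspace structures, illustration only.
 */
#include <stdio.h>

struct demo_link {
	unsigned int key;	/* block number to publish */
	unsigned int *p;	/* parent slot the key belongs in */
};

int main(void)
{
	unsigned int parent_slot = 0;			/* missing link */
	struct demo_link where = { 12345, &parent_slot };

	/* ...the whole subtree is allocated and zeroed before this... */
	*where.p = where.key;	/* the single store that splices it in */
	printf("parent slot now %u\n", parent_slot);
	return 0;
}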
932
933/*
934 * The ext4_ind_map_blocks() function handles non-extents inodes
935 * (i.e., using the traditional indirect/double-indirect i_blocks
936 * scheme) for ext4_map_blocks().
937 *
938 * Allocation strategy is simple: if we have to allocate something, we will
939 * have to go the whole way to leaf. So let's do it before attaching anything
940 * to tree, set linkage between the newborn blocks, write them if sync is
941 * required, recheck the path, free and repeat if check fails, otherwise
942 * set the last missing link (that will protect us from any truncate-generated
943 * removals - all blocks on the path are immune now) and possibly force the
944 * write on the parent block.
945 * That has a nice additional property: no special recovery from the failed
946 * allocations is needed - we simply release blocks and do not touch anything
947 * reachable from inode.
948 *
949 * `handle' can be NULL if create == 0.
950 *
951 * return > 0, # of blocks mapped or allocated.
952 * return = 0, if plain lookup failed.
953 * return < 0, error case.
954 *
955 * The ext4_ind_get_blocks() function should be called with
956 * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
957 * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
958 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
959 * blocks.
960 */
961static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
962 struct ext4_map_blocks *map,
963 int flags)
964{
965 int err = -EIO;
966 ext4_lblk_t offsets[4];
967 Indirect chain[4];
968 Indirect *partial;
969 ext4_fsblk_t goal;
970 int indirect_blks;
971 int blocks_to_boundary = 0;
972 int depth;
973 int count = 0;
974 ext4_fsblk_t first_block = 0;
975
976 trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
977 J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
978 J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
979 depth = ext4_block_to_path(inode, map->m_lblk, offsets,
980 &blocks_to_boundary);
981
982 if (depth == 0)
983 goto out;
984
985 partial = ext4_get_branch(inode, depth, offsets, chain, &err);
986
987 /* Simplest case - block found, no allocation needed */
988 if (!partial) {
989 first_block = le32_to_cpu(chain[depth - 1].key);
990 count++;
991 /*map more blocks*/
992 while (count < map->m_len && count <= blocks_to_boundary) {
993 ext4_fsblk_t blk;
994
995 blk = le32_to_cpu(*(chain[depth-1].p + count));
996
997 if (blk == first_block + count)
998 count++;
999 else
1000 break;
1001 }
1002 goto got_it;
1003 }
1004
1005 /* Next simple case - plain lookup or failed read of indirect block */
1006 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
1007 goto cleanup;
1008
1009 /*
1010 * Okay, we need to do block allocation.
1011 */
1012 goal = ext4_find_goal(inode, map->m_lblk, partial);
1013
1014 /* the number of blocks need to allocate for [d,t]indirect blocks */
1015 indirect_blks = (chain + depth) - partial - 1;
1016
1017 /*
1018 * Next look up the indirect map to count the total number of
1019 * direct blocks to allocate for this branch.
1020 */
1021 count = ext4_blks_to_allocate(partial, indirect_blks,
1022 map->m_len, blocks_to_boundary);
1023 /*
1024 * Block out ext4_truncate while we alter the tree
1025 */
1026 err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
1027 &count, goal,
1028 offsets + (partial - chain), partial);
1029
1030 /*
1031 * The ext4_splice_branch call will free and forget any buffers
1032 * on the new chain if there is a failure, but that risks using
1033 * up transaction credits, especially for bitmaps where the
1034 * credits cannot be returned. Can we handle this somehow? We
1035 * may need to return -EAGAIN upwards in the worst case. --sct
1036 */
1037 if (!err)
1038 err = ext4_splice_branch(handle, inode, map->m_lblk,
1039 partial, indirect_blks, count);
1040 if (err)
1041 goto cleanup;
1042
1043 map->m_flags |= EXT4_MAP_NEW;
1044
1045 ext4_update_inode_fsync_trans(handle, inode, 1);
1046got_it:
1047 map->m_flags |= EXT4_MAP_MAPPED;
1048 map->m_pblk = le32_to_cpu(chain[depth-1].key);
1049 map->m_len = count;
1050 if (count > blocks_to_boundary)
1051 map->m_flags |= EXT4_MAP_BOUNDARY;
1052 err = count;
1053 /* Clean up and exit */
1054 partial = chain + depth - 1; /* the whole chain */
1055cleanup:
1056 while (partial > chain) {
1057 BUFFER_TRACE(partial->bh, "call brelse");
1058 brelse(partial->bh);
1059 partial--;
1060 }
1061out:
1062 trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
1063 map->m_pblk, map->m_len, err);
1064 return err;
1065}
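/*
 * Editor's illustration of the lookup fast path above: after the
 * first block is found, the mapping is extended while the following
 * pointers stay physically contiguous, so one call can map a
 * multi-block run. Userspace sketch, assumed data.
 */
#include <stdio.h>

int main(void)
{
	unsigned int leaf[] = { 900, 901, 902, 950 };	/* leaf pointers */
	unsigned int first = leaf[0], want = 4, count = 1;

	while (count < want && leaf[count] == first + count)
		count++;
	printf("mapped %u blocks at %u\n", count, first);	/* 3 at 900 */
	return 0;
}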
1066
1067#ifdef CONFIG_QUOTA
1068qsize_t *ext4_get_reserved_space(struct inode *inode)
1069{
@@ -1073,33 +244,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
1073
1074/*
1075 * Calculate the number of metadata blocks we need to reserve
1076 * to allocate a new block at @lblock for a non-extent-based file
1077 */
1078static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1079 sector_t lblock)
1080{
1081 struct ext4_inode_info *ei = EXT4_I(inode);
1082 sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
1083 int blk_bits;
1084
1085 if (lblock < EXT4_NDIR_BLOCKS)
1086 return 0;
1087
1088 lblock -= EXT4_NDIR_BLOCKS;
1089
1090 if (ei->i_da_metadata_calc_len &&
1091 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1092 ei->i_da_metadata_calc_len++;
1093 return 0;
1094 }
1095 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1096 ei->i_da_metadata_calc_len = 1;
1097 blk_bits = order_base_2(lblock);
1098 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1099}
1100
1101/*
1102 * Calculate the number of metadata blocks we need to reserve
1103 * to allocate a block located at @lblock
1104 */
1105static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
@@ -1107,7 +251,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
1107 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1108 return ext4_ext_calc_metadata_amount(inode, lblock);
1109
1110 return ext4_indirect_calc_metadata_amount(inode, lblock);
254 return ext4_ind_calc_metadata_amount(inode, lblock);
1111}
1112
1113/*
@@ -1589,16 +733,6 @@ static int do_journal_get_write_access(handle_t *handle,
1589 return ret;
1590}
1591
1592/*
1593 * Truncate blocks that were not used by write. We have to truncate the
1594 * pagecache as well so that corresponding buffers get properly unmapped.
1595 */
1596static void ext4_truncate_failed_write(struct inode *inode)
1597{
1598 truncate_inode_pages(inode->i_mapping, inode->i_size);
1599 ext4_truncate(inode);
1600}
1601
1602static int ext4_get_block_write(struct inode *inode, sector_t iblock,
1603 struct buffer_head *bh_result, int create);
1604static int ext4_write_begin(struct file *file, struct address_space *mapping,
@@ -1863,6 +997,7 @@ static int ext4_journalled_write_end(struct file *file,
1863 if (new_i_size > inode->i_size)
1864 i_size_write(inode, pos+copied);
1865 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1000 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1866 if (new_i_size > EXT4_I(inode)->i_disksize) {
1867 ext4_update_i_disksize(inode, new_i_size);
1868 ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -2571,6 +1706,7 @@ static int __ext4_journalled_writepage(struct page *page,
2571 write_end_fn);
2572 if (ret == 0)
2573 ret = err;
1709 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
2574 err = ext4_journal_stop(handle);
2575 if (!ret)
2576 ret = err;
@@ -3450,112 +2586,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3450}
3451
3452/*
3453 * O_DIRECT for ext3 (or indirect map) based files
3454 *
3455 * If the O_DIRECT write will extend the file then add this inode to the
3456 * orphan list. So recovery will truncate it back to the original size
3457 * if the machine crashes during the write.
3458 *
3459 * If the O_DIRECT write is instantiating holes inside i_size and the machine
3460 * crashes then stale disk data _may_ be exposed inside the file. But current
3461 * VFS code falls back into buffered path in that case so we are safe.
3462 */
3463static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3464 const struct iovec *iov, loff_t offset,
3465 unsigned long nr_segs)
3466{
3467 struct file *file = iocb->ki_filp;
3468 struct inode *inode = file->f_mapping->host;
3469 struct ext4_inode_info *ei = EXT4_I(inode);
3470 handle_t *handle;
3471 ssize_t ret;
3472 int orphan = 0;
3473 size_t count = iov_length(iov, nr_segs);
3474 int retries = 0;
3475
3476 if (rw == WRITE) {
3477 loff_t final_size = offset + count;
3478
3479 if (final_size > inode->i_size) {
3480 /* Credits for sb + inode write */
3481 handle = ext4_journal_start(inode, 2);
3482 if (IS_ERR(handle)) {
3483 ret = PTR_ERR(handle);
3484 goto out;
3485 }
3486 ret = ext4_orphan_add(handle, inode);
3487 if (ret) {
3488 ext4_journal_stop(handle);
3489 goto out;
3490 }
3491 orphan = 1;
3492 ei->i_disksize = inode->i_size;
3493 ext4_journal_stop(handle);
3494 }
3495 }
3496
3497retry:
3498 if (rw == READ && ext4_should_dioread_nolock(inode))
3499 ret = __blockdev_direct_IO(rw, iocb, inode,
3500 inode->i_sb->s_bdev, iov,
3501 offset, nr_segs,
3502 ext4_get_block, NULL, NULL, 0);
3503 else {
3504 ret = blockdev_direct_IO(rw, iocb, inode, iov,
3505 offset, nr_segs, ext4_get_block);
3506
3507 if (unlikely((rw & WRITE) && ret < 0)) {
3508 loff_t isize = i_size_read(inode);
3509 loff_t end = offset + iov_length(iov, nr_segs);
3510
3511 if (end > isize)
3512 ext4_truncate_failed_write(inode);
3513 }
3514 }
3515 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3516 goto retry;
3517
3518 if (orphan) {
3519 int err;
3520
3521 /* Credits for sb + inode write */
3522 handle = ext4_journal_start(inode, 2);
3523 if (IS_ERR(handle)) {
3524 /* This is really bad luck. We've written the data
3525 * but cannot extend i_size. Bail out and pretend
3526 * the write failed... */
3527 ret = PTR_ERR(handle);
3528 if (inode->i_nlink)
3529 ext4_orphan_del(NULL, inode);
3530
3531 goto out;
3532 }
3533 if (inode->i_nlink)
3534 ext4_orphan_del(handle, inode);
3535 if (ret > 0) {
3536 loff_t end = offset + ret;
3537 if (end > inode->i_size) {
3538 ei->i_disksize = end;
3539 i_size_write(inode, end);
3540 /*
3541 * We're going to return a positive `ret'
3542 * here due to non-zero-length I/O, so there's
3543 * no way of reporting error returns from
3544 * ext4_mark_inode_dirty() to userspace. So
3545 * ignore it.
3546 */
3547 ext4_mark_inode_dirty(handle, inode);
3548 }
3549 }
3550 err = ext4_journal_stop(handle);
3551 if (ret == 0)
3552 ret = err;
3553 }
3554out:
3555 return ret;
3556}
3557
3558/*
3559 * ext4_get_block used when preparing for a DIO write or buffer write.
3560 * We allocate an uninitialized extent if blocks haven't been allocated.
3561 * The extent will be converted to initialized after the IO is complete.
@@ -4033,383 +3063,6 @@ unlock:
4033 return err;
4034}
4035
4036/*
4037 * Probably it should be a library function... search for first non-zero word
4038 * or memcmp with zero_page, whatever is better for particular architecture.
4039 * Linus?
4040 */
4041static inline int all_zeroes(__le32 *p, __le32 *q)
4042{
4043 while (p < q)
4044 if (*p++)
4045 return 0;
4046 return 1;
4047}
4048
4049/**
4050 * ext4_find_shared - find the indirect blocks for partial truncation.
4051 * @inode: inode in question
4052 * @depth: depth of the affected branch
4053 * @offsets: offsets of pointers in that branch (see ext4_block_to_path)
4054 * @chain: place to store the pointers to partial indirect blocks
4055 * @top: place to the (detached) top of branch
4056 *
4057 * This is a helper function used by ext4_truncate().
4058 *
4059 * When we do truncate() we may have to clean the ends of several
4060 * indirect blocks but leave the blocks themselves alive. A block is
4061 * partially truncated if some data below the new i_size is referred
4062 * to from it (and it lies on the path to the first completely
4063 * truncated data block). We have to free the top of that path along
4064 * with everything to the right of the path. Since no allocation
4065 * past the truncation point is possible until ext4_truncate()
4066 * finishes, we may safely do the latter, but top of branch may
4067 * require special attention - pageout below the truncation point
4068 * might try to populate it.
4069 *
4070 * We atomically detach the top of branch from the tree, store the
4071 * block number of its root in *@top, pointers to buffer_heads of
4072 * partially truncated blocks - in @chain[].bh and pointers to
4073 * their last elements that should not be removed - in
4074 * @chain[].p. Return value is the pointer to last filled element
4075 * of @chain.
4076 *
4077 * The caller is left to do the actual freeing of subtrees:
4078 * a) free the subtree starting from *@top
4079 * b) free the subtrees whose roots are stored in
4080 * (@chain[i].p+1 .. end of @chain[i].bh->b_data)
4081 * c) free the subtrees growing from the inode past the @chain[0].
4082 * (no partially truncated stuff there). */
4083
4084static Indirect *ext4_find_shared(struct inode *inode, int depth,
4085 ext4_lblk_t offsets[4], Indirect chain[4],
4086 __le32 *top)
4087{
4088 Indirect *partial, *p;
4089 int k, err;
4090
4091 *top = 0;
4092 /* Make k index the deepest non-null offset + 1 */
4093 for (k = depth; k > 1 && !offsets[k-1]; k--)
4094 ;
4095 partial = ext4_get_branch(inode, k, offsets, chain, &err);
4096 /* Writer: pointers */
4097 if (!partial)
4098 partial = chain + k-1;
4099 /*
4100 * If the branch has acquired a continuation since we last looked at it -
4101 * fine, it should all survive and the (new) top doesn't belong to us.
4102 */
4103 if (!partial->key && *partial->p)
4104 /* Writer: end */
4105 goto no_top;
4106 for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
4107 ;
4108 /*
4109 * OK, we've found the last block that must survive. The rest of our
4110 * branch should be detached before unlocking. However, if the rest
4111 * of the branch is all ours and does not grow directly from the inode,
4112 * it's easier to cheat and just decrement partial->p.
4113 */
4114 if (p == chain + k - 1 && p > chain) {
4115 p->p--;
4116 } else {
4117 *top = *p->p;
4118 /* Nope, don't do this in ext4. Must leave the tree intact */
4119#if 0
4120 *p->p = 0;
4121#endif
4122 }
4123 /* Writer: end */
4124
4125 while (partial > p) {
4126 brelse(partial->bh);
4127 partial--;
4128 }
4129no_top:
4130 return partial;
4131}
4132
4133/*
4134 * Zero a number of block pointers in either an inode or an indirect block.
4135 * If we restart the transaction we must again get write access to the
4136 * indirect block for further modification.
4137 *
4138 * We release `count' blocks on disk, but (last - first) may be greater
4139 * than `count' because there can be holes in there.
4140 *
4141 * Return 0 on success, 1 on invalid block range
4142 * and < 0 on fatal error.
4143 */
4144static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4145 struct buffer_head *bh,
4146 ext4_fsblk_t block_to_free,
4147 unsigned long count, __le32 *first,
4148 __le32 *last)
4149{
4150 __le32 *p;
4151 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
4152 int err;
4153
4154 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4155 flags |= EXT4_FREE_BLOCKS_METADATA;
4156
4157 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
4158 count)) {
4159 EXT4_ERROR_INODE(inode, "attempt to clear invalid "
4160 "blocks %llu len %lu",
4161 (unsigned long long) block_to_free, count);
4162 return 1;
4163 }
4164
4165 if (try_to_extend_transaction(handle, inode)) {
4166 if (bh) {
4167 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4168 err = ext4_handle_dirty_metadata(handle, inode, bh);
4169 if (unlikely(err))
4170 goto out_err;
4171 }
4172 err = ext4_mark_inode_dirty(handle, inode);
4173 if (unlikely(err))
4174 goto out_err;
4175 err = ext4_truncate_restart_trans(handle, inode,
4176 blocks_for_truncate(inode));
4177 if (unlikely(err))
4178 goto out_err;
4179 if (bh) {
4180 BUFFER_TRACE(bh, "retaking write access");
4181 err = ext4_journal_get_write_access(handle, bh);
4182 if (unlikely(err))
4183 goto out_err;
4184 }
4185 }
4186
4187 for (p = first; p < last; p++)
4188 *p = 0;
4189
4190 ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
4191 return 0;
4192out_err:
4193 ext4_std_error(inode->i_sb, err);
4194 return err;
4195}
4196
4197/**
4198 * ext4_free_data - free a list of data blocks
4199 * @handle: handle for this transaction
4200 * @inode: inode we are dealing with
4201 * @this_bh: indirect buffer_head which contains *@first and *@last
4202 * @first: array of block numbers
4203 * @last: points immediately past the end of array
4204 *
4205 * We are freeing all blocks referred from that array (numbers are stored as
4206 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
4207 *
4208 * We accumulate contiguous runs of blocks to free. Conveniently, if these
4209 * blocks are contiguous then releasing them at one time will only affect one
4210 * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
4211 * actually use a lot of journal space.
4212 *
4213 * @this_bh will be %NULL if @first and @last point into the inode's direct
4214 * block pointers.
4215 */
4216static void ext4_free_data(handle_t *handle, struct inode *inode,
4217 struct buffer_head *this_bh,
4218 __le32 *first, __le32 *last)
4219{
4220 ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */
4221 unsigned long count = 0; /* Number of blocks in the run */
4222 __le32 *block_to_free_p = NULL; /* Pointer into inode/ind
4223 corresponding to
4224 block_to_free */
4225 ext4_fsblk_t nr; /* Current block # */
4226 __le32 *p; /* Pointer into inode/ind
4227 for current block */
4228 int err = 0;
4229
4230 if (this_bh) { /* For indirect block */
4231 BUFFER_TRACE(this_bh, "get_write_access");
4232 err = ext4_journal_get_write_access(handle, this_bh);
4233 /* Important: if we can't update the indirect pointers
4234 * to the blocks, we can't free them. */
4235 if (err)
4236 return;
4237 }
4238
4239 for (p = first; p < last; p++) {
4240 nr = le32_to_cpu(*p);
4241 if (nr) {
4242 /* accumulate blocks to free if they're contiguous */
4243 if (count == 0) {
4244 block_to_free = nr;
4245 block_to_free_p = p;
4246 count = 1;
4247 } else if (nr == block_to_free + count) {
4248 count++;
4249 } else {
4250 err = ext4_clear_blocks(handle, inode, this_bh,
4251 block_to_free, count,
4252 block_to_free_p, p);
4253 if (err)
4254 break;
4255 block_to_free = nr;
4256 block_to_free_p = p;
4257 count = 1;
4258 }
4259 }
4260 }
4261
4262 if (!err && count > 0)
4263 err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
4264 count, block_to_free_p, p);
4265 if (err < 0)
4266 /* fatal error */
4267 return;
4268
4269 if (this_bh) {
4270 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
4271
4272 /*
4273 * The buffer head should have an attached journal head at this
4274 * point. However, if the data is corrupted and an indirect
4275 * block pointed to itself, it would have been detached when
4276 * the block was cleared. Check for this instead of OOPSing.
4277 */
4278 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
4279 ext4_handle_dirty_metadata(handle, inode, this_bh);
4280 else
4281 EXT4_ERROR_INODE(inode,
4282 "circular indirect block detected at "
4283 "block %llu",
4284 (unsigned long long) this_bh->b_blocknr);
4285 }
4286}
4287
4288/**
4289 * ext4_free_branches - free an array of branches
4290 * @handle: JBD handle for this transaction
4291 * @inode: inode we are dealing with
4292 * @parent_bh: the buffer_head which contains *@first and *@last
4293 * @first: array of block numbers
4294 * @last: pointer immediately past the end of array
4295 * @depth: depth of the branches to free
4296 *
4297 * We are freeing all blocks referred from these branches (numbers are
4298 * stored as little-endian 32-bit) and updating @inode->i_blocks
4299 * appropriately.
4300 */
4301static void ext4_free_branches(handle_t *handle, struct inode *inode,
4302 struct buffer_head *parent_bh,
4303 __le32 *first, __le32 *last, int depth)
4304{
4305 ext4_fsblk_t nr;
4306 __le32 *p;
4307
4308 if (ext4_handle_is_aborted(handle))
4309 return;
4310
4311 if (depth--) {
4312 struct buffer_head *bh;
4313 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
4314 p = last;
4315 while (--p >= first) {
4316 nr = le32_to_cpu(*p);
4317 if (!nr)
4318 continue; /* A hole */
4319
4320 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
4321 nr, 1)) {
4322 EXT4_ERROR_INODE(inode,
4323 "invalid indirect mapped "
4324 "block %lu (level %d)",
4325 (unsigned long) nr, depth);
4326 break;
4327 }
4328
4329 /* Go read the buffer for the next level down */
4330 bh = sb_bread(inode->i_sb, nr);
4331
4332 /*
4333 * A read failure? Report error and clear slot
4334 * (should be rare).
4335 */
4336 if (!bh) {
4337 EXT4_ERROR_INODE_BLOCK(inode, nr,
4338 "Read failure");
4339 continue;
4340 }
4341
4342 /* This zaps the entire block. Bottom up. */
4343 BUFFER_TRACE(bh, "free child branches");
4344 ext4_free_branches(handle, inode, bh,
4345 (__le32 *) bh->b_data,
4346 (__le32 *) bh->b_data + addr_per_block,
4347 depth);
4348 brelse(bh);
4349
4350 /*
4351 * Everything below this pointer has been
4352 * released. Now let this top-of-subtree go.
4353 *
4354 * We want the freeing of this indirect block to be
4355 * atomic in the journal with the updating of the
4356 * bitmap block which owns it. So make some room in
4357 * the journal.
4358 *
4359 * We zero the parent pointer *after* freeing its
4360 * pointee in the bitmaps, so if extend_transaction()
4361 * for some reason fails to put the bitmap changes and
4362 * the release into the same transaction, recovery
4363 * will merely complain about releasing a free block,
4364 * rather than leaking blocks.
4365 */
4366 if (ext4_handle_is_aborted(handle))
4367 return;
4368 if (try_to_extend_transaction(handle, inode)) {
4369 ext4_mark_inode_dirty(handle, inode);
4370 ext4_truncate_restart_trans(handle, inode,
4371 blocks_for_truncate(inode));
4372 }
4373
4374 /*
4375 * The forget flag here is critical because if
4376 * we are journaling (and not doing data
4377 * journaling), we have to make sure a revoke
4378 * record is written to prevent the journal
4379 * replay from overwriting the (former)
4380 * indirect block if it gets reallocated as a
4381 * data block. This must happen in the same
4382 * transaction where the data blocks are
4383 * actually freed.
4384 */
4385 ext4_free_blocks(handle, inode, NULL, nr, 1,
4386 EXT4_FREE_BLOCKS_METADATA|
4387 EXT4_FREE_BLOCKS_FORGET);
4388
4389 if (parent_bh) {
4390 /*
4391 * The block which we have just freed is
4392 * pointed to by an indirect block: journal it
4393 */
4394 BUFFER_TRACE(parent_bh, "get_write_access");
4395 if (!ext4_journal_get_write_access(handle,
4396 parent_bh)){
4397 *p = 0;
4398 BUFFER_TRACE(parent_bh,
4399 "call ext4_handle_dirty_metadata");
4400 ext4_handle_dirty_metadata(handle,
4401 inode,
4402 parent_bh);
4403 }
4404 }
4405 }
4406 } else {
4407 /* We have reached the bottom of the tree. */
4408 BUFFER_TRACE(parent_bh, "free data blocks");
4409 ext4_free_data(handle, inode, parent_bh, first, last);
4410 }
4411}
4412
4413 3066 int ext4_can_truncate(struct inode *inode)
4414 3067 {
4415 3068 if (S_ISREG(inode->i_mode))
@@ -4476,19 +3129,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
4476 3129 */
4477 3130 void ext4_truncate(struct inode *inode)
4478 3131 {
4479 handle_t *handle;
4480 struct ext4_inode_info *ei = EXT4_I(inode);
4481 __le32 *i_data = ei->i_data;
4482 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
4483 struct address_space *mapping = inode->i_mapping;
4484 ext4_lblk_t offsets[4];
4485 Indirect chain[4];
4486 Indirect *partial;
4487 __le32 nr = 0;
4488 int n = 0;
4489 ext4_lblk_t last_block, max_block;
4490 unsigned blocksize = inode->i_sb->s_blocksize;
4491
4492 3132 trace_ext4_truncate_enter(inode);
4493 3133
4494 3134 if (!ext4_can_truncate(inode))
@@ -4499,149 +3139,11 @@ void ext4_truncate(struct inode *inode)
4499 3139 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
4500 3140 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
4501 3141
4502 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
4503 ext4_ext_truncate(inode);
4504 trace_ext4_truncate_exit(inode);
4505 return;
4506 }
3142 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3143 ext4_ext_truncate(inode);
3144 else
3145 ext4_ind_truncate(inode);
4507
4508 handle = start_transaction(inode);
4509 if (IS_ERR(handle))
4510 return; /* AKPM: return what? */
4511
4512 last_block = (inode->i_size + blocksize-1)
4513 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4514 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
4515 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4516
4517 if (inode->i_size & (blocksize - 1))
4518 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
4519 goto out_stop;
4520
4521 if (last_block != max_block) {
4522 n = ext4_block_to_path(inode, last_block, offsets, NULL);
4523 if (n == 0)
4524 goto out_stop; /* error */
4525 }
4526
4527 /*
4528 * OK. This truncate is going to happen. We add the inode to the
4529 * orphan list, so that if this truncate spans multiple transactions,
4530 * and we crash, we will resume the truncate when the filesystem
4531 * recovers. It also marks the inode dirty, to catch the new size.
4532 *
4533 * Implication: the file must always be in a sane, consistent
4534 * truncatable state while each transaction commits.
4535 */
4536 if (ext4_orphan_add(handle, inode))
4537 goto out_stop;
4538
4539 /*
4540 * From here we block out all ext4_get_block() callers who want to
4541 * modify the block allocation tree.
4542 */
4543 down_write(&ei->i_data_sem);
4544
4545 ext4_discard_preallocations(inode);
4546
4547 /*
4548 * The orphan list entry will now protect us from any crash which
4549 * occurs before the truncate completes, so it is now safe to propagate
4550 * the new, shorter inode size (held for now in i_size) into the
4551 * on-disk inode. We do this via i_disksize, which is the value which
4552 * ext4 *really* writes onto the disk inode.
4553 */
4554 ei->i_disksize = inode->i_size;
4555
4556 if (last_block == max_block) {
4557 /*
4558 * It is unnecessary to free any data blocks if last_block is
4559 * equal to the indirect block limit.
4560 */
4561 goto out_unlock;
4562 } else if (n == 1) { /* direct blocks */
4563 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
4564 i_data + EXT4_NDIR_BLOCKS);
4565 goto do_indirects;
4566 }
4567
4568 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
4569 /* Kill the top of shared branch (not detached) */
4570 if (nr) {
4571 if (partial == chain) {
4572 /* Shared branch grows from the inode */
4573 ext4_free_branches(handle, inode, NULL,
4574 &nr, &nr+1, (chain+n-1) - partial);
4575 *partial->p = 0;
4576 /*
4577 * We mark the inode dirty prior to restart,
4578 * and prior to stop. No need for it here.
4579 */
4580 } else {
4581 /* Shared branch grows from an indirect block */
4582 BUFFER_TRACE(partial->bh, "get_write_access");
4583 ext4_free_branches(handle, inode, partial->bh,
4584 partial->p,
4585 partial->p+1, (chain+n-1) - partial);
4586 }
4587 }
4588 /* Clear the ends of indirect blocks on the shared branch */
4589 while (partial > chain) {
4590 ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
4591 (__le32*)partial->bh->b_data+addr_per_block,
4592 (chain+n-1) - partial);
4593 BUFFER_TRACE(partial->bh, "call brelse");
4594 brelse(partial->bh);
4595 partial--;
4596 }
4597do_indirects:
4598 /* Kill the remaining (whole) subtrees */
4599 switch (offsets[0]) {
4600 default:
4601 nr = i_data[EXT4_IND_BLOCK];
4602 if (nr) {
4603 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
4604 i_data[EXT4_IND_BLOCK] = 0;
4605 }
4606 case EXT4_IND_BLOCK:
4607 nr = i_data[EXT4_DIND_BLOCK];
4608 if (nr) {
4609 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
4610 i_data[EXT4_DIND_BLOCK] = 0;
4611 }
4612 case EXT4_DIND_BLOCK:
4613 nr = i_data[EXT4_TIND_BLOCK];
4614 if (nr) {
4615 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
4616 i_data[EXT4_TIND_BLOCK] = 0;
4617 }
4618 case EXT4_TIND_BLOCK:
4619 ;
4620 }
4621
4622out_unlock:
4623 up_write(&ei->i_data_sem);
4624 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4625 ext4_mark_inode_dirty(handle, inode);
4626
4627 /*
4628 * In a multi-transaction truncate, we only make the final transaction
4629 * synchronous
4630 */
4631 if (IS_SYNC(inode))
4632 ext4_handle_sync(handle);
4633out_stop:
4634 /*
4635 * If this was a simple ftruncate(), and the file will remain alive
4636 * then we need to clear up the orphan record which we created above.
4637 * However, if this was a real unlink then we were called by
4638 * ext4_delete_inode(), and we allow that function to clean up the
4639 * orphan info for us.
4640 */
4641 if (inode->i_nlink)
4642 ext4_orphan_del(handle, inode);
4643 3146
4644 ext4_journal_stop(handle);
4645 3147 trace_ext4_truncate_exit(inode);
4646 3148 }
4647 3149
@@ -5012,7 +3514,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
5012 3514 (S_ISLNK(inode->i_mode) &&
5013 3515 !ext4_inode_is_fast_symlink(inode))) {
5014 3516 /* Validate block references which are part of inode */
5015 ret = ext4_check_inode_blockref(inode);
3517 ret = ext4_ind_check_inode(inode);
5016 3518 }
5017 3519 if (ret)
5018 3520 goto bad_inode;
@@ -5459,34 +3961,10 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
5459 3961 return 0;
5460 3962 }
5461 3963
5462static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
5463 int chunk)
5464{
5465 int indirects;
5466
5467 /* if nrblocks are contiguous */
5468 if (chunk) {
5469 /*
5470 * With N contiguous data blocks, we need at most
5471 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
5472 * 2 dindirect blocks, and 1 tindirect block
5473 */
5474 return DIV_ROUND_UP(nrblocks,
5475 EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
5476 }
5477 /*
5478 * if nrblocks are not contiguous, worst case, each block touches
5479 * an indirect block, and each indirect block touches a double indirect
5480 * block, plus a triple indirect block
5481 */
5482 indirects = nrblocks * 2 + 1;
5483 return indirects;
5484}
5485
5486 3964 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5487 3965 {
5488 3966 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
5489 return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
3967 return ext4_ind_trans_blocks(inode, nrblocks, chunk);
5490 3968 return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
5491 3969 }
5492 3970
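The indirect-block machinery deleted above (all_zeroes, ext4_find_shared, ext4_clear_blocks, ext4_free_data, ext4_free_branches) is dropped from inode.c here; ext4_truncate() now dispatches non-extent files to ext4_ind_truncate() instead. The heart of ext4_free_data() is its run accumulation: contiguous block numbers are batched so that each free operation touches at most one or two bitmap blocks. Below is a minimal userspace sketch of that loop; free_run() and the sample block list are invented for illustration and are not kernel APIs.

#include <stdio.h>

static void free_run(unsigned long start, unsigned long count)
{
	printf("free %lu block(s) at %lu\n", count, start);
}

int main(void)
{
	unsigned long blocks[] = { 100, 101, 102, 200, 201, 0, 300 };
	unsigned long start = 0, count = 0;
	size_t i;

	for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		unsigned long nr = blocks[i];

		if (!nr)			/* 0 marks a hole, as in ext4 */
			continue;
		if (count == 0) {		/* start a new run */
			start = nr;
			count = 1;
		} else if (nr == start + count) {
			count++;		/* block extends the current run */
		} else {
			free_run(start, count);	/* flush, then start over */
			start = nr;
			count = 1;
		}
	}
	if (count)				/* flush the final run */
		free_run(start, count);
	return 0;
}

Run on the sample list, this prints three runs (3 blocks at 100, 2 at 200, 1 at 300), mirroring how contiguous extents collapse into single ext4_free_blocks() calls.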
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 808c554e773..f18bfe37aff 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -202,8 +202,9 @@ setversion_out:
202 202 struct super_block *sb = inode->i_sb;
203 203 int err, err2=0;
204 204
205 if (!capable(CAP_SYS_RESOURCE))
206 return -EPERM;
205 err = ext4_resize_begin(sb);
206 if (err)
207 return err;
207 208
208 209 if (get_user(n_blocks_count, (__u32 __user *)arg))
209 210 return -EFAULT;
@@ -221,6 +222,7 @@ setversion_out:
221 222 if (err == 0)
222 223 err = err2;
223 224 mnt_drop_write(filp->f_path.mnt);
225 ext4_resize_end(sb);
224 226
225 227 return err;
226 228 }
@@ -271,8 +273,9 @@ mext_out:
271 273 struct super_block *sb = inode->i_sb;
272 274 int err, err2=0;
273 275
274 if (!capable(CAP_SYS_RESOURCE))
275 return -EPERM;
276 err = ext4_resize_begin(sb);
277 if (err)
278 return err;
276 279
277 280 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
278 281 sizeof(input)))
@@ -291,6 +294,7 @@ mext_out:
291 294 if (err == 0)
292 295 err = err2;
293 296 mnt_drop_write(filp->f_path.mnt);
297 ext4_resize_end(sb);
294 298
295 299 return err;
296 300 }
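Both resize ioctls above trade their open-coded capable(CAP_SYS_RESOURCE) check for ext4_resize_begin()/ext4_resize_end(), defined in the resize.c hunk further down: the capability check plus a test-and-set flag bit that serializes concurrent resizers. A rough userspace analogue of that begin/end exclusion pattern, with invented names (resize_begin(), resize_end(), the resizing flag):

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag resizing = ATOMIC_FLAG_INIT;

static int resize_begin(void)
{
	/* test-and-set returns nonzero if the bit was already held */
	if (atomic_flag_test_and_set(&resizing))
		return -EBUSY;
	return 0;
}

static void resize_end(void)
{
	atomic_flag_clear(&resizing);	/* release the exclusion bit */
}

int main(void)
{
	if (resize_begin() == 0) {
		/* a second entrant while we hold the bit is turned away */
		if (resize_begin() == -EBUSY)
			puts("second resizer correctly rejected");
		resize_end();
	}
	return 0;
}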
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6ed859d5685..17a5a57c415 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -75,8 +75,8 @@
75 75 *
76 76 * The inode preallocation space is used looking at the _logical_ start
77 77 * block. If only the logical file block falls within the range of prealloc
78 * space we will consume the particular prealloc space. This make sure that
79 * that the we have contiguous physical blocks representing the file blocks
78 * space we will consume the particular prealloc space. This makes sure that
79 * we have contiguous physical blocks representing the file blocks
80 80 *
81 81 * The important thing to be noted in case of inode prealloc space is that
82 82 * we don't modify the values associated to inode prealloc space except
@@ -84,7 +84,7 @@
84 84 *
85 85 * If we are not able to find blocks in the inode prealloc space and if we
86 86 * have the group allocation flag set then we look at the locality group
87 * prealloc space. These are per CPU prealloc list repreasented as
87 * prealloc space. These are per CPU prealloc list represented as
88 88 *
89 89 * ext4_sb_info.s_locality_groups[smp_processor_id()]
90 90 *
@@ -128,12 +128,13 @@
128 128 * we are doing a group prealloc we try to normalize the request to
129 129 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
130 130 * 512 blocks. This can be tuned via
131 * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in
131 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
132 132 * terms of number of blocks. If we have mounted the file system with -O
133 133 * stripe=<value> option the group prealloc request is normalized to the
134 * stripe value (sbi->s_stripe)
134 * the smallest multiple of the stripe value (sbi->s_stripe) which is
135 * greater than the default mb_group_prealloc.
135 136 *
136 * The regular allocator(using the buddy cache) supports few tunables.
137 * The regular allocator (using the buddy cache) supports a few tunables.
137 138 *
138 139 * /sys/fs/ext4/<partition>/mb_min_to_scan
139 140 * /sys/fs/ext4/<partition>/mb_max_to_scan
@@ -152,7 +153,7 @@
152 153 * best extent in the found extents. Searching for the blocks starts with
153 154 * the group specified as the goal value in allocation context via
154 155 * ac_g_ex. Each group is first checked based on the criteria whether it
155 * can used for allocation. ext4_mb_good_group explains how the groups are
156 * can be used for allocation. ext4_mb_good_group explains how the groups are
156 157 * checked.
157 158 *
158 159 * Both the prealloc space are getting populated as above. So for the first
@@ -492,10 +493,11 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
492 493 b2 = (unsigned char *) bitmap;
493 494 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
494 495 if (b1[i] != b2[i]) {
495 printk(KERN_ERR "corruption in group %u "
496 "at byte %u(%u): %x in copy != %x "
497 "on disk/prealloc\n",
498 e4b->bd_group, i, i * 8, b1[i], b2[i]);
496 ext4_msg(e4b->bd_sb, KERN_ERR,
497 "corruption in group %u "
498 "at byte %u(%u): %x in copy != %x "
499 "on disk/prealloc",
500 e4b->bd_group, i, i * 8, b1[i], b2[i]);
499 501 BUG();
500 502 }
501 503 }
@@ -1125,7 +1127,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1125 1127 grp = ext4_get_group_info(sb, group);
1126 1128
1127 1129 e4b->bd_blkbits = sb->s_blocksize_bits;
1128 e4b->bd_info = ext4_get_group_info(sb, group);
1130 e4b->bd_info = grp;
1129 1131 e4b->bd_sb = sb;
1130 1132 e4b->bd_group = group;
1131 1133 e4b->bd_buddy_page = NULL;
@@ -1281,7 +1283,7 @@ static void mb_clear_bits(void *bm, int cur, int len)
1281 1283 }
1282 1284 }
1283 1285
1284 static void mb_set_bits(void *bm, int cur, int len)
1286 void ext4_set_bits(void *bm, int cur, int len)
1285 1287 {
1286 1288 __u32 *addr;
1287 1289
@@ -1510,7 +1512,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1510 1512 }
1511 1513 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1512 1514
1513 mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1515 ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
1514 1516 mb_check_buddy(e4b);
1515 1517
1516 1518 return ret;
@@ -2223,8 +2225,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2223 2225 EXT4_DESC_PER_BLOCK_BITS(sb);
2224 2226 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2225 2227 if (meta_group_info == NULL) {
2226 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2227 "buddy group\n");
2228 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
2229 "for a buddy group");
2228 2230 goto exit_meta_group_info;
2229 2231 }
2230 2232 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
@@ -2237,7 +2239,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2237 2239
2238 2240 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2239 2241 if (meta_group_info[i] == NULL) {
2240 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2242 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
2241 2243 goto exit_group_info;
2242 2244 }
2243 2245 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
@@ -2279,8 +2281,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2279 2281
2280 2282 exit_group_info:
2281 2283 /* If a meta_group_info table has been allocated, release it now */
2282 if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
2284 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2283 2285 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2286 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
2287 }
2284 2288 exit_meta_group_info:
2285 2289 return -ENOMEM;
2286 2290 } /* ext4_mb_add_groupinfo */
@@ -2328,23 +2332,26 @@ static int ext4_mb_init_backend(struct super_block *sb)
2328 2332 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2329 2333 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2330 2334 * So a two level scheme suffices for now. */
2331 sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
2335 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2332 2336 if (sbi->s_group_info == NULL) {
2333 printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
2337 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2334 2338 return -ENOMEM;
2335 2339 }
2336 2340 sbi->s_buddy_cache = new_inode(sb);
2337 2341 if (sbi->s_buddy_cache == NULL) {
2338 printk(KERN_ERR "EXT4-fs: can't get new inode\n");
2342 ext4_msg(sb, KERN_ERR, "can't get new inode");
2339 2343 goto err_freesgi;
2340 2344 }
2341 sbi->s_buddy_cache->i_ino = get_next_ino();
2345 /* To avoid potentially colliding with a valid on-disk inode number,
2346 * use EXT4_BAD_INO for the buddy cache inode number. This inode is
2347 * not in the inode hash, so it should never be found by iget(), but
2348 * this will avoid confusion if it ever shows up during debugging. */
2349 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2342 2350 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2343 2351 for (i = 0; i < ngroups; i++) {
2344 2352 desc = ext4_get_group_desc(sb, i, NULL);
2345 2353 if (desc == NULL) {
2346 printk(KERN_ERR
2347 "EXT4-fs: can't read descriptor %u\n", i);
2354 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2348 2355 goto err_freebuddy;
2349 2356 }
2350 2357 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
@@ -2362,7 +2369,7 @@ err_freebuddy:
2362 2369 kfree(sbi->s_group_info[i]);
2363 2370 iput(sbi->s_buddy_cache);
2364 2371 err_freesgi:
2365 kfree(sbi->s_group_info);
2372 ext4_kvfree(sbi->s_group_info);
2366 2373 return -ENOMEM;
2367 2374 }
2368 2375
@@ -2404,14 +2411,15 @@ static int ext4_groupinfo_create_slab(size_t size)
2404 2411 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2405 2412 NULL);
2406 2413
2414 ext4_groupinfo_caches[cache_index] = cachep;
2415
2407 2416 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2408 2417 if (!cachep) {
2409 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2418 printk(KERN_EMERG
2419 "EXT4-fs: no memory for groupinfo slab cache\n");
2410 2420 return -ENOMEM;
2411 2421 }
2412 2422
2413 ext4_groupinfo_caches[cache_index] = cachep;
2414
2415 2423 return 0;
2416 2424 }
2417 2425
@@ -2457,12 +2465,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2457 2465 i++;
2458 2466 } while (i <= sb->s_blocksize_bits + 1);
2459 2467
2460 /* init file for buddy data */
2461 ret = ext4_mb_init_backend(sb);
2462 if (ret != 0) {
2463 goto out;
2464 }
2465
2466 2468 spin_lock_init(&sbi->s_md_lock);
2467 2469 spin_lock_init(&sbi->s_bal_lock);
2468 2470
@@ -2472,6 +2474,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2472 2474 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2473 2475 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2474 2476 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2477 /*
2478 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
2479 * to the lowest multiple of s_stripe which is bigger than
2480 * the s_mb_group_prealloc as determined above. We want
2481 * the preallocation size to be an exact multiple of the
2482 * RAID stripe size so that preallocations don't fragment
2483 * the stripes.
2484 */
2485 if (sbi->s_stripe > 1) {
2486 sbi->s_mb_group_prealloc = roundup(
2487 sbi->s_mb_group_prealloc, sbi->s_stripe);
2488 }
2475 2489
2476 2490 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2477 2491 if (sbi->s_locality_groups == NULL) {
@@ -2487,6 +2501,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2487 2501 spin_lock_init(&lg->lg_prealloc_lock);
2488 2502 }
2489 2503
2504 /* init file for buddy data */
2505 ret = ext4_mb_init_backend(sb);
2506 if (ret != 0) {
2507 goto out;
2508 }
2509
2490 2510 if (sbi->s_proc)
2491 2511 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2492 2512 &ext4_mb_seq_groups_fops, sb);
@@ -2544,32 +2564,32 @@ int ext4_mb_release(struct super_block *sb)
2544 2564 EXT4_DESC_PER_BLOCK_BITS(sb);
2545 2565 for (i = 0; i < num_meta_group_infos; i++)
2546 2566 kfree(sbi->s_group_info[i]);
2547 kfree(sbi->s_group_info);
2567 ext4_kvfree(sbi->s_group_info);
2548 2568 }
2549 2569 kfree(sbi->s_mb_offsets);
2550 2570 kfree(sbi->s_mb_maxs);
2551 2571 if (sbi->s_buddy_cache)
2552 2572 iput(sbi->s_buddy_cache);
2553 2573 if (sbi->s_mb_stats) {
2554 printk(KERN_INFO
2555 "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
2574 ext4_msg(sb, KERN_INFO,
2575 "mballoc: %u blocks %u reqs (%u success)",
2556 2576 atomic_read(&sbi->s_bal_allocated),
2557 2577 atomic_read(&sbi->s_bal_reqs),
2558 2578 atomic_read(&sbi->s_bal_success));
2559 printk(KERN_INFO
2560 "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
2561 "%u 2^N hits, %u breaks, %u lost\n",
2579 ext4_msg(sb, KERN_INFO,
2580 "mballoc: %u extents scanned, %u goal hits, "
2581 "%u 2^N hits, %u breaks, %u lost",
2562 2582 atomic_read(&sbi->s_bal_ex_scanned),
2563 2583 atomic_read(&sbi->s_bal_goals),
2564 2584 atomic_read(&sbi->s_bal_2orders),
2565 2585 atomic_read(&sbi->s_bal_breaks),
2566 2586 atomic_read(&sbi->s_mb_lost_chunks));
2567 printk(KERN_INFO
2568 "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
2587 ext4_msg(sb, KERN_INFO,
2588 "mballoc: %lu generated and it took %Lu",
2569 sbi->s_mb_buddies_generated++,
2589 sbi->s_mb_buddies_generated,
2570 2590 sbi->s_mb_generation_time);
2571 printk(KERN_INFO
2572 "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
2591 ext4_msg(sb, KERN_INFO,
2592 "mballoc: %u preallocated, %u discarded",
2573 2593 atomic_read(&sbi->s_mb_preallocated),
2574 2594 atomic_read(&sbi->s_mb_discarded));
2575 2595 }
@@ -2628,6 +2648,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2628 2648 rb_erase(&entry->node, &(db->bb_free_root));
2629 2649 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2630 2650
2651 /*
2652 * Clear the trimmed flag for the group so that the next
2653 * ext4_trim_fs can trim it.
2654 * If the volume is mounted with -o discard, online discard
2655 * is supported and the free blocks will be trimmed online.
2656 */
2657 if (!test_opt(sb, DISCARD))
2658 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2659
2631 2660 if (!db->bb_free_root.rb_node) {
2632 2661 /* No more items in the per group rb tree
2633 2662 * balance refcounts from ext4_mb_free_metadata()
@@ -2771,8 +2800,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2771 2800 * We leak some of the blocks here.
2772 2801 */
2773 2802 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2774 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2775 ac->ac_b_ex.fe_len);
2803 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2804 ac->ac_b_ex.fe_len);
2776 2805 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2777 2806 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2778 2807 if (!err)
@@ -2790,7 +2819,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2790 2819 }
2791 2820 }
2792 2821 #endif
2793 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
2822 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2823 ac->ac_b_ex.fe_len);
2794 2824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2795 2825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2796 2826 ext4_free_blks_set(sb, gdp,
@@ -2830,8 +2860,9 @@ out_err:
2830 2860
2831 2861 /*
2832 2862 * here we normalize request for locality group
2833 * Group request are normalized to s_strip size if we set the same via mount
2834 * option. If not we set it to s_mb_group_prealloc which can be configured via
2863 * Group request are normalized to s_mb_group_prealloc, which goes to
2864 * s_strip if we set the same via mount option.
2865 * s_mb_group_prealloc can be configured via
2835 2866 * /sys/fs/ext4/<partition>/mb_group_prealloc
2836 2867 *
2837 2868 * XXX: should we try to preallocate more than the group has now?
@@ -2842,10 +2873,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2842 2873 struct ext4_locality_group *lg = ac->ac_lg;
2843 2874
2844 2875 BUG_ON(lg == NULL);
2845 if (EXT4_SB(sb)->s_stripe)
2846 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
2847 else
2848 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2876 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2849 2877 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2850 2878 current->pid, ac->ac_g_ex.fe_len);
2851 2879 }
@@ -3001,9 +3029,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3001 3029
3002 3030 if (start + size <= ac->ac_o_ex.fe_logical &&
3003 3031 start > ac->ac_o_ex.fe_logical) {
3004 printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
3005 (unsigned long) start, (unsigned long) size,
3006 (unsigned long) ac->ac_o_ex.fe_logical);
3032 ext4_msg(ac->ac_sb, KERN_ERR,
3033 "start %lu, size %lu, fe_logical %lu",
3034 (unsigned long) start, (unsigned long) size,
3035 (unsigned long) ac->ac_o_ex.fe_logical);
3007 3036 }
3008 3037 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3009 3038 start > ac->ac_o_ex.fe_logical);
@@ -3262,7 +3291,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3262 3291
3263 3292 while (n) {
3264 3293 entry = rb_entry(n, struct ext4_free_data, node);
3265 mb_set_bits(bitmap, entry->start_blk, entry->count);
3294 ext4_set_bits(bitmap, entry->start_blk, entry->count);
3266 3295 n = rb_next(n);
3267 3296 }
3268 3297 return;
@@ -3304,7 +3333,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3304 3333 if (unlikely(len == 0))
3305 3334 continue;
3306 3335 BUG_ON(groupnr != group);
3307 mb_set_bits(bitmap, start, len);
3336 ext4_set_bits(bitmap, start, len);
3308 3337 preallocated += len;
3309 3338 count++;
3310 3339 }
@@ -3584,10 +3613,11 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3584 3613 bit = next + 1;
3585 3614 }
3586 3615 if (free != pa->pa_free) {
3587 printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
3588 pa, (unsigned long) pa->pa_lstart,
3589 (unsigned long) pa->pa_pstart,
3590 (unsigned long) pa->pa_len);
3616 ext4_msg(e4b->bd_sb, KERN_CRIT,
3617 "pa %p: logic %lu, phys. %lu, len %lu",
3618 pa, (unsigned long) pa->pa_lstart,
3619 (unsigned long) pa->pa_pstart,
3620 (unsigned long) pa->pa_len);
3591 3621 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3592 3622 free, pa->pa_free);
3593 3623 /*
@@ -3775,7 +3805,8 @@ repeat:
3775 3805 * use preallocation while we're discarding it */
3776 3806 spin_unlock(&pa->pa_lock);
3777 3807 spin_unlock(&ei->i_prealloc_lock);
3778 printk(KERN_ERR "uh-oh! used pa while discarding\n");
3808 ext4_msg(sb, KERN_ERR,
3809 "uh-oh! used pa while discarding");
3779 3810 WARN_ON(1);
3780 3811 schedule_timeout_uninterruptible(HZ);
3781 3812 goto repeat;
@@ -3852,12 +3883,13 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3852 3883 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3853 3884 return;
3854 3885
3855 printk(KERN_ERR "EXT4-fs: Can't allocate:"
3856 " Allocation context details:\n");
3857 printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
3858 ac->ac_status, ac->ac_flags);
3859 printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
3860 "best %lu/%lu/%lu@%lu cr %d\n",
3886 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
3887 " Allocation context details:");
3888 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
3889 ac->ac_status, ac->ac_flags);
3890 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
3891 "goal %lu/%lu/%lu@%lu, "
3892 "best %lu/%lu/%lu@%lu cr %d",
3861 3893 (unsigned long)ac->ac_o_ex.fe_group,
3862 3894 (unsigned long)ac->ac_o_ex.fe_start,
3863 3895 (unsigned long)ac->ac_o_ex.fe_len,
@@ -3871,9 +3903,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3871 3903 (unsigned long)ac->ac_b_ex.fe_len,
3872 3904 (unsigned long)ac->ac_b_ex.fe_logical,
3873 3905 (int)ac->ac_criteria);
3874 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
3875 ac->ac_found);
3906 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
3907 ac->ac_ex_scanned, ac->ac_found);
3876 printk(KERN_ERR "EXT4-fs: groups: \n");
3908 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");
3877 3909 ngroups = ext4_get_groups_count(sb);
3878 3910 for (i = 0; i < ngroups; i++) {
3879 3911 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
@@ -4637,7 +4669,7 @@ do_more:
4637 4669 }
4638 4670 ext4_mark_super_dirty(sb);
4639 4671 error_return:
4640 if (freed)
4672 if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4641 4673 dquot_free_block(inode, freed);
4642 4674 brelse(bitmap_bh);
4643 4675 ext4_std_error(sb, err);
@@ -4645,7 +4677,7 @@ error_return:
4645 4677 }
4646 4678
4647 4679 /**
4648 * ext4_add_groupblocks() -- Add given blocks to an existing group
4680 * ext4_group_add_blocks() -- Add given blocks to an existing group
4649 4681 * @handle: handle to this transaction
4650 4682 * @sb: super block
4651 4683 * @block: start physical block to add to the block group
@@ -4653,7 +4685,7 @@ error_return:
4653 4685 *
4654 4686 * This marks the blocks as free in the bitmap and buddy.
4655 4687 */
4656 void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
4688 int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4657 4689 ext4_fsblk_t block, unsigned long count)
4658 4690 {
4659 4691 struct buffer_head *bitmap_bh = NULL;
@@ -4666,25 +4698,35 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
4666 4698 struct ext4_buddy e4b;
4667 4699 int err = 0, ret, blk_free_count;
4668 4700 ext4_grpblk_t blocks_freed;
4669 struct ext4_group_info *grp;
4670 4701
4671 4702 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4672 4703
4704 if (count == 0)
4705 return 0;
4706
4673 4707 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4674 grp = ext4_get_group_info(sb, block_group);
4675 4708 /*
4676 4709 * Check to see if we are freeing blocks across a group
4677 4710 * boundary.
4678 4711 */
4679 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
4712 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4713 ext4_warning(sb, "too many blocks added to group %u\n",
4714 block_group);
4715 err = -EINVAL;
4680 4716 goto error_return;
4717 }
4681 4718
4682 4719 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4683 if (!bitmap_bh)
4720 if (!bitmap_bh) {
4721 err = -EIO;
4684 4722 goto error_return;
4723 }
4723 }
4724
4685 4725 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4686 if (!desc)
4726 if (!desc) {
4727 err = -EIO;
4687 4728 goto error_return;
4729 }
4688 4730
4689 4731 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4690 4732 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
@@ -4694,6 +4736,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
4694 4736 ext4_error(sb, "Adding blocks in system zones - "
4695 4737 "Block = %llu, count = %lu",
4696 4738 block, count);
4739 err = -EINVAL;
4697 4740 goto error_return;
4698 4741 }
4699 4742
@@ -4762,7 +4805,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
4762 4805 error_return:
4763 4806 brelse(bitmap_bh);
4764 4807 ext4_std_error(sb, err);
4765 return;
4808 return err;
4766 4809 }
4767 4810
4768 4811 /**
@@ -4782,6 +4825,8 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
4782 4825 {
4783 4826 struct ext4_free_extent ex;
4784 4827
4828 	trace_ext4_trim_extent(sb, group, start, count);
4829
4785 4830 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4786 4831
4787 4832 ex.fe_start = start;
@@ -4802,7 +4847,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
4802 4847 /**
4803 4848 * ext4_trim_all_free -- function to trim all free space in alloc. group
4804 4849 * @sb: super block for file system
4805 * @e4b: ext4 buddy
4850 * @group: group to be trimmed
4806 4851 * @start: first group block to examine
4807 4852 * @max: last group block to examine
4808 4853 * @minblocks: minimum extent block count
@@ -4823,10 +4868,12 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4823 4868 ext4_grpblk_t minblocks)
4824 4869 {
4825 4870 void *bitmap;
4826 ext4_grpblk_t next, count = 0;
4871 ext4_grpblk_t next, count = 0, free_count = 0;
4827 4872 struct ext4_buddy e4b;
4828 4873 int ret;
4829 4874
4875 	trace_ext4_trim_all_free(sb, group, start, max);
4876
4830 4877 ret = ext4_mb_load_buddy(sb, group, &e4b);
4831 4878 if (ret) {
4832 4879 ext4_error(sb, "Error in loading buddy "
@@ -4836,6 +4883,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4836 4883 bitmap = e4b.bd_bitmap;
4837 4884
4838 4885 ext4_lock_group(sb, group);
4886 	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
4887 	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
4888 		goto out;
4889
4839 4890 start = (e4b.bd_info->bb_first_free > start) ?
4840 4891 e4b.bd_info->bb_first_free : start;
4841 4892
@@ -4850,6 +4901,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4850 4901 next - start, group, &e4b);
4851 4902 count += next - start;
4852 4903 }
4904 		free_count += next - start;
4853 4905 start = next + 1;
4854 4906
4855 4907 if (fatal_signal_pending(current)) {
@@ -4863,9 +4915,13 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4863 4915 ext4_lock_group(sb, group);
4864 4916 }
4865 4917
4866 if ((e4b.bd_info->bb_free - count) < minblocks)
4918 if ((e4b.bd_info->bb_free - free_count) < minblocks)
4867 4919 break;
4868 4920 }
4921
4922 	if (!ret)
4923 		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
4924 out:
4869 4925 ext4_unlock_group(sb, group);
4870 4926 ext4_mb_unload_buddy(&e4b);
4871 4927
@@ -4904,6 +4960,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4904 4960
4905 4961 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4906 4962 return -EINVAL;
4963 	if (start + len <= first_data_blk)
4964 		goto out;
4907 4965 if (start < first_data_blk) {
4908 4966 len -= first_data_blk - start;
4909 4967 start = first_data_blk;
@@ -4952,5 +5010,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4952 5010 }
4953 5011 range->len = trimmed * sb->s_blocksize;
4954 5012
5013 	if (!ret)
5014 		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5015
5016 out:
4955 5017 return ret;
4956 5018 }
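One behavioral change worth calling out in the mballoc hunks above: with -o stripe=<value>, the group preallocation goal is no longer s_stripe itself but s_mb_group_prealloc rounded up to the smallest multiple of s_stripe at or above the default, so preallocations stay stripe-aligned without shrinking below the default size. A standalone sketch of that roundup; the numbers are only examples:

#include <stdio.h>

/* smallest multiple of step that is >= x, like the kernel's roundup() */
static unsigned long roundup_to(unsigned long x, unsigned long step)
{
	return ((x + step - 1) / step) * step;
}

int main(void)
{
	unsigned long prealloc = 512;	/* default group prealloc, in blocks */
	unsigned long stripe = 96;	/* example RAID stripe width */

	/* 512 rounds up to 576 = 6 * 96, so preallocations stay aligned */
	printf("goal = %lu blocks\n", roundup_to(prealloc, stripe));
	return 0;
}

With the old code the goal would have been 96 blocks flat; the new normalization keeps the goal at least as large as the tunable default while never letting a preallocation straddle a stripe boundary.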
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 20b5e7bfebd..9d4a636b546 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -187,7 +187,6 @@ struct ext4_allocation_context {
187 187 __u16 ac_flags;		/* allocation hints */
188 188 __u8 ac_status;
189 189 __u8 ac_criteria;
190 	__u8 ac_repeats;
191 190 __u8 ac_2order;		/* if request is to allocate 2^N blocks and
192 191 				 * N > 0, the field stores N, otherwise 0 */
193 192 __u8 ac_op;		/* operation, for history only */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8c9babac43d..565a154e22d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -289,7 +289,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
289 289 while (len--) printk("%c", *name++);
290 290 ext4fs_dirhash(de->name, de->name_len, &h);
291 291 printk(":%x.%u ", h.hash,
292 ((char *) de - base));
292 (unsigned) ((char *) de - base));
293 293 }
294 294 space += EXT4_DIR_REC_LEN(de->name_len);
295 295 names++;
@@ -1013,7 +1013,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1013 1013
1014 1014 *err = -ENOENT;
1015 1015 errout:
1016 dxtrace(printk(KERN_DEBUG "%s not found\n", name));
1016 dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1017 1017 dx_release (frames);
1018 1018 return NULL;
1019 1019 }
@@ -1985,18 +1985,11 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
1985 1985 if (!list_empty(&EXT4_I(inode)->i_orphan))
1986 1986 goto out_unlock;
1987 1987
1988 /* Orphan handling is only valid for files with data blocks
1989 * being truncated, or files being unlinked. */
1990
1991 /* @@@ FIXME: Observation from aviro:
1992 * I think I can trigger J_ASSERT in ext4_orphan_add(). We block
1993 * here (on s_orphan_lock), so race with ext4_link() which might bump
1994 * ->i_nlink. For, say it, character device. Not a regular file,
1995 * not a directory, not a symlink and ->i_nlink > 0.
1996 *
1997 * tytso, 4/25/2009: I'm not sure how that could happen;
1998 * shouldn't the fs core protect us from these sort of
1999 * unlink()/link() races?
2000 */
1988 /*
1989 * Orphan handling is only valid for files with data blocks
1990 * being truncated, or files being unlinked. Note that we either
1991 * hold i_mutex, or the inode can not be referenced from outside,
1992 * so i_nlink should not be bumped due to race
1993 */
2001 1994 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2002 1995 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
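The one-character dx_show_leaf() change above is a printf-format fix: subtracting two pointers yields a ptrdiff_t, which does not match printk's %u conversion, so the difference is cast to unsigned before printing. A minimal non-kernel illustration of the same issue:

#include <stdio.h>

int main(void)
{
	char buf[64];
	char *base = buf;
	char *de = buf + 40;

	/* printf("%u\n", de - base);  -- mismatched: %u wants unsigned int,
	 * but a pointer difference is ptrdiff_t (often a long) */
	printf("offset %u\n", (unsigned) (de - base));
	return 0;
}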
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7bb8f76d470..430c401d089 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -285,11 +285,7 @@ static int io_submit_init(struct ext4_io_submit *io,
285 285 io_end = ext4_init_io_end(inode, GFP_NOFS);
286 286 if (!io_end)
287 287 return -ENOMEM;
288 do {
289 bio = bio_alloc(GFP_NOIO, nvecs);
290 nvecs >>= 1;
291 } while (bio == NULL);
292
288 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
293 289 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
294 290 bio->bi_bdev = bh->b_bdev;
295 291 bio->bi_private = io->io_end = io_end;
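The page-io hunk above replaces a halve-and-retry loop around bio_alloc() with a single call clamped to BIO_MAX_PAGES, since a request larger than that limit would never be honored anyway. The shape of that simplification as a userspace sketch; alloc_vec() and MAX_VECS are invented names, not kernel APIs:

#include <stdio.h>
#include <stdlib.h>

#define MAX_VECS 256	/* stand-in for BIO_MAX_PAGES */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static void *alloc_vec(int nvecs)
{
	return malloc((size_t) nvecs * 16);	/* pretend per-vec payload */
}

int main(void)
{
	int nvecs = 1000;	/* caller may want more than the hard limit */
	int asked = MIN(nvecs, MAX_VECS);

	/* one clamped call replaces the old halve-until-it-works loop */
	void *v = alloc_vec(asked);

	if (!v)
		return 1;
	printf("allocated %d vecs\n", asked);
	free(v);
	return 0;
}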
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 80bbc9c60c2..707d3f16f7c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -16,6 +16,35 @@
16 16
17 17 #include "ext4_jbd2.h"
18 18
19int ext4_resize_begin(struct super_block *sb)
20{
21 int ret = 0;
22
23 if (!capable(CAP_SYS_RESOURCE))
24 return -EPERM;
25
26 /*
27 * We are not allowed to do online-resizing on a filesystem mounted
28 * with error, because it can destroy the filesystem easily.
29 */
30 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
31 ext4_warning(sb, "There are errors in the filesystem, "
32 "so online resizing is not allowed\n");
33 return -EPERM;
34 }
35
36 if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags))
37 ret = -EBUSY;
38
39 return ret;
40}
41
42void ext4_resize_end(struct super_block *sb)
43{
44 clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
45 smp_mb__after_clear_bit();
46}
47
19 48 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
20 49 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
21 50
@@ -118,10 +147,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
118 147 brelse(bh);
119 148 bh = ERR_PTR(err);
120 149 } else {
121 	lock_buffer(bh);
122 150 memset(bh->b_data, 0, sb->s_blocksize);
123 151 set_buffer_uptodate(bh);
124 	unlock_buffer(bh);
125 152 }
126 153
127 154 return bh;
@@ -132,8 +159,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
132 * If that fails, restart the transaction & regain write access for the 159 * If that fails, restart the transaction & regain write access for the
133 * buffer head which is used for block_bitmap modifications. 160 * buffer head which is used for block_bitmap modifications.
134 */ 161 */
135static int extend_or_restart_transaction(handle_t *handle, int thresh, 162static int extend_or_restart_transaction(handle_t *handle, int thresh)
136 struct buffer_head *bh)
137{ 163{
138 int err; 164 int err;
139 165
@@ -144,9 +170,8 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
144 if (err < 0) 170 if (err < 0)
145 return err; 171 return err;
146 if (err) { 172 if (err) {
147 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 173 err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
148 return err; 174 if (err)
149 if ((err = ext4_journal_get_write_access(handle, bh)))
150 return err; 175 return err;
151 } 176 }
152 177
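The resulting helper is the standard jbd2 credit top-up idiom; since the hunk
context omits its opening lines, here is the whole function as a sketch
(kernel context assumed, reconstructed around the usual
ext4_handle_has_enough_credits()/ext4_journal_extend() calls):

static int extend_or_restart_transaction(handle_t *handle, int thresh)
{
	int err;

	/* nothing to do while the handle still has `thresh` credits */
	if (ext4_handle_has_enough_credits(handle, thresh))
		return 0;

	/* > 0 from ext4_journal_extend() means "cannot extend, restart" */
	err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
	if (err < 0)
		return err;
	if (err) {
		err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
		if (err)
			return err;
	}

	return 0;
}

Dropping the buffer_head parameter works because, as a later hunk shows, the
block bitmap buffer is now created by bclean() only after the final
extend_or_restart_transaction() call, so no buffer needs its journal write
access re-taken across a restart.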
@@ -181,21 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb,
181 if (IS_ERR(handle)) 206 if (IS_ERR(handle))
182 return PTR_ERR(handle); 207 return PTR_ERR(handle);
183 208
184 mutex_lock(&sbi->s_resize_lock); 209 BUG_ON(input->group != sbi->s_groups_count);
185 if (input->group != sbi->s_groups_count) {
186 err = -EBUSY;
187 goto exit_journal;
188 }
189
190 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
191 err = PTR_ERR(bh);
192 goto exit_journal;
193 }
194
195 if (ext4_bg_has_super(sb, input->group)) {
196 ext4_debug("mark backup superblock %#04llx (+0)\n", start);
197 ext4_set_bit(0, bh->b_data);
198 }
199 210
200 /* Copy all of the GDT blocks into the backup in this group */ 211 /* Copy all of the GDT blocks into the backup in this group */
201 for (i = 0, bit = 1, block = start + 1; 212 for (i = 0, bit = 1, block = start + 1;
@@ -203,29 +214,26 @@ static int setup_new_group_blocks(struct super_block *sb,
203 struct buffer_head *gdb; 214 struct buffer_head *gdb;
204 215
205 ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 216 ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
206 217 err = extend_or_restart_transaction(handle, 1);
207 if ((err = extend_or_restart_transaction(handle, 1, bh))) 218 if (err)
208 goto exit_bh; 219 goto exit_journal;
209 220
210 gdb = sb_getblk(sb, block); 221 gdb = sb_getblk(sb, block);
211 if (!gdb) { 222 if (!gdb) {
212 err = -EIO; 223 err = -EIO;
213 goto exit_bh; 224 goto exit_journal;
214 } 225 }
215 if ((err = ext4_journal_get_write_access(handle, gdb))) { 226 if ((err = ext4_journal_get_write_access(handle, gdb))) {
216 brelse(gdb); 227 brelse(gdb);
217 goto exit_bh; 228 goto exit_journal;
218 } 229 }
219 lock_buffer(gdb);
220 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 230 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
221 set_buffer_uptodate(gdb); 231 set_buffer_uptodate(gdb);
222 unlock_buffer(gdb);
223 err = ext4_handle_dirty_metadata(handle, NULL, gdb); 232 err = ext4_handle_dirty_metadata(handle, NULL, gdb);
224 if (unlikely(err)) { 233 if (unlikely(err)) {
225 brelse(gdb); 234 brelse(gdb);
226 goto exit_bh; 235 goto exit_journal;
227 } 236 }
228 ext4_set_bit(bit, bh->b_data);
229 brelse(gdb); 237 brelse(gdb);
230 } 238 }
231 239
@@ -235,9 +243,22 @@ static int setup_new_group_blocks(struct super_block *sb,
235 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, 243 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
236 GFP_NOFS); 244 GFP_NOFS);
237 if (err) 245 if (err)
238 goto exit_bh; 246 goto exit_journal;
239 for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) 247
240 ext4_set_bit(bit, bh->b_data); 248 err = extend_or_restart_transaction(handle, 2);
249 if (err)
250 goto exit_journal;
251
252 bh = bclean(handle, sb, input->block_bitmap);
253 if (IS_ERR(bh)) {
254 err = PTR_ERR(bh);
255 goto exit_journal;
256 }
257
258 if (ext4_bg_has_super(sb, input->group)) {
259 ext4_debug("mark backup group tables %#04llx (+0)\n", start);
260 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1);
261 }
241 262
242 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 263 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
243 input->block_bitmap - start); 264 input->block_bitmap - start);
@@ -253,12 +274,9 @@ static int setup_new_group_blocks(struct super_block *sb,
253 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 274 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
254 if (err) 275 if (err)
255 goto exit_bh; 276 goto exit_bh;
256 for (i = 0, bit = input->inode_table - start; 277 ext4_set_bits(bh->b_data, input->inode_table - start,
257 i < sbi->s_itb_per_group; i++, bit++) 278 sbi->s_itb_per_group);
258 ext4_set_bit(bit, bh->b_data);
259 279
260 if ((err = extend_or_restart_transaction(handle, 2, bh)))
261 goto exit_bh;
262 280
263 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 281 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
264 bh->b_data); 282 bh->b_data);
@@ -285,7 +303,6 @@ exit_bh:
285 brelse(bh); 303 brelse(bh);
286 304
287exit_journal: 305exit_journal:
288 mutex_unlock(&sbi->s_resize_lock);
289 if ((err2 = ext4_journal_stop(handle)) && !err) 306 if ((err2 = ext4_journal_stop(handle)) && !err)
290 err = err2; 307 err = err2;
291 308
@@ -377,15 +394,15 @@ static int verify_reserved_gdb(struct super_block *sb,
377 * fail once we start modifying the data on disk, because JBD has no rollback. 394 * fail once we start modifying the data on disk, because JBD has no rollback.
378 */ 395 */
379static int add_new_gdb(handle_t *handle, struct inode *inode, 396static int add_new_gdb(handle_t *handle, struct inode *inode,
380 struct ext4_new_group_data *input, 397 ext4_group_t group)
381 struct buffer_head **primary)
382{ 398{
383 struct super_block *sb = inode->i_sb; 399 struct super_block *sb = inode->i_sb;
384 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 400 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
385 unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 401 unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
386 ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; 402 ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
387 struct buffer_head **o_group_desc, **n_group_desc; 403 struct buffer_head **o_group_desc, **n_group_desc;
388 struct buffer_head *dind; 404 struct buffer_head *dind;
405 struct buffer_head *gdb_bh;
389 int gdbackups; 406 int gdbackups;
390 struct ext4_iloc iloc; 407 struct ext4_iloc iloc;
391 __le32 *data; 408 __le32 *data;
@@ -408,11 +425,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
408 return -EPERM; 425 return -EPERM;
409 } 426 }
410 427
411 *primary = sb_bread(sb, gdblock); 428 gdb_bh = sb_bread(sb, gdblock);
412 if (!*primary) 429 if (!gdb_bh)
413 return -EIO; 430 return -EIO;
414 431
415 if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { 432 gdbackups = verify_reserved_gdb(sb, gdb_bh);
433 if (gdbackups < 0) {
416 err = gdbackups; 434 err = gdbackups;
417 goto exit_bh; 435 goto exit_bh;
418 } 436 }
@@ -427,7 +445,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
427 data = (__le32 *)dind->b_data; 445 data = (__le32 *)dind->b_data;
428 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 446 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
429 ext4_warning(sb, "new group %u GDT block %llu not reserved", 447 ext4_warning(sb, "new group %u GDT block %llu not reserved",
430 input->group, gdblock); 448 group, gdblock);
431 err = -EINVAL; 449 err = -EINVAL;
432 goto exit_dind; 450 goto exit_dind;
433 } 451 }
@@ -436,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
436 if (unlikely(err)) 454 if (unlikely(err))
437 goto exit_dind; 455 goto exit_dind;
438 456
439 err = ext4_journal_get_write_access(handle, *primary); 457 err = ext4_journal_get_write_access(handle, gdb_bh);
440 if (unlikely(err)) 458 if (unlikely(err))
441 goto exit_sbh; 459 goto exit_sbh;
442 460
@@ -449,12 +467,13 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
449 if (unlikely(err)) 467 if (unlikely(err))
450 goto exit_dindj; 468 goto exit_dindj;
451 469
452 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = ext4_kvmalloc((gdb_num + 1) *
453 GFP_NOFS); 471 sizeof(struct buffer_head *),
472 GFP_NOFS);
454 if (!n_group_desc) { 473 if (!n_group_desc) {
455 err = -ENOMEM; 474 err = -ENOMEM;
456 ext4_warning(sb, 475 ext4_warning(sb, "not enough memory for %lu groups",
457 "not enough memory for %lu groups", gdb_num + 1); 476 gdb_num + 1);
458 goto exit_inode; 477 goto exit_inode;
459 } 478 }
460 479
@@ -475,8 +494,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
475 } 494 }
476 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 495 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
477 ext4_mark_iloc_dirty(handle, inode, &iloc); 496 ext4_mark_iloc_dirty(handle, inode, &iloc);
478 memset((*primary)->b_data, 0, sb->s_blocksize); 497 memset(gdb_bh->b_data, 0, sb->s_blocksize);
479 err = ext4_handle_dirty_metadata(handle, NULL, *primary); 498 err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
480 if (unlikely(err)) { 499 if (unlikely(err)) {
481 ext4_std_error(sb, err); 500 ext4_std_error(sb, err);
482 goto exit_inode; 501 goto exit_inode;
@@ -486,10 +505,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
486 o_group_desc = EXT4_SB(sb)->s_group_desc; 505 o_group_desc = EXT4_SB(sb)->s_group_desc;
487 memcpy(n_group_desc, o_group_desc, 506 memcpy(n_group_desc, o_group_desc,
488 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 507 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
489 n_group_desc[gdb_num] = *primary; 508 n_group_desc[gdb_num] = gdb_bh;
490 EXT4_SB(sb)->s_group_desc = n_group_desc; 509 EXT4_SB(sb)->s_group_desc = n_group_desc;
491 EXT4_SB(sb)->s_gdb_count++; 510 EXT4_SB(sb)->s_gdb_count++;
492 kfree(o_group_desc); 511 ext4_kvfree(o_group_desc);
493 512
494 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 513 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
495 err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 514 err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
@@ -499,6 +518,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
499 return err; 518 return err;
500 519
501exit_inode: 520exit_inode:
521 ext4_kvfree(n_group_desc);
502 /* ext4_handle_release_buffer(handle, iloc.bh); */ 522 /* ext4_handle_release_buffer(handle, iloc.bh); */
503 brelse(iloc.bh); 523 brelse(iloc.bh);
504exit_dindj: 524exit_dindj:
@@ -508,7 +528,7 @@ exit_sbh:
508exit_dind: 528exit_dind:
509 brelse(dind); 529 brelse(dind);
510exit_bh: 530exit_bh:
511 brelse(*primary); 531 brelse(gdb_bh);
512 532
513 ext4_debug("leaving with error %d\n", err); 533 ext4_debug("leaving with error %d\n", err);
514 return err; 534 return err;
@@ -528,7 +548,7 @@ exit_bh:
528 * backup GDT blocks are stored in their reserved primary GDT block. 548 * backup GDT blocks are stored in their reserved primary GDT block.
529 */ 549 */
530static int reserve_backup_gdb(handle_t *handle, struct inode *inode, 550static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
531 struct ext4_new_group_data *input) 551 ext4_group_t group)
532{ 552{
533 struct super_block *sb = inode->i_sb; 553 struct super_block *sb = inode->i_sb;
534 int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); 554 int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
@@ -599,7 +619,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
599 * Finally we can add each of the reserved backup GDT blocks from 619 * Finally we can add each of the reserved backup GDT blocks from
600 * the new group to its reserved primary GDT block. 620 * the new group to its reserved primary GDT block.
601 */ 621 */
602 blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); 622 blk = group * EXT4_BLOCKS_PER_GROUP(sb);
603 for (i = 0; i < reserved_gdb; i++) { 623 for (i = 0; i < reserved_gdb; i++) {
604 int err2; 624 int err2;
605 data = (__le32 *)primary[i]->b_data; 625 data = (__le32 *)primary[i]->b_data;
@@ -799,13 +819,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
799 goto exit_put; 819 goto exit_put;
800 } 820 }
801 821
802 mutex_lock(&sbi->s_resize_lock);
803 if (input->group != sbi->s_groups_count) {
804 ext4_warning(sb, "multiple resizers run on filesystem!");
805 err = -EBUSY;
806 goto exit_journal;
807 }
808
809 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 822 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
810 goto exit_journal; 823 goto exit_journal;
811 824
@@ -820,16 +833,25 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
820 if ((err = ext4_journal_get_write_access(handle, primary))) 833 if ((err = ext4_journal_get_write_access(handle, primary)))
821 goto exit_journal; 834 goto exit_journal;
822 835
823 if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && 836 if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
824 (err = reserve_backup_gdb(handle, inode, input))) 837 err = reserve_backup_gdb(handle, inode, input->group);
838 if (err)
839 goto exit_journal;
840 }
841 } else {
842 /*
 843 * Note that we can access the new group descriptor block
 844 * safely only if add_new_gdb() succeeds.
845 */
846 err = add_new_gdb(handle, inode, input->group);
847 if (err)
825 goto exit_journal; 848 goto exit_journal;
826 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 849 primary = sbi->s_group_desc[gdb_num];
827 goto exit_journal; 850 }
828 851
829 /* 852 /*
830 * OK, now we've set up the new group. Time to make it active. 853 * OK, now we've set up the new group. Time to make it active.
831 * 854 *
832 * We do not lock all allocations via s_resize_lock
833 * so we have to be safe wrt. concurrent accesses the group 855 * so we have to be safe wrt. concurrent accesses the group
834 * data. So we need to be careful to set all of the relevant 856 * data. So we need to be careful to set all of the relevant
835 * group descriptor data etc. *before* we enable the group. 857 * group descriptor data etc. *before* we enable the group.
@@ -886,13 +908,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
886 * 908 *
887 * The precise rules we use are: 909 * The precise rules we use are:
888 * 910 *
889 * * Writers of s_groups_count *must* hold s_resize_lock
890 * AND
891 * * Writers must perform a smp_wmb() after updating all dependent 911 * * Writers must perform a smp_wmb() after updating all dependent
892 * data and before modifying the groups count 912 * data and before modifying the groups count
893 * 913 *
894 * * Readers must hold s_resize_lock over the access
895 * OR
896 * * Readers must perform an smp_rmb() after reading the groups count 914 * * Readers must perform an smp_rmb() after reading the groups count
897 * and before reading any dependent data. 915 * and before reading any dependent data.
898 * 916 *
@@ -937,10 +955,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
937 ext4_handle_dirty_super(handle, sb); 955 ext4_handle_dirty_super(handle, sb);
938 956
939exit_journal: 957exit_journal:
940 mutex_unlock(&sbi->s_resize_lock);
941 if ((err2 = ext4_journal_stop(handle)) && !err) 958 if ((err2 = ext4_journal_stop(handle)) && !err)
942 err = err2; 959 err = err2;
943 if (!err) { 960 if (!err && primary) {
944 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 961 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
945 sizeof(struct ext4_super_block)); 962 sizeof(struct ext4_super_block));
946 update_backups(sb, primary->b_blocknr, primary->b_data, 963 update_backups(sb, primary->b_blocknr, primary->b_data,
@@ -969,16 +986,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
969 ext4_grpblk_t add; 986 ext4_grpblk_t add;
970 struct buffer_head *bh; 987 struct buffer_head *bh;
971 handle_t *handle; 988 handle_t *handle;
972 int err; 989 int err, err2;
973 ext4_group_t group; 990 ext4_group_t group;
974 991
975 /* We don't need to worry about locking wrt other resizers just
976 * yet: we're going to revalidate es->s_blocks_count after
977 * taking the s_resize_lock below. */
978 o_blocks_count = ext4_blocks_count(es); 992 o_blocks_count = ext4_blocks_count(es);
979 993
980 if (test_opt(sb, DEBUG)) 994 if (test_opt(sb, DEBUG))
981 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", 995 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n",
982 o_blocks_count, n_blocks_count); 996 o_blocks_count, n_blocks_count);
983 997
984 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 998 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -995,7 +1009,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
995 1009
996 if (n_blocks_count < o_blocks_count) { 1010 if (n_blocks_count < o_blocks_count) {
997 ext4_warning(sb, "can't shrink FS - resize aborted"); 1011 ext4_warning(sb, "can't shrink FS - resize aborted");
998 return -EBUSY; 1012 return -EINVAL;
999 } 1013 }
1000 1014
1001 /* Handle the remaining blocks in the last group only. */ 1015 /* Handle the remaining blocks in the last group only. */
@@ -1038,32 +1052,25 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1038 goto exit_put; 1052 goto exit_put;
1039 } 1053 }
1040 1054
1041 mutex_lock(&EXT4_SB(sb)->s_resize_lock);
1042 if (o_blocks_count != ext4_blocks_count(es)) {
1043 ext4_warning(sb, "multiple resizers run on filesystem!");
1044 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1045 ext4_journal_stop(handle);
1046 err = -EBUSY;
1047 goto exit_put;
1048 }
1049
1050 if ((err = ext4_journal_get_write_access(handle, 1055 if ((err = ext4_journal_get_write_access(handle,
1051 EXT4_SB(sb)->s_sbh))) { 1056 EXT4_SB(sb)->s_sbh))) {
1052 ext4_warning(sb, "error %d on journal write access", err); 1057 ext4_warning(sb, "error %d on journal write access", err);
1053 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1054 ext4_journal_stop(handle); 1058 ext4_journal_stop(handle);
1055 goto exit_put; 1059 goto exit_put;
1056 } 1060 }
1057 ext4_blocks_count_set(es, o_blocks_count + add); 1061 ext4_blocks_count_set(es, o_blocks_count + add);
1058 mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
1059 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1062 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1060 o_blocks_count + add); 1063 o_blocks_count + add);
1061 /* We add the blocks to the bitmap and set the group need init bit */ 1064 /* We add the blocks to the bitmap and set the group need init bit */
1062 ext4_add_groupblocks(handle, sb, o_blocks_count, add); 1065 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
1063 ext4_handle_dirty_super(handle, sb); 1066 ext4_handle_dirty_super(handle, sb);
1064 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1067 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1065 o_blocks_count + add); 1068 o_blocks_count + add);
1066 if ((err = ext4_journal_stop(handle))) 1069 err2 = ext4_journal_stop(handle);
1070 if (!err && err2)
1071 err = err2;
1072
1073 if (err)
1067 goto exit_put; 1074 goto exit_put;
1068 1075
1069 if (test_opt(sb, DEBUG)) 1076 if (test_opt(sb, DEBUG))
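The tail of ext4_group_extend() now uses the first-error-wins idiom, so a
failure from ext4_journal_stop() is reported but can no longer mask an
earlier error. A tiny stand-alone rendering of the idiom (stop_journal() is
hypothetical):

#include <stdio.h>

/* hypothetical cleanup step that can itself fail */
static int stop_journal(int fail) { return fail ? -5 /* -EIO */ : 0; }

static int do_work(int work_err, int stop_fails)
{
	int err = work_err;
	int err2 = stop_journal(stop_fails);	/* always runs */

	if (!err && err2)	/* keep the first error, never overwrite it */
		err = err2;
	return err;
}

int main(void)
{
	printf("%d\n", do_work(0, 0));		/* 0: all good */
	printf("%d\n", do_work(0, 1));		/* -5: cleanup error kept */
	printf("%d\n", do_work(-22, 1));	/* -22: first error wins */
	return 0;
}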
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9ea71aa864b..4687fea0c00 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -110,6 +110,35 @@ static struct file_system_type ext3_fs_type = {
110#define IS_EXT3_SB(sb) (0) 110#define IS_EXT3_SB(sb) (0)
111#endif 111#endif
112 112
113void *ext4_kvmalloc(size_t size, gfp_t flags)
114{
115 void *ret;
116
117 ret = kmalloc(size, flags);
118 if (!ret)
119 ret = __vmalloc(size, flags, PAGE_KERNEL);
120 return ret;
121}
122
123void *ext4_kvzalloc(size_t size, gfp_t flags)
124{
125 void *ret;
126
127 ret = kzalloc(size, flags);
128 if (!ret)
129 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
130 return ret;
131}
132
133void ext4_kvfree(void *ptr)
134{
135 if (is_vmalloc_addr(ptr))
136 vfree(ptr);
137 else
138 kfree(ptr);
139
140}
141
113ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, 142ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
114 struct ext4_group_desc *bg) 143 struct ext4_group_desc *bg)
115{ 144{
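ext4_kvmalloc()/ext4_kvzalloc() try the physically contiguous allocator first
and only fall back to vmalloc() when that fails, while ext4_kvfree() picks
the matching release path via is_vmalloc_addr(). User space has no
is_vmalloc_addr(), so this sketch of the same try-then-fall-back pattern tags
each allocation itself (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

enum { FROM_MALLOC = 1, FROM_MMAP = 2 };
#define HDR 16	/* keeps the payload 16-byte aligned */

static void *kv_alloc(size_t size)
{
	unsigned char *p = malloc(size + HDR);

	if (!p) {
		/* fall back to the "large allocation" path */
		p = mmap(NULL, size + HDR, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return NULL;
		p[0] = FROM_MMAP;
	} else {
		p[0] = FROM_MALLOC;
	}
	memcpy(p + 1, &size, sizeof(size));	/* munmap() needs the size back */
	return p + HDR;
}

static void kv_free(void *ptr)
{
	unsigned char *p;
	size_t size;

	if (!ptr)
		return;
	p = (unsigned char *)ptr - HDR;
	memcpy(&size, p + 1, sizeof(size));
	if (p[0] == FROM_MMAP)
		munmap(p, size + HDR);
	else
		free(p);
}

int main(void)
{
	char *buf = kv_alloc(1 << 20);

	if (!buf)
		return 1;
	strcpy(buf, "allocated via the fallback wrapper");
	puts(buf);
	kv_free(buf);
	return 0;
}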
@@ -269,6 +298,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
269 journal_t *journal; 298 journal_t *journal;
270 handle_t *handle; 299 handle_t *handle;
271 300
301 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
272 if (sb->s_flags & MS_RDONLY) 302 if (sb->s_flags & MS_RDONLY)
273 return ERR_PTR(-EROFS); 303 return ERR_PTR(-EROFS);
274 304
@@ -789,11 +819,8 @@ static void ext4_put_super(struct super_block *sb)
789 819
790 for (i = 0; i < sbi->s_gdb_count; i++) 820 for (i = 0; i < sbi->s_gdb_count; i++)
791 brelse(sbi->s_group_desc[i]); 821 brelse(sbi->s_group_desc[i]);
792 kfree(sbi->s_group_desc); 822 ext4_kvfree(sbi->s_group_desc);
793 if (is_vmalloc_addr(sbi->s_flex_groups)) 823 ext4_kvfree(sbi->s_flex_groups);
794 vfree(sbi->s_flex_groups);
795 else
796 kfree(sbi->s_flex_groups);
797 percpu_counter_destroy(&sbi->s_freeblocks_counter); 824 percpu_counter_destroy(&sbi->s_freeblocks_counter);
798 percpu_counter_destroy(&sbi->s_freeinodes_counter); 825 percpu_counter_destroy(&sbi->s_freeinodes_counter);
799 percpu_counter_destroy(&sbi->s_dirs_counter); 826 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1976,15 +2003,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
1976 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 2003 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1977 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; 2004 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1978 size = flex_group_count * sizeof(struct flex_groups); 2005 size = flex_group_count * sizeof(struct flex_groups);
1979 sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); 2006 sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
1980 if (sbi->s_flex_groups == NULL) { 2007 if (sbi->s_flex_groups == NULL) {
1981 sbi->s_flex_groups = vzalloc(size); 2008 ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
1982 if (sbi->s_flex_groups == NULL) { 2009 flex_group_count);
1983 ext4_msg(sb, KERN_ERR, 2010 goto failed;
1984 "not enough memory for %u flex groups",
1985 flex_group_count);
1986 goto failed;
1987 }
1988 } 2011 }
1989 2012
1990 for (i = 0; i < sbi->s_groups_count; i++) { 2013 for (i = 0; i < sbi->s_groups_count; i++) {
@@ -2383,17 +2406,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2383 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); 2406 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2384 unsigned long stripe_width = 2407 unsigned long stripe_width =
2385 le32_to_cpu(sbi->s_es->s_raid_stripe_width); 2408 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2409 int ret;
2386 2410
2387 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) 2411 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2388 return sbi->s_stripe; 2412 ret = sbi->s_stripe;
2389 2413 else if (stripe_width <= sbi->s_blocks_per_group)
2390 if (stripe_width <= sbi->s_blocks_per_group) 2414 ret = stripe_width;
2391 return stripe_width; 2415 else if (stride <= sbi->s_blocks_per_group)
2416 ret = stride;
2417 else
2418 ret = 0;
2392 2419
2393 if (stride <= sbi->s_blocks_per_group) 2420 /*
2394 return stride; 2421 * If the stripe width is 1, this makes no sense and
2422 * we set it to 0 to turn off stripe handling code.
2423 */
2424 if (ret <= 1)
2425 ret = 0;
2395 2426
2396 return 0; 2427 return ret;
2397} 2428}
2398 2429
2399/* sysfs support */ 2430/* sysfs support */
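The rewritten ext4_get_stripe_size() folds the three early returns into one
selection so the final ret <= 1 clamp applies to every source of the stripe
value, not just the fall-through case. A stand-alone replica of the new
logic, with the kernel types swapped for plain integers:

#include <stdio.h>

static unsigned long get_stripe_size(unsigned long s_stripe,
				     unsigned long stripe_width,
				     unsigned long stride,
				     unsigned long blocks_per_group)
{
	unsigned long ret;

	/* mount option first, then RAID stripe width, then stride */
	if (s_stripe && s_stripe <= blocks_per_group)
		ret = s_stripe;
	else if (stripe_width <= blocks_per_group)
		ret = stripe_width;
	else if (stride <= blocks_per_group)
		ret = stride;
	else
		ret = 0;

	if (ret <= 1)	/* a 0- or 1-block stripe disables striping */
		ret = 0;
	return ret;
}

int main(void)
{
	/* a stripe width of 1 used to be returned verbatim; now it is 0 */
	printf("%lu\n", get_stripe_size(0, 1, 0, 32768));	/* 0 */
	printf("%lu\n", get_stripe_size(0, 128, 32, 32768));	/* 128 */
	printf("%lu\n", get_stripe_size(64, 128, 32, 32768));	/* 64 */
	return 0;
}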
@@ -3408,8 +3439,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3408 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3439 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3409 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3440 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3410 EXT4_DESC_PER_BLOCK(sb); 3441 EXT4_DESC_PER_BLOCK(sb);
3411 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), 3442 sbi->s_group_desc = ext4_kvmalloc(db_count *
3412 GFP_KERNEL); 3443 sizeof(struct buffer_head *),
3444 GFP_KERNEL);
3413 if (sbi->s_group_desc == NULL) { 3445 if (sbi->s_group_desc == NULL) {
3414 ext4_msg(sb, KERN_ERR, "not enough memory"); 3446 ext4_msg(sb, KERN_ERR, "not enough memory");
3415 goto failed_mount; 3447 goto failed_mount;
@@ -3491,7 +3523,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3491 3523
3492 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3524 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3493 mutex_init(&sbi->s_orphan_lock); 3525 mutex_init(&sbi->s_orphan_lock);
3494 mutex_init(&sbi->s_resize_lock); 3526 sbi->s_resize_flags = 0;
3495 3527
3496 sb->s_root = NULL; 3528 sb->s_root = NULL;
3497 3529
@@ -3741,12 +3773,8 @@ failed_mount_wq:
3741 } 3773 }
3742failed_mount3: 3774failed_mount3:
3743 del_timer(&sbi->s_err_report); 3775 del_timer(&sbi->s_err_report);
3744 if (sbi->s_flex_groups) { 3776 if (sbi->s_flex_groups)
3745 if (is_vmalloc_addr(sbi->s_flex_groups)) 3777 ext4_kvfree(sbi->s_flex_groups);
3746 vfree(sbi->s_flex_groups);
3747 else
3748 kfree(sbi->s_flex_groups);
3749 }
3750 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3778 percpu_counter_destroy(&sbi->s_freeblocks_counter);
3751 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3779 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3752 percpu_counter_destroy(&sbi->s_dirs_counter); 3780 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -3756,7 +3784,7 @@ failed_mount3:
3756failed_mount2: 3784failed_mount2:
3757 for (i = 0; i < db_count; i++) 3785 for (i = 0; i < db_count; i++)
3758 brelse(sbi->s_group_desc[i]); 3786 brelse(sbi->s_group_desc[i]);
3759 kfree(sbi->s_group_desc); 3787 ext4_kvfree(sbi->s_group_desc);
3760failed_mount: 3788failed_mount:
3761 if (sbi->s_proc) { 3789 if (sbi->s_proc) {
3762 remove_proc_entry(sb->s_id, ext4_proc_root); 3790 remove_proc_entry(sb->s_id, ext4_proc_root);
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
new file mode 100644
index 00000000000..011ba6670d9
--- /dev/null
+++ b/fs/ext4/truncate.h
@@ -0,0 +1,43 @@
1/*
2 * linux/fs/ext4/truncate.h
3 *
4 * Common inline functions needed for truncate support
5 */
6
7/*
8 * Truncate blocks that were not used by write. We have to truncate the
9 * pagecache as well so that corresponding buffers get properly unmapped.
10 */
11static inline void ext4_truncate_failed_write(struct inode *inode)
12{
13 truncate_inode_pages(inode->i_mapping, inode->i_size);
14 ext4_truncate(inode);
15}
16
17/*
18 * Work out how many blocks we need to proceed with the next chunk of a
19 * truncate transaction.
20 */
21static inline unsigned long ext4_blocks_for_truncate(struct inode *inode)
22{
23 ext4_lblk_t needed;
24
25 needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
26
27 /* Give ourselves just enough room to cope with inodes in which
28 * i_blocks is corrupt: we've seen disk corruptions in the past
29 * which resulted in random data in an inode which looked enough
30 * like a regular file for ext4 to try to delete it. Things
31 * will go a bit crazy if that happens, but at least we should
32 * try not to panic the whole kernel. */
33 if (needed < 2)
34 needed = 2;
35
36 /* But we need to bound the transaction so we don't overflow the
37 * journal. */
38 if (needed > EXT4_MAX_TRANS_DATA)
39 needed = EXT4_MAX_TRANS_DATA;
40
41 return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
42}
43
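The two clamps in ext4_blocks_for_truncate() are worth seeing with numbers:
a corrupt i_blocks can be absurdly small or absurdly large, so the estimate
is floored at 2 and capped at the journal's per-transaction budget. A
runnable rendering with illustrative constants (the real EXT4_MAX_TRANS_DATA
and EXT4_DATA_TRANS_BLOCKS values are derived in the ext4 headers):

#include <stdio.h>

#define MAX_TRANS_DATA		64	/* illustrative */
#define DATA_TRANS_BLOCKS	34	/* illustrative */

static unsigned long blocks_for_truncate(unsigned long i_blocks,
					 unsigned int blocksize_bits)
{
	/* i_blocks counts 512-byte sectors; convert to fs blocks */
	unsigned long needed = i_blocks >> (blocksize_bits - 9);

	if (needed < 2)			/* floor: tolerate corrupt i_blocks */
		needed = 2;
	if (needed > MAX_TRANS_DATA)	/* cap: do not overflow the journal */
		needed = MAX_TRANS_DATA;
	return DATA_TRANS_BLOCKS + needed;
}

int main(void)
{
	printf("%lu\n", blocks_for_truncate(0, 12));		/* 36: floor */
	printf("%lu\n", blocks_for_truncate(80, 12));		/* 44 */
	printf("%lu\n", blocks_for_truncate(1UL << 20, 12));	/* 98: cap */
	return 0;
}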
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index d5e33a077a6..d0dddaceac5 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -82,18 +82,14 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
82 return PTR_ERR(acl); 82 return PTR_ERR(acl);
83 } 83 }
84 if (acl) { 84 if (acl) {
85 mode_t mode;
86
87 error = posix_acl_valid(acl); 85 error = posix_acl_valid(acl);
88 if (error) 86 if (error)
89 goto failed; 87 goto failed;
90 switch (type) { 88 switch (type) {
91 case ACL_TYPE_ACCESS: 89 case ACL_TYPE_ACCESS:
92 mode = inode->i_mode; 90 error = posix_acl_equiv_mode(acl, &inode->i_mode);
93 error = posix_acl_equiv_mode(acl, &mode);
94 if (error < 0) 91 if (error < 0)
95 goto failed; 92 goto failed;
96 inode->i_mode = mode;
97 inode->i_ctime = CURRENT_TIME; 93 inode->i_ctime = CURRENT_TIME;
98 if (error == 0) { 94 if (error == 0) {
99 posix_acl_release(acl); 95 posix_acl_release(acl);
@@ -125,21 +121,20 @@ int
125generic_acl_init(struct inode *inode, struct inode *dir) 121generic_acl_init(struct inode *inode, struct inode *dir)
126{ 122{
127 struct posix_acl *acl = NULL; 123 struct posix_acl *acl = NULL;
128 mode_t mode = inode->i_mode;
129 int error; 124 int error;
130 125
131 inode->i_mode = mode & ~current_umask();
132 if (!S_ISLNK(inode->i_mode)) 126 if (!S_ISLNK(inode->i_mode))
133 acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); 127 acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
134 if (acl) { 128 if (acl) {
135 if (S_ISDIR(inode->i_mode)) 129 if (S_ISDIR(inode->i_mode))
136 set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); 130 set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
137 error = posix_acl_create(&acl, GFP_KERNEL, &mode); 131 error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
138 if (error < 0) 132 if (error < 0)
139 return error; 133 return error;
140 inode->i_mode = mode;
141 if (error > 0) 134 if (error > 0)
142 set_cached_acl(inode, ACL_TYPE_ACCESS, acl); 135 set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
136 } else {
137 inode->i_mode &= ~current_umask();
143 } 138 }
144 error = 0; 139 error = 0;
145 140
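The generic_acl_init() change moves the umask into the else branch: under the
POSIX ACL draft semantics, a default ACL on the directory replaces the umask,
which must only apply when no default ACL exists. A simplified stand-alone
rendering of the new control flow (init_mode() is hypothetical, and the
acl_mask parameter stands in for the full posix_acl_create() masking, so this
is an approximation):

#include <stdio.h>

static unsigned int init_mode(unsigned int req_mode, int have_default_acl,
			      unsigned int umask, unsigned int acl_mask)
{
	if (have_default_acl)
		return req_mode & acl_mask;	/* ACL decides, umask ignored */
	return req_mode & ~umask;		/* classic umask path */
}

int main(void)
{
	/* 0666 create with umask 022: 0644 without a default ACL ... */
	printf("%o\n", init_mode(0666, 0, 022, 0));
	/* ... but a default ACL granting group-write wins over the umask */
	printf("%o\n", init_mode(0666, 1, 022, 0775));
	return 0;
}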
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 884c9af0542..34501b64bc4 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -72,7 +72,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
72 return gfs2_acl_get(GFS2_I(inode), type); 72 return gfs2_acl_get(GFS2_I(inode), type);
73} 73}
74 74
75static int gfs2_set_mode(struct inode *inode, mode_t mode) 75static int gfs2_set_mode(struct inode *inode, umode_t mode)
76{ 76{
77 int error = 0; 77 int error = 0;
78 78
@@ -117,7 +117,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
117{ 117{
118 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 118 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
119 struct posix_acl *acl; 119 struct posix_acl *acl;
120 mode_t mode = inode->i_mode; 120 umode_t mode = inode->i_mode;
121 int error = 0; 121 int error = 0;
122 122
123 if (!sdp->sd_args.ar_posix_acl) 123 if (!sdp->sd_args.ar_posix_acl)
@@ -276,7 +276,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
276 goto out_release; 276 goto out_release;
277 277
278 if (type == ACL_TYPE_ACCESS) { 278 if (type == ACL_TYPE_ACCESS) {
279 mode_t mode = inode->i_mode; 279 umode_t mode = inode->i_mode;
280 error = posix_acl_equiv_mode(acl, &mode); 280 error = posix_acl_equiv_mode(acl, &mode);
281 281
282 if (error <= 0) { 282 if (error <= 0) {
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 8635be5ffd9..970ea987b3f 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -16,6 +16,7 @@
16#include <linux/statfs.h> 16#include <linux/statfs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/pid_namespace.h> 18#include <linux/pid_namespace.h>
19#include <linux/namei.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include "os.h" 21#include "os.h"
21 22
diff --git a/fs/inode.c b/fs/inode.c
index d0c72ff6b30..73920d555c8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
143 inode->i_op = &empty_iops; 143 inode->i_op = &empty_iops;
144 inode->i_fop = &empty_fops; 144 inode->i_fop = &empty_fops;
145 inode->i_nlink = 1; 145 inode->i_nlink = 1;
146 inode->i_opflags = 0;
146 inode->i_uid = 0; 147 inode->i_uid = 0;
147 inode->i_gid = 0; 148 inode->i_gid = 0;
148 atomic_set(&inode->i_writecount, 0); 149 atomic_set(&inode->i_writecount, 0);
@@ -399,12 +400,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
399EXPORT_SYMBOL(__insert_inode_hash); 400EXPORT_SYMBOL(__insert_inode_hash);
400 401
401/** 402/**
402 * remove_inode_hash - remove an inode from the hash 403 * __remove_inode_hash - remove an inode from the hash
403 * @inode: inode to unhash 404 * @inode: inode to unhash
404 * 405 *
405 * Remove an inode from the superblock. 406 * Remove an inode from the superblock.
406 */ 407 */
407void remove_inode_hash(struct inode *inode) 408void __remove_inode_hash(struct inode *inode)
408{ 409{
409 spin_lock(&inode_hash_lock); 410 spin_lock(&inode_hash_lock);
410 spin_lock(&inode->i_lock); 411 spin_lock(&inode->i_lock);
@@ -412,7 +413,7 @@ void remove_inode_hash(struct inode *inode)
412 spin_unlock(&inode->i_lock); 413 spin_unlock(&inode->i_lock);
413 spin_unlock(&inode_hash_lock); 414 spin_unlock(&inode_hash_lock);
414} 415}
415EXPORT_SYMBOL(remove_inode_hash); 416EXPORT_SYMBOL(__remove_inode_hash);
416 417
417void end_writeback(struct inode *inode) 418void end_writeback(struct inode *inode)
418{ 419{
@@ -454,7 +455,9 @@ static void evict(struct inode *inode)
454 BUG_ON(!(inode->i_state & I_FREEING)); 455 BUG_ON(!(inode->i_state & I_FREEING));
455 BUG_ON(!list_empty(&inode->i_lru)); 456 BUG_ON(!list_empty(&inode->i_lru));
456 457
457 inode_wb_list_del(inode); 458 if (!list_empty(&inode->i_wb_list))
459 inode_wb_list_del(inode);
460
458 inode_sb_list_del(inode); 461 inode_sb_list_del(inode);
459 462
460 if (op->evict_inode) { 463 if (op->evict_inode) {
@@ -1328,7 +1331,8 @@ static void iput_final(struct inode *inode)
1328 } 1331 }
1329 1332
1330 inode->i_state |= I_FREEING; 1333 inode->i_state |= I_FREEING;
1331 inode_lru_list_del(inode); 1334 if (!list_empty(&inode->i_lru))
1335 inode_lru_list_del(inode);
1332 spin_unlock(&inode->i_lock); 1336 spin_unlock(&inode->i_lock);
1333 1337
1334 evict(inode); 1338 evict(inode);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 2c62c5aae82..16a698bd906 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -257,9 +257,12 @@ static void
257__flush_batch(journal_t *journal, int *batch_count) 257__flush_batch(journal_t *journal, int *batch_count)
258{ 258{
259 int i; 259 int i;
260 struct blk_plug plug;
260 261
262 blk_start_plug(&plug);
261 for (i = 0; i < *batch_count; i++) 263 for (i = 0; i < *batch_count; i++)
262 write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); 264 write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC);
265 blk_finish_plug(&plug);
263 266
264 for (i = 0; i < *batch_count; i++) { 267 for (i = 0; i < *batch_count; i++) {
265 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 268 struct buffer_head *bh = journal->j_chkpt_bhs[i];
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0dfa5b598e6..f24df13adc4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2390,73 +2390,6 @@ static void __exit journal_exit(void)
2390 jbd2_journal_destroy_caches(); 2390 jbd2_journal_destroy_caches();
2391} 2391}
2392 2392
2393/*
2394 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
2395 * tracing infrastructure to map a dev_t to a device name.
2396 *
2397 * The caller should use rcu_read_lock() in order to make sure the
2398 * device name stays valid until its done with it. We use
2399 * rcu_read_lock() as well to make sure we're safe in case the caller
2400 * gets sloppy, and because rcu_read_lock() is cheap and can be safely
2401 * nested.
2402 */
2403struct devname_cache {
2404 struct rcu_head rcu;
2405 dev_t device;
2406 char devname[BDEVNAME_SIZE];
2407};
2408#define CACHE_SIZE_BITS 6
2409static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
2410static DEFINE_SPINLOCK(devname_cache_lock);
2411
2412static void free_devcache(struct rcu_head *rcu)
2413{
2414 kfree(rcu);
2415}
2416
2417const char *jbd2_dev_to_name(dev_t device)
2418{
2419 int i = hash_32(device, CACHE_SIZE_BITS);
2420 char *ret;
2421 struct block_device *bd;
2422 static struct devname_cache *new_dev;
2423
2424 rcu_read_lock();
2425 if (devcache[i] && devcache[i]->device == device) {
2426 ret = devcache[i]->devname;
2427 rcu_read_unlock();
2428 return ret;
2429 }
2430 rcu_read_unlock();
2431
2432 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2433 if (!new_dev)
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 bd = bdget(device);
2436 spin_lock(&devname_cache_lock);
2437 if (devcache[i]) {
2438 if (devcache[i]->device == device) {
2439 kfree(new_dev);
2440 bdput(bd);
2441 ret = devcache[i]->devname;
2442 spin_unlock(&devname_cache_lock);
2443 return ret;
2444 }
2445 call_rcu(&devcache[i]->rcu, free_devcache);
2446 }
2447 devcache[i] = new_dev;
2448 devcache[i]->device = device;
2449 if (bd) {
2450 bdevname(bd, devcache[i]->devname);
2451 bdput(bd);
2452 } else
2453 __bdevname(device, devcache[i]->devname);
2454 ret = devcache[i]->devname;
2455 spin_unlock(&devname_cache_lock);
2456 return ret;
2457}
2458EXPORT_SYMBOL(jbd2_dev_to_name);
2459
2460MODULE_LICENSE("GPL"); 2393MODULE_LICENSE("GPL");
2461module_init(journal_init); 2394module_init(journal_init);
2462module_exit(journal_exit); 2395module_exit(journal_exit);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 27c511a1cf0..926d02068a1 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
227 case ACL_TYPE_ACCESS: 227 case ACL_TYPE_ACCESS:
228 xprefix = JFFS2_XPREFIX_ACL_ACCESS; 228 xprefix = JFFS2_XPREFIX_ACL_ACCESS;
229 if (acl) { 229 if (acl) {
230 mode_t mode = inode->i_mode; 230 umode_t mode = inode->i_mode;
231 rc = posix_acl_equiv_mode(acl, &mode); 231 rc = posix_acl_equiv_mode(acl, &mode);
232 if (rc < 0) 232 if (rc < 0)
233 return rc; 233 return rc;
@@ -259,7 +259,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
259 return rc; 259 return rc;
260} 260}
261 261
262int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode) 262int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode)
263{ 263{
264 struct posix_acl *acl; 264 struct posix_acl *acl;
265 int rc; 265 int rc;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index b3421c78d9f..9b477246f2a 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
28 28
29struct posix_acl *jffs2_get_acl(struct inode *inode, int type); 29struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
33 33
34extern const struct xattr_handler jffs2_acl_access_xattr_handler; 34extern const struct xattr_handler jffs2_acl_access_xattr_handler;
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index b81b35ddf4e..bbcb9755dd2 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
406 406
407/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, 407/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
408 fill in the raw_inode while you're at it. */ 408 fill in the raw_inode while you're at it. */
409struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri) 409struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri)
410{ 410{
411 struct inode *inode; 411 struct inode *inode;
412 struct super_block *sb = dir_i->i_sb; 412 struct super_block *sb = dir_i->i_sb;
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 526979c607b..6c1755c59c0 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *);
173struct inode *jffs2_iget(struct super_block *, unsigned long); 173struct inode *jffs2_iget(struct super_block *, unsigned long);
174void jffs2_evict_inode (struct inode *); 174void jffs2_evict_inode (struct inode *);
175void jffs2_dirty_inode(struct inode *inode, int flags); 175void jffs2_dirty_inode(struct inode *inode, int flags);
176struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, 176struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode,
177 struct jffs2_raw_inode *ri); 177 struct jffs2_raw_inode *ri);
178int jffs2_statfs (struct dentry *, struct kstatfs *); 178int jffs2_statfs (struct dentry *, struct kstatfs *);
179int jffs2_remount_fs (struct super_block *, int *, char *); 179int jffs2_remount_fs (struct super_block *, int *, char *);
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index b3a32caf2b4..45559dc3ea2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -127,16 +127,14 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
127 return PTR_ERR(acl); 127 return PTR_ERR(acl);
128 128
129 if (acl) { 129 if (acl) {
130 mode_t mode = inode->i_mode;
131 if (S_ISDIR(inode->i_mode)) { 130 if (S_ISDIR(inode->i_mode)) {
132 rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); 131 rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
133 if (rc) 132 if (rc)
134 goto cleanup; 133 goto cleanup;
135 } 134 }
136 rc = posix_acl_create(&acl, GFP_KERNEL, &mode); 135 rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
137 if (rc < 0) 136 if (rc < 0)
138 goto cleanup; /* posix_acl_release(NULL) is no-op */ 137 goto cleanup; /* posix_acl_release(NULL) is no-op */
139 inode->i_mode = mode;
140 if (rc > 0) 138 if (rc > 0)
141 rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); 139 rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl);
142cleanup: 140cleanup:
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 24838f1eeee..e87fedef23d 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -693,8 +693,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
693 return rc; 693 return rc;
694 } 694 }
695 if (acl) { 695 if (acl) {
696 mode_t mode = inode->i_mode; 696 rc = posix_acl_equiv_mode(acl, &inode->i_mode);
697 rc = posix_acl_equiv_mode(acl, &mode);
698 posix_acl_release(acl); 697 posix_acl_release(acl);
699 if (rc < 0) { 698 if (rc < 0) {
700 printk(KERN_ERR 699 printk(KERN_ERR
@@ -702,7 +701,6 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
702 rc); 701 rc);
703 return rc; 702 return rc;
704 } 703 }
705 inode->i_mode = mode;
706 mark_inode_dirty(inode); 704 mark_inode_dirty(inode);
707 } 705 }
708 /* 706 /*
diff --git a/fs/namei.c b/fs/namei.c
index f8c69d37379..2826db35dc2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask)
179#ifdef CONFIG_FS_POSIX_ACL 179#ifdef CONFIG_FS_POSIX_ACL
180 struct posix_acl *acl; 180 struct posix_acl *acl;
181 181
182 /*
183 * Under RCU walk, we cannot even do a "get_cached_acl()",
184 * because that involves locking and getting a refcount on
185 * a cached ACL.
186 *
187 * So the only case we handle during RCU walking is the
188 * case of a cached "no ACL at all", which needs no locks
189 * or refcounts.
190 */
191 if (mask & MAY_NOT_BLOCK) { 182 if (mask & MAY_NOT_BLOCK) {
192 if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) 183 acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
184 if (!acl)
193 return -EAGAIN; 185 return -EAGAIN;
194 return -ECHILD; 186 /* no ->get_acl() calls in RCU mode... */
187 if (acl == ACL_NOT_CACHED)
188 return -ECHILD;
189 return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
195 } 190 }
196 191
197 acl = get_cached_acl(inode, ACL_TYPE_ACCESS); 192 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
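The RCU branch of check_acl() now distinguishes three cache states without
taking a reference: NULL means "cached: no ACL", the ACL_NOT_CACHED sentinel
means "unknown, leave the RCU walk", and anything else is an ACL it may check
locklessly. A stand-alone rendering of that tri-state (the sentinel object
here is invented; the kernel uses a reserved pointer value):

#include <stdio.h>

struct acl { int unused; };
static struct acl not_cached_sentinel;
#define ACL_NOT_CACHED (&not_cached_sentinel)

static const char *classify(struct acl *cached)
{
	if (!cached)
		return "no ACL cached: mode bits decide (-EAGAIN upstream)";
	if (cached == ACL_NOT_CACHED)
		return "unknown: drop out of the RCU walk (-ECHILD)";
	return "cached ACL: check permissions locklessly";
}

int main(void)
{
	struct acl some_acl;

	puts(classify(NULL));
	puts(classify(ACL_NOT_CACHED));
	puts(classify(&some_acl));
	return 0;
}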
@@ -313,6 +308,26 @@ int generic_permission(struct inode *inode, int mask)
313 return -EACCES; 308 return -EACCES;
314} 309}
315 310
311/*
312 * We _really_ want to just do "generic_permission()" without
313 * even looking at the inode->i_op values. So we keep a cache
 314 * flag in inode->i_opflags that says "this has no special
 315 * permission function, use the fast case".
316 */
317static inline int do_inode_permission(struct inode *inode, int mask)
318{
319 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
320 if (likely(inode->i_op->permission))
321 return inode->i_op->permission(inode, mask);
322
323 /* This gets set once for the inode lifetime */
324 spin_lock(&inode->i_lock);
325 inode->i_opflags |= IOP_FASTPERM;
326 spin_unlock(&inode->i_lock);
327 }
328 return generic_permission(inode, mask);
329}
330
316/** 331/**
317 * inode_permission - check for access rights to a given inode 332 * inode_permission - check for access rights to a given inode
318 * @inode: inode to check permission on 333 * @inode: inode to check permission on
@@ -327,7 +342,7 @@ int inode_permission(struct inode *inode, int mask)
327{ 342{
328 int retval; 343 int retval;
329 344
330 if (mask & MAY_WRITE) { 345 if (unlikely(mask & MAY_WRITE)) {
331 umode_t mode = inode->i_mode; 346 umode_t mode = inode->i_mode;
332 347
333 /* 348 /*
@@ -344,11 +359,7 @@ int inode_permission(struct inode *inode, int mask)
344 return -EACCES; 359 return -EACCES;
345 } 360 }
346 361
347 if (inode->i_op->permission) 362 retval = do_inode_permission(inode, mask);
348 retval = inode->i_op->permission(inode, mask);
349 else
350 retval = generic_permission(inode, mask);
351
352 if (retval) 363 if (retval)
353 return retval; 364 return retval;
354 365
@@ -716,19 +727,25 @@ static int follow_automount(struct path *path, unsigned flags,
716 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) 727 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
717 return -EISDIR; /* we actually want to stop here */ 728 return -EISDIR; /* we actually want to stop here */
718 729
719 /* We want to mount if someone is trying to open/create a file of any 730 /*
720 * type under the mountpoint, wants to traverse through the mountpoint
721 * or wants to open the mounted directory.
722 *
723 * We don't want to mount if someone's just doing a stat and they've 731 * We don't want to mount if someone's just doing a stat and they've
724 * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and 732 * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
725 * appended a '/' to the name. 733 * appended a '/' to the name.
726 */ 734 */
727 if (!(flags & LOOKUP_FOLLOW) && 735 if (!(flags & LOOKUP_FOLLOW)) {
728 !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | 736 /* We do, however, want to mount if someone wants to open or
729 LOOKUP_OPEN | LOOKUP_CREATE))) 737 * create a file of any type under the mountpoint, wants to
730 return -EISDIR; 738 * traverse through the mountpoint or wants to open the mounted
731 739 * directory.
740 * Also, autofs may mark negative dentries as being automount
741 * points. These will need the attentions of the daemon to
742 * instantiate them before they can be used.
743 */
744 if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
745 LOOKUP_OPEN | LOOKUP_CREATE)) &&
746 path->dentry->d_inode)
747 return -EISDIR;
748 }
732 current->total_link_count++; 749 current->total_link_count++;
733 if (current->total_link_count >= 40) 750 if (current->total_link_count >= 40)
734 return -ELOOP; 751 return -ELOOP;
@@ -1244,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd)
1244 } 1261 }
1245} 1262}
1246 1263
1264/*
1265 * Do we need to follow links? We _really_ want to be able
1266 * to do this check without having to look at inode->i_op,
1267 * so we keep a cache of "no, this doesn't need follow_link"
1268 * for the common case.
1269 */
1270static inline int should_follow_link(struct inode *inode, int follow)
1271{
1272 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1273 if (likely(inode->i_op->follow_link))
1274 return follow;
1275
1276 /* This gets set once for the inode lifetime */
1277 spin_lock(&inode->i_lock);
1278 inode->i_opflags |= IOP_NOFOLLOW;
1279 spin_unlock(&inode->i_lock);
1280 }
1281 return 0;
1282}
1283
1247static inline int walk_component(struct nameidata *nd, struct path *path, 1284static inline int walk_component(struct nameidata *nd, struct path *path,
1248 struct qstr *name, int type, int follow) 1285 struct qstr *name, int type, int follow)
1249{ 1286{
@@ -1266,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1266 terminate_walk(nd); 1303 terminate_walk(nd);
1267 return -ENOENT; 1304 return -ENOENT;
1268 } 1305 }
1269 if (unlikely(inode->i_op->follow_link) && follow) { 1306 if (should_follow_link(inode, follow)) {
1270 if (nd->flags & LOOKUP_RCU) { 1307 if (nd->flags & LOOKUP_RCU) {
1271 if (unlikely(unlazy_walk(nd, path->dentry))) { 1308 if (unlikely(unlazy_walk(nd, path->dentry))) {
1272 terminate_walk(nd); 1309 terminate_walk(nd);
@@ -1319,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1319} 1356}
1320 1357
1321/* 1358/*
1359 * We really don't want to look at inode->i_op->lookup
1360 * when we don't have to. So we keep a cache bit in
1361 * the inode ->i_opflags field that says "yes, we can
1362 * do lookup on this inode".
1363 */
1364static inline int can_lookup(struct inode *inode)
1365{
1366 if (likely(inode->i_opflags & IOP_LOOKUP))
1367 return 1;
1368 if (likely(!inode->i_op->lookup))
1369 return 0;
1370
1371 /* We do this once for the lifetime of the inode */
1372 spin_lock(&inode->i_lock);
1373 inode->i_opflags |= IOP_LOOKUP;
1374 spin_unlock(&inode->i_lock);
1375 return 1;
1376}
1377
1378/*
1322 * Name resolution. 1379 * Name resolution.
1323 * This is the basic name resolution function, turning a pathname into 1380 * This is the basic name resolution function, turning a pathname into
1324 * the final dentry. We expect 'base' to be positive and a directory. 1381 * the final dentry. We expect 'base' to be positive and a directory.
@@ -1397,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1397 if (err) 1454 if (err)
1398 return err; 1455 return err;
1399 } 1456 }
1457 if (can_lookup(nd->inode))
1458 continue;
1400 err = -ENOTDIR; 1459 err = -ENOTDIR;
1401 if (!nd->inode->i_op->lookup) 1460 break;
1402 break;
1403 continue;
1404 /* here ends the main loop */ 1461 /* here ends the main loop */
1405 1462
1406last_component: 1463last_component:
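do_inode_permission(), should_follow_link() and can_lookup() are three uses
of one idiom: an inode's i_op table never changes, so "does this inode have
that method?" is answered once and cached as an i_opflags bit, turning two
dependent pointer loads on the hot path into one flag test. A user-space
sketch of the idiom, with a C11 atomic OR standing in for the
i_lock-protected bit set (all names illustrative):

#include <stdatomic.h>
#include <stdio.h>

#define IOP_LOOKUP 1u

struct ops { int (*lookup)(void); };
struct object {
	const struct ops *op;
	atomic_uint opflags;
};

static int can_lookup(struct object *o)
{
	if (atomic_load_explicit(&o->opflags, memory_order_relaxed) & IOP_LOOKUP)
		return 1;		/* fast path: cached answer */
	if (!o->op->lookup)
		return 0;
	/* set once for the object's lifetime; a race just re-sets the bit */
	atomic_fetch_or(&o->opflags, IOP_LOOKUP);
	return 1;
}

static int dir_lookup(void) { return 0; }
static const struct ops dir_ops = { dir_lookup };
static const struct ops file_ops = { NULL };

int main(void)
{
	struct object dir = { &dir_ops, 0 };
	struct object file = { &file_ops, 0 };

	printf("dir:  %d %d\n", can_lookup(&dir), can_lookup(&dir));	/* 1 1 */
	printf("file: %d\n", can_lookup(&file));			/* 0 */
	return 0;
}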
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index e49e73107e6..7ef23979896 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -415,7 +415,7 @@ fail:
415} 415}
416 416
417int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, 417int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
418 mode_t mode) 418 umode_t mode)
419{ 419{
420 struct posix_acl *dfacl, *acl; 420 struct posix_acl *dfacl, *acl;
421 int error = 0; 421 int error = 0;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 38053d823eb..85f1690ca08 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
316 int flags, struct nfs_open_context *ctx) 316 int flags, struct nfs_open_context *ctx)
317{ 317{
318 struct nfs3_createdata *data; 318 struct nfs3_createdata *data;
319 mode_t mode = sattr->ia_mode; 319 umode_t mode = sattr->ia_mode;
320 int status = -ENOMEM; 320 int status = -ENOMEM;
321 321
322 dprintk("NFS call create %s\n", dentry->d_name.name); 322 dprintk("NFS call create %s\n", dentry->d_name.name);
@@ -562,7 +562,7 @@ static int
562nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) 562nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
563{ 563{
564 struct nfs3_createdata *data; 564 struct nfs3_createdata *data;
565 int mode = sattr->ia_mode; 565 umode_t mode = sattr->ia_mode;
566 int status = -ENOMEM; 566 int status = -ENOMEM;
567 567
568 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 568 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
@@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
681 dev_t rdev) 681 dev_t rdev)
682{ 682{
683 struct nfs3_createdata *data; 683 struct nfs3_createdata *data;
684 mode_t mode = sattr->ia_mode; 684 umode_t mode = sattr->ia_mode;
685 int status = -ENOMEM; 685 int status = -ENOMEM;
686 686
687 dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, 687 dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 783c58d9daf..a7219075b4d 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle,
247 case ACL_TYPE_ACCESS: 247 case ACL_TYPE_ACCESS:
248 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; 248 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
249 if (acl) { 249 if (acl) {
250 mode_t mode = inode->i_mode; 250 umode_t mode = inode->i_mode;
251 ret = posix_acl_equiv_mode(acl, &mode); 251 ret = posix_acl_equiv_mode(acl, &mode);
252 if (ret < 0) 252 if (ret < 0)
253 return ret; 253 return ret;
@@ -351,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle,
351 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 351 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
352 struct posix_acl *acl = NULL; 352 struct posix_acl *acl = NULL;
353 int ret = 0, ret2; 353 int ret = 0, ret2;
354 mode_t mode; 354 umode_t mode;
355 355
356 if (!S_ISLNK(inode->i_mode)) { 356 if (!S_ISLNK(inode->i_mode)) {
357 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { 357 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index d43729a760e..10027b42b7e 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -149,10 +149,10 @@ posix_acl_valid(const struct posix_acl *acl)
149 * file mode permission bits, or else 1. Returns -E... on error. 149 * file mode permission bits, or else 1. Returns -E... on error.
150 */ 150 */
151int 151int
152posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p) 152posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
153{ 153{
154 const struct posix_acl_entry *pa, *pe; 154 const struct posix_acl_entry *pa, *pe;
155 mode_t mode = 0; 155 umode_t mode = 0;
156 int not_equiv = 0; 156 int not_equiv = 0;
157 157
158 FOREACH_ACL_ENTRY(pa, acl, pe) { 158 FOREACH_ACL_ENTRY(pa, acl, pe) {
@@ -188,7 +188,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p)
188 * Create an ACL representing the file mode permission bits of an inode. 188 * Create an ACL representing the file mode permission bits of an inode.
189 */ 189 */
190struct posix_acl * 190struct posix_acl *
191posix_acl_from_mode(mode_t mode, gfp_t flags) 191posix_acl_from_mode(umode_t mode, gfp_t flags)
192{ 192{
193 struct posix_acl *acl = posix_acl_alloc(3, flags); 193 struct posix_acl *acl = posix_acl_alloc(3, flags);
194 if (!acl) 194 if (!acl)
@@ -279,11 +279,11 @@ check_perm:
279 * system calls. All permissions that are not granted by the acl are removed. 279 * system calls. All permissions that are not granted by the acl are removed.
280 * The permissions in the acl are changed to reflect the mode_p parameter. 280 * The permissions in the acl are changed to reflect the mode_p parameter.
281 */ 281 */
282static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) 282static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
283{ 283{
284 struct posix_acl_entry *pa, *pe; 284 struct posix_acl_entry *pa, *pe;
285 struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; 285 struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
286 mode_t mode = *mode_p; 286 umode_t mode = *mode_p;
287 int not_equiv = 0; 287 int not_equiv = 0;
288 288
289 /* assert(atomic_read(acl->a_refcount) == 1); */ 289 /* assert(atomic_read(acl->a_refcount) == 1); */
@@ -336,7 +336,7 @@ static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p)
336/* 336/*
337 * Modify the ACL for the chmod syscall. 337 * Modify the ACL for the chmod syscall.
338 */ 338 */
339static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) 339static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode)
340{ 340{
341 struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; 341 struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
342 struct posix_acl_entry *pa, *pe; 342 struct posix_acl_entry *pa, *pe;
@@ -382,7 +382,7 @@ static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode)
382} 382}
383 383
384int 384int
385posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) 385posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p)
386{ 386{
387 struct posix_acl *clone = posix_acl_clone(*acl, gfp); 387 struct posix_acl *clone = posix_acl_clone(*acl, gfp);
388 int err = -ENOMEM; 388 int err = -ENOMEM;
@@ -400,7 +400,7 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p)
400EXPORT_SYMBOL(posix_acl_create); 400EXPORT_SYMBOL(posix_acl_create);
401 401
402int 402int
403posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode) 403posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode)
404{ 404{
405 struct posix_acl *clone = posix_acl_clone(*acl, gfp); 405 struct posix_acl *clone = posix_acl_clone(*acl, gfp);
406 int err = -ENOMEM; 406 int err = -ENOMEM;
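
The posix_acl.c hunks above switch the generic ACL helpers from mode_t to umode_t. A minimal sketch of how a filesystem consumes the reworked posix_acl_equiv_mode() — the helper name is invented here, modeled on the ocfs2 and reiserfs hunks in this diff, not taken from any real driver:

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* hypothetical helper, modeled on the ocfs2/reiserfs hunks above */
static int example_apply_access_acl(struct inode *inode,
				    struct posix_acl *acl)
{
	if (acl) {
		umode_t mode = inode->i_mode;	/* was mode_t before this series */
		int ret = posix_acl_equiv_mode(acl, &mode);

		if (ret < 0)
			return ret;		/* malformed ACL */
		inode->i_mode = mode;		/* fold the equivalent bits back */
		if (ret == 0)
			acl = NULL;		/* mode bits say it all; drop the xattr */
	}
	/* ... store or clear the ACL xattr here ... */
	return 0;
}
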
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 08e3eccf9a1..5eb02069e1b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1118,7 +1118,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1118 * Warn that /proc/pid/oom_adj is deprecated, see 1118 * Warn that /proc/pid/oom_adj is deprecated, see
1119 * Documentation/feature-removal-schedule.txt. 1119 * Documentation/feature-removal-schedule.txt.
1120 */ 1120 */
1121 WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1121 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
1122 current->comm, task_pid_nr(current), task_pid_nr(task), 1122 current->comm, task_pid_nr(current), task_pid_nr(task),
1123 task_pid_nr(task)); 1123 task_pid_nr(task));
1124 task->signal->oom_adj = oom_adjust; 1124 task->signal->oom_adj = oom_adjust;
@@ -1919,6 +1919,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1919 spin_lock(&files->file_lock); 1919 spin_lock(&files->file_lock);
1920 file = fcheck_files(files, fd); 1920 file = fcheck_files(files, fd);
1921 if (file) { 1921 if (file) {
1922 unsigned int f_flags;
1923 struct fdtable *fdt;
1924
1925 fdt = files_fdtable(files);
1926 f_flags = file->f_flags & ~O_CLOEXEC;
1927 if (FD_ISSET(fd, fdt->close_on_exec))
1928 f_flags |= O_CLOEXEC;
1929
1922 if (path) { 1930 if (path) {
1923 *path = file->f_path; 1931 *path = file->f_path;
1924 path_get(&file->f_path); 1932 path_get(&file->f_path);
@@ -1928,7 +1936,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1928 "pos:\t%lli\n" 1936 "pos:\t%lli\n"
1929 "flags:\t0%o\n", 1937 "flags:\t0%o\n",
1930 (long long) file->f_pos, 1938 (long long) file->f_pos,
1931 file->f_flags); 1939 f_flags);
1932 spin_unlock(&files->file_lock); 1940 spin_unlock(&files->file_lock);
1933 put_files_struct(files); 1941 put_files_struct(files);
1934 return 0; 1942 return 0;
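
With the proc_fd_info() hunk above, the "flags" line of /proc/<pid>/fdinfo/<fd> now folds in the close-on-exec state kept in the fdtable rather than the stale f_flags copy. A small userspace sketch of observing this, assuming an x86 box where O_CLOEXEC is 02000000 and /etc/hostname is readable (hypothetical demo, error handling trimmed):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	char path[64], line[128];
	int fd = open("/etc/hostname", O_RDONLY | O_CLOEXEC);
	FILE *f;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
	f = fopen(path, "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "flags:" now includes 02000000 */
	fclose(f);
	return 0;
}
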
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 977ed272384..893b961dcfd 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -39,8 +39,9 @@
39#define PSTORE_NAMELEN 64 39#define PSTORE_NAMELEN 64
40 40
41struct pstore_private { 41struct pstore_private {
42 struct pstore_info *psi;
43 enum pstore_type_id type;
42 u64 id; 44 u64 id;
43 int (*erase)(u64);
44 ssize_t size; 45 ssize_t size;
45 char data[]; 46 char data[];
46}; 47};
@@ -73,7 +74,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
73{ 74{
74 struct pstore_private *p = dentry->d_inode->i_private; 75 struct pstore_private *p = dentry->d_inode->i_private;
75 76
76 p->erase(p->id); 77 p->psi->erase(p->type, p->id, p->psi);
77 78
78 return simple_unlink(dir, dentry); 79 return simple_unlink(dir, dentry);
79} 80}
@@ -175,8 +176,8 @@ int pstore_is_mounted(void)
175 * Set the mtime & ctime to the date that this record was originally stored. 176 * Set the mtime & ctime to the date that this record was originally stored.
176 */ 177 */
177int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, 178int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
178 char *data, size_t size, 179 char *data, size_t size, struct timespec time,
179 struct timespec time, int (*erase)(u64)) 180 struct pstore_info *psi)
180{ 181{
181 struct dentry *root = pstore_sb->s_root; 182 struct dentry *root = pstore_sb->s_root;
182 struct dentry *dentry; 183 struct dentry *dentry;
@@ -192,8 +193,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
192 private = kmalloc(sizeof *private + size, GFP_KERNEL); 193 private = kmalloc(sizeof *private + size, GFP_KERNEL);
193 if (!private) 194 if (!private)
194 goto fail_alloc; 195 goto fail_alloc;
196 private->type = type;
195 private->id = id; 197 private->id = id;
196 private->erase = erase; 198 private->psi = psi;
197 199
198 switch (type) { 200 switch (type) {
199 case PSTORE_TYPE_DMESG: 201 case PSTORE_TYPE_DMESG:
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 8c9f23eb164..611c1b3c46f 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -2,5 +2,5 @@ extern void pstore_set_kmsg_bytes(int);
2extern void pstore_get_records(void); 2extern void pstore_get_records(void);
3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, 3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
4 char *data, size_t size, 4 char *data, size_t size,
5 struct timespec time, int (*erase)(u64)); 5 struct timespec time, struct pstore_info *psi);
6extern int pstore_is_mounted(void); 6extern int pstore_is_mounted(void);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f2c3ff20ea6..c5300ec3169 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -37,6 +37,8 @@
37static DEFINE_SPINLOCK(pstore_lock); 37static DEFINE_SPINLOCK(pstore_lock);
38static struct pstore_info *psinfo; 38static struct pstore_info *psinfo;
39 39
40static char *backend;
41
40/* How much of the console log to snapshot */ 42/* How much of the console log to snapshot */
41static unsigned long kmsg_bytes = 10240; 43static unsigned long kmsg_bytes = 10240;
42 44
@@ -67,7 +69,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
67 unsigned long size, total = 0; 69 unsigned long size, total = 0;
68 char *dst, *why; 70 char *dst, *why;
69 u64 id; 71 u64 id;
70 int hsize, part = 1; 72 int hsize;
73 unsigned int part = 1;
71 74
72 if (reason < ARRAY_SIZE(reason_str)) 75 if (reason < ARRAY_SIZE(reason_str))
73 why = reason_str[reason]; 76 why = reason_str[reason];
@@ -78,7 +81,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
78 oopscount++; 81 oopscount++;
79 while (total < kmsg_bytes) { 82 while (total < kmsg_bytes) {
80 dst = psinfo->buf; 83 dst = psinfo->buf;
81 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++); 84 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
82 size = psinfo->bufsize - hsize; 85 size = psinfo->bufsize - hsize;
83 dst += hsize; 86 dst += hsize;
84 87
@@ -94,14 +97,16 @@ static void pstore_dump(struct kmsg_dumper *dumper,
94 memcpy(dst, s1 + s1_start, l1_cpy); 97 memcpy(dst, s1 + s1_start, l1_cpy);
95 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 98 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
96 99
97 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy); 100 id = psinfo->write(PSTORE_TYPE_DMESG, part,
101 hsize + l1_cpy + l2_cpy, psinfo);
98 if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) 102 if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
99 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, 103 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
100 psinfo->buf, hsize + l1_cpy + l2_cpy, 104 psinfo->buf, hsize + l1_cpy + l2_cpy,
101 CURRENT_TIME, psinfo->erase); 105 CURRENT_TIME, psinfo);
102 l1 -= l1_cpy; 106 l1 -= l1_cpy;
103 l2 -= l2_cpy; 107 l2 -= l2_cpy;
104 total += l1_cpy + l2_cpy; 108 total += l1_cpy + l2_cpy;
109 part++;
105 } 110 }
106 mutex_unlock(&psinfo->buf_mutex); 111 mutex_unlock(&psinfo->buf_mutex);
107} 112}
@@ -128,6 +133,12 @@ int pstore_register(struct pstore_info *psi)
128 spin_unlock(&pstore_lock); 133 spin_unlock(&pstore_lock);
129 return -EBUSY; 134 return -EBUSY;
130 } 135 }
136
137 if (backend && strcmp(backend, psi->name)) {
138 spin_unlock(&pstore_lock);
139 return -EINVAL;
140 }
141
131 psinfo = psi; 142 psinfo = psi;
132 spin_unlock(&pstore_lock); 143 spin_unlock(&pstore_lock);
133 144
@@ -166,9 +177,9 @@ void pstore_get_records(void)
166 if (rc) 177 if (rc)
167 goto out; 178 goto out;
168 179
169 while ((size = psi->read(&id, &type, &time)) > 0) { 180 while ((size = psi->read(&id, &type, &time, psi)) > 0) {
170 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, 181 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
171 time, psi->erase)) 182 time, psi))
172 failed++; 183 failed++;
173 } 184 }
174 psi->close(psi); 185 psi->close(psi);
@@ -196,12 +207,15 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size)
196 207
197 mutex_lock(&psinfo->buf_mutex); 208 mutex_lock(&psinfo->buf_mutex);
198 memcpy(psinfo->buf, buf, size); 209 memcpy(psinfo->buf, buf, size);
199 id = psinfo->write(type, size); 210 id = psinfo->write(type, 0, size, psinfo);
200 if (pstore_is_mounted()) 211 if (pstore_is_mounted())
201 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, 212 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
202 size, CURRENT_TIME, psinfo->erase); 213 size, CURRENT_TIME, psinfo);
203 mutex_unlock(&psinfo->buf_mutex); 214 mutex_unlock(&psinfo->buf_mutex);
204 215
205 return 0; 216 return 0;
206} 217}
207EXPORT_SYMBOL_GPL(pstore_write); 218EXPORT_SYMBOL_GPL(pstore_write);
219
220module_param(backend, charp, 0444);
221MODULE_PARM_DESC(backend, "Pstore backend to use");
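
The pstore hunks above thread a struct pstore_info back-pointer through read/write/erase and add a part number to write. A sketch of what a backend might look like against the new hooks; the demo_* names are invented and the bodies are stubs, not a real driver:

#include <linux/pstore.h>

/* stub persistence hooks; a real driver talks to its backing store */
static u64 demo_write(enum pstore_type_id type, unsigned int part,
		      size_t size, struct pstore_info *psi)
{
	/* persist psi->buf[0..size) keyed by (type, part); return a record id */
	return 1;
}

static int demo_erase(enum pstore_type_id type, u64 id,
		      struct pstore_info *psi)
{
	return 0;	/* drop the record named by (type, id) */
}

static struct pstore_info demo_pstore = {
	.name	= "demo",	/* must match any backend= parameter */
	.write	= demo_write,
	.erase	= demo_erase,
	/* .open/.close/.read plus buf/bufsize still required */
};

Registration stays pstore_register(&demo_pstore), which with the hunk above now returns -EINVAL when a backend= module parameter names some other driver.
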
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 7362cf4c946..6da0396e505 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -272,12 +272,10 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
272 case ACL_TYPE_ACCESS: 272 case ACL_TYPE_ACCESS:
273 name = POSIX_ACL_XATTR_ACCESS; 273 name = POSIX_ACL_XATTR_ACCESS;
274 if (acl) { 274 if (acl) {
275 mode_t mode = inode->i_mode; 275 error = posix_acl_equiv_mode(acl, &inode->i_mode);
276 error = posix_acl_equiv_mode(acl, &mode);
277 if (error < 0) 276 if (error < 0)
278 return error; 277 return error;
279 else { 278 else {
280 inode->i_mode = mode;
281 if (error == 0) 279 if (error == 0)
282 acl = NULL; 280 acl = NULL;
283 } 281 }
@@ -354,8 +352,6 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
354 return PTR_ERR(acl); 352 return PTR_ERR(acl);
355 353
356 if (acl) { 354 if (acl) {
357 mode_t mode = inode->i_mode;
358
359 /* Copy the default ACL to the default ACL of a new directory */ 355 /* Copy the default ACL to the default ACL of a new directory */
360 if (S_ISDIR(inode->i_mode)) { 356 if (S_ISDIR(inode->i_mode)) {
361 err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, 357 err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT,
@@ -366,12 +362,10 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
366 362
367 /* Now we reconcile the new ACL and the mode, 363 /* Now we reconcile the new ACL and the mode,
368 potentially modifying both */ 364 potentially modifying both */
369 err = posix_acl_create(&acl, GFP_NOFS, &mode); 365 err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
370 if (err < 0) 366 if (err < 0)
371 return err; 367 return err;
372 368
373 inode->i_mode = mode;
374
375 /* If we need an ACL.. */ 369 /* If we need an ACL.. */
376 if (err > 0) 370 if (err > 0)
377 err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); 371 err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl);
diff --git a/fs/stack.c b/fs/stack.c
index 4a6f7f44065..b4f2ab48a61 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
29 * 29 *
30 * We don't actually know what locking is used at the lower level; 30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using 31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and 32 * i_lock as in inode_add_bytes().
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */ 33 */
37 if (sizeof(i_blocks) > sizeof(long)) 34 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock); 35 spin_lock(&src->i_lock);
diff --git a/fs/stat.c b/fs/stat.c
index 961039121cb..ba5316ffac6 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
27 stat->uid = inode->i_uid; 27 stat->uid = inode->i_uid;
28 stat->gid = inode->i_gid; 28 stat->gid = inode->i_gid;
29 stat->rdev = inode->i_rdev; 29 stat->rdev = inode->i_rdev;
30 stat->size = i_size_read(inode);
30 stat->atime = inode->i_atime; 31 stat->atime = inode->i_atime;
31 stat->mtime = inode->i_mtime; 32 stat->mtime = inode->i_mtime;
32 stat->ctime = inode->i_ctime; 33 stat->ctime = inode->i_ctime;
33 stat->size = i_size_read(inode);
34 stat->blocks = inode->i_blocks;
35 stat->blksize = (1 << inode->i_blkbits); 34 stat->blksize = (1 << inode->i_blkbits);
35 stat->blocks = inode->i_blocks;
36} 36}
37 37
38EXPORT_SYMBOL(generic_fillattr); 38EXPORT_SYMBOL(generic_fillattr);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 44ce5165680..b6c4b3795c4 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -221,7 +221,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
221} 221}
222 222
223static int 223static int
224xfs_set_mode(struct inode *inode, mode_t mode) 224xfs_set_mode(struct inode *inode, umode_t mode)
225{ 225{
226 int error = 0; 226 int error = 0;
227 227
@@ -267,7 +267,7 @@ posix_acl_default_exists(struct inode *inode)
267int 267int
268xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) 268xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
269{ 269{
270 mode_t mode = inode->i_mode; 270 umode_t mode = inode->i_mode;
271 int error = 0, inherit = 0; 271 int error = 0, inherit = 0;
272 272
273 if (S_ISDIR(inode->i_mode)) { 273 if (S_ISDIR(inode->i_mode)) {
@@ -381,7 +381,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
381 goto out_release; 381 goto out_release;
382 382
383 if (type == ACL_TYPE_ACCESS) { 383 if (type == ACL_TYPE_ACCESS) {
384 mode_t mode = inode->i_mode; 384 umode_t mode = inode->i_mode;
385 error = posix_acl_equiv_mode(acl, &mode); 385 error = posix_acl_equiv_mode(acl, &mode);
386 386
387 if (error <= 0) { 387 if (error <= 0) {
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 3090471b2a5..e49c36d38d7 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle);
128extern int register_dock_notifier(struct notifier_block *nb); 128extern int register_dock_notifier(struct notifier_block *nb);
129extern void unregister_dock_notifier(struct notifier_block *nb); 129extern void unregister_dock_notifier(struct notifier_block *nb);
130extern int register_hotplug_dock_device(acpi_handle handle, 130extern int register_hotplug_dock_device(acpi_handle handle,
131 struct acpi_dock_ops *ops, 131 const struct acpi_dock_ops *ops,
132 void *context); 132 void *context);
133extern void unregister_hotplug_dock_device(acpi_handle handle); 133extern void unregister_hotplug_dock_device(acpi_handle handle);
134#else 134#else
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2ed0a8486c1..f554a9313b4 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
47 47
48/* Current ACPICA subsystem version in YYYYMMDD format */ 48/* Current ACPICA subsystem version in YYYYMMDD format */
49 49
50#define ACPI_CA_VERSION 0x20110413 50#define ACPI_CA_VERSION 0x20110623
51 51
52#include "actypes.h" 52#include "actypes.h"
53#include "actbl.h" 53#include "actbl.h"
@@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags;
69extern u32 acpi_gbl_enable_aml_debug_object; 69extern u32 acpi_gbl_enable_aml_debug_object;
70extern u8 acpi_gbl_copy_dsdt_locally; 70extern u8 acpi_gbl_copy_dsdt_locally;
71extern u8 acpi_gbl_truncate_io_addresses; 71extern u8 acpi_gbl_truncate_io_addresses;
72extern u8 acpi_gbl_disable_auto_repair;
72 73
73extern u32 acpi_current_gpe_count; 74extern u32 acpi_current_gpe_count;
74extern struct acpi_table_fadt acpi_gbl_FADT; 75extern struct acpi_table_fadt acpi_gbl_FADT;
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index e67b523a50e..51a527d24a8 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -18,6 +18,11 @@
18 18
19extern int hest_disable; 19extern int hest_disable;
20extern int erst_disable; 20extern int erst_disable;
21#ifdef CONFIG_ACPI_APEI_GHES
22extern int ghes_disable;
23#else
24#define ghes_disable 1
25#endif
21 26
22#ifdef CONFIG_ACPI_APEI 27#ifdef CONFIG_ACPI_APEI
23void __init acpi_hest_init(void); 28void __init acpi_hest_init(void);
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index ba4928cae47..67055f18033 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver;
337 337
338/* in processor_thermal.c */ 338/* in processor_thermal.c */
339int acpi_processor_get_limit_info(struct acpi_processor *pr); 339int acpi_processor_get_limit_info(struct acpi_processor *pr);
340extern struct thermal_cooling_device_ops processor_cooling_ops; 340extern const struct thermal_cooling_device_ops processor_cooling_ops;
341#ifdef CONFIG_CPU_FREQ 341#ifdef CONFIG_CPU_FREQ
342void acpi_thermal_cpufreq_init(void); 342void acpi_thermal_cpufreq_init(void);
343void acpi_thermal_cpufreq_exit(void); 343void acpi_thermal_cpufreq_exit(void);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 33d12f87f0e..44335e57eaa 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -205,6 +205,8 @@ struct drm_display_info {
205 enum subpixel_order subpixel_order; 205 enum subpixel_order subpixel_order;
206 u32 color_formats; 206 u32 color_formats;
207 207
208 u8 cea_rev;
209
208 char *raw_edid; /* if any */ 210 char *raw_edid; /* if any */
209}; 211};
210 212
@@ -802,6 +804,7 @@ extern struct drm_display_mode *drm_gtf_mode_complex(struct drm_device *dev,
802extern int drm_add_modes_noedid(struct drm_connector *connector, 804extern int drm_add_modes_noedid(struct drm_connector *connector,
803 int hdisplay, int vdisplay); 805 int hdisplay, int vdisplay);
804 806
807extern int drm_edid_header_is_valid(const u8 *raw_edid);
805extern bool drm_edid_is_valid(struct edid *edid); 808extern bool drm_edid_is_valid(struct edid *edid);
806struct drm_display_mode *drm_mode_find_dmt(struct drm_device *dev, 809struct drm_display_mode *drm_mode_find_dmt(struct drm_device *dev,
807 int hsize, int vsize, int fresh); 810 int hsize, int vsize, int fresh);
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c4d6dbfa3ff..28c0d114cb5 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -237,7 +237,7 @@ typedef struct _drm_i915_sarea {
237#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) 237#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
238#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id) 238#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id)
239#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) 239#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
240#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image) 240#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
241#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) 241#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
242 242
243/* Allow drivers to submit batchbuffers directly to hardware, relying 243/* Allow drivers to submit batchbuffers directly to hardware, relying
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1deb2a73c2d..6001b4da39d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size);
238extern int pnpacpi_disabled; 238extern int pnpacpi_disabled;
239 239
240#define PXM_INVAL (-1) 240#define PXM_INVAL (-1)
241#define NID_INVAL (-1)
242 241
243int acpi_check_resource_conflict(const struct resource *res); 242int acpi_check_resource_conflict(const struct resource *res);
244 243
@@ -280,6 +279,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
280#define OSC_SB_CPUHP_OST_SUPPORT 8 279#define OSC_SB_CPUHP_OST_SUPPORT 8
281#define OSC_SB_APEI_SUPPORT 16 280#define OSC_SB_APEI_SUPPORT 16
282 281
282extern bool osc_sb_apei_support_acked;
283
283/* PCI defined _OSC bits */ 284/* PCI defined _OSC bits */
284/* _OSC DW1 Definition (OS Support Fields) */ 285/* _OSC DW1 Definition (OS Support Fields) */
285#define OSC_EXT_PCI_CONFIG_SUPPORT 1 286#define OSC_EXT_PCI_CONFIG_SUPPORT 1
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 3111385b8ca..e6e28f37d8e 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -172,8 +172,11 @@ struct pl08x_dma_chan {
172 int phychan_hold; 172 int phychan_hold;
173 struct tasklet_struct tasklet; 173 struct tasklet_struct tasklet;
174 char *name; 174 char *name;
175 struct pl08x_channel_data *cd; 175 const struct pl08x_channel_data *cd;
176 dma_addr_t runtime_addr; 176 dma_addr_t src_addr;
177 dma_addr_t dst_addr;
178 u32 src_cctl;
179 u32 dst_cctl;
177 enum dma_data_direction runtime_direction; 180 enum dma_data_direction runtime_direction;
178 dma_cookie_t lc; 181 dma_cookie_t lc;
179 struct list_head pend_list; 182 struct list_head pend_list;
@@ -202,7 +205,7 @@ struct pl08x_dma_chan {
202 * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2 205 * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2
203 */ 206 */
204struct pl08x_platform_data { 207struct pl08x_platform_data {
205 struct pl08x_channel_data *slave_channels; 208 const struct pl08x_channel_data *slave_channels;
206 unsigned int num_slave_channels; 209 unsigned int num_slave_channels;
207 struct pl08x_channel_data memcpy_channel; 210 struct pl08x_channel_data memcpy_channel;
208 int (*get_signal)(struct pl08x_dma_chan *); 211 int (*get_signal)(struct pl08x_dma_chan *);
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3bac44cce14..7ad634501e4 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
146extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); 146extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
147extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); 147extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
148 148
149#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
149#define BITMAP_LAST_WORD_MASK(nbits) \ 150#define BITMAP_LAST_WORD_MASK(nbits) \
150( \ 151( \
151 ((nbits) % BITS_PER_LONG) ? \ 152 ((nbits) % BITS_PER_LONG) ? \
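
A one-line illustration of the mask added above, assuming 64-bit longs — it keeps the bits of the first word at or above a start offset:

unsigned long mask = BITMAP_FIRST_WORD_MASK(3);	/* ~0UL << 3 == 0xfffffffffffffff8 */
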
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 36719ead50e..b51629e15cf 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -122,6 +122,8 @@ struct cpuidle_driver {
122}; 122};
123 123
124#ifdef CONFIG_CPU_IDLE 124#ifdef CONFIG_CPU_IDLE
125extern void disable_cpuidle(void);
126extern int cpuidle_idle_call(void);
125 127
126extern int cpuidle_register_driver(struct cpuidle_driver *drv); 128extern int cpuidle_register_driver(struct cpuidle_driver *drv);
127struct cpuidle_driver *cpuidle_get_driver(void); 129struct cpuidle_driver *cpuidle_get_driver(void);
@@ -135,6 +137,8 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev);
135extern void cpuidle_disable_device(struct cpuidle_device *dev); 137extern void cpuidle_disable_device(struct cpuidle_device *dev);
136 138
137#else 139#else
140static inline void disable_cpuidle(void) { }
141static inline int cpuidle_idle_call(void) { return -ENODEV; }
138 142
139static inline int cpuidle_register_driver(struct cpuidle_driver *drv) 143static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
140{return -ENODEV; } 144{return -ENODEV; }
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 48e82af1159..98f46efbe2d 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -265,10 +265,11 @@ static inline void put_cred(const struct cred *_cred)
265/** 265/**
266 * current_cred - Access the current task's subjective credentials 266 * current_cred - Access the current task's subjective credentials
267 * 267 *
268 * Access the subjective credentials of the current task. 268 * Access the subjective credentials of the current task. RCU-safe,
269 * since nobody else can modify it.
269 */ 270 */
270#define current_cred() \ 271#define current_cred() \
271 (current->cred) 272 (*(__force struct cred **)&current->cred)
272 273
273/** 274/**
274 * __task_cred - Access a task's objective credentials 275 * __task_cred - Access a task's objective credentials
@@ -307,7 +308,7 @@ static inline void put_cred(const struct cred *_cred)
307({ \ 308({ \
308 struct user_struct *__u; \ 309 struct user_struct *__u; \
309 struct cred *__cred; \ 310 struct cred *__cred; \
310 __cred = (struct cred *) current_cred(); \ 311 __cred = current_cred(); \
311 __u = get_uid(__cred->user); \ 312 __u = get_uid(__cred->user); \
312 __u; \ 313 __u; \
313}) 314})
@@ -322,7 +323,7 @@ static inline void put_cred(const struct cred *_cred)
322({ \ 323({ \
323 struct group_info *__groups; \ 324 struct group_info *__groups; \
324 struct cred *__cred; \ 325 struct cred *__cred; \
325 __cred = (struct cred *) current_cred(); \ 326 __cred = current_cred(); \
326 __groups = get_group_info(__cred->group_info); \ 327 __groups = get_group_info(__cred->group_info); \
327 __groups; \ 328 __groups; \
328}) 329})
@@ -341,7 +342,7 @@ static inline void put_cred(const struct cred *_cred)
341 342
342#define current_cred_xxx(xxx) \ 343#define current_cred_xxx(xxx) \
343({ \ 344({ \
344 current->cred->xxx; \ 345 current_cred()->xxx; \
345}) 346})
346 347
347#define current_uid() (current_cred_xxx(uid)) 348#define current_uid() (current_cred_xxx(uid))
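
The current_cred() rework above makes the accessor RCU-safe and drops the const casts from the callers. A sketch with an invented helper; in 3.1 uid_t is still a plain integer type:

#include <linux/cred.h>

static int example_is_root(void)
{
	const struct cred *cred = current_cred();	/* no cast needed now */

	return cred->euid == 0;
}
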
diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
index ec78a4bbe1d..2cd9f1cf9fa 100644
--- a/include/linux/cryptohash.h
+++ b/include/linux/cryptohash.h
@@ -3,11 +3,16 @@
3 3
4#define SHA_DIGEST_WORDS 5 4#define SHA_DIGEST_WORDS 5
5#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8) 5#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
6#define SHA_WORKSPACE_WORDS 80 6#define SHA_WORKSPACE_WORDS 16
7 7
8void sha_init(__u32 *buf); 8void sha_init(__u32 *buf);
9void sha_transform(__u32 *digest, const char *data, __u32 *W); 9void sha_transform(__u32 *digest, const char *data, __u32 *W);
10 10
11#define MD5_DIGEST_WORDS 4
12#define MD5_MESSAGE_BYTES 64
13
14void md5_transform(__u32 *hash, __u32 const *in);
15
11__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]); 16__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]);
12 17
13#endif 18#endif
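
The header now exports the md5_transform() block function alongside the SHA-1 helpers. A sketch of driving it by hand, assuming the caller has already padded the 64-byte input block; the function name and signature come from the hunk above, the rest is illustrative:

#include <linux/cryptohash.h>

static void example_md5_block(const __u32 block[MD5_MESSAGE_BYTES / 4])
{
	__u32 hash[MD5_DIGEST_WORDS] = {
		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476	/* standard MD5 IV */
	};

	md5_transform(hash, block);	/* hash[] now holds the updated state */
}
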
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d37d2a79309..62157c03caf 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,12 +180,12 @@ struct dentry_operations {
180 */ 180 */
181 181
182/* d_flags entries */ 182/* d_flags entries */
183#define DCACHE_AUTOFS_PENDING 0x0001 /* autofs: "under construction" */ 183#define DCACHE_OP_HASH 0x0001
184#define DCACHE_NFSFS_RENAMED 0x0002 184#define DCACHE_OP_COMPARE 0x0002
185 /* this dentry has been "silly renamed" and has to be deleted on the last 185#define DCACHE_OP_REVALIDATE 0x0004
186 * dput() */ 186#define DCACHE_OP_DELETE 0x0008
187 187
188#define DCACHE_DISCONNECTED 0x0004 188#define DCACHE_DISCONNECTED 0x0010
189 /* This dentry is possibly not currently connected to the dcache tree, in 189 /* This dentry is possibly not currently connected to the dcache tree, in
190 * which case its parent will either be itself, or will have this flag as 190 * which case its parent will either be itself, or will have this flag as
191 * well. nfsd will not use a dentry with this bit set, but will first 191 * well. nfsd will not use a dentry with this bit set, but will first
@@ -196,22 +196,18 @@ struct dentry_operations {
196 * dentry into place and return that dentry rather than the passed one, 196 * dentry into place and return that dentry rather than the passed one,
197 * typically using d_splice_alias. */ 197 * typically using d_splice_alias. */
198 198
199#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ 199#define DCACHE_REFERENCED 0x0020 /* Recently used, don't discard. */
200#define DCACHE_RCUACCESS 0x0010 /* Entry has ever been RCU-visible */ 200#define DCACHE_RCUACCESS 0x0040 /* Entry has ever been RCU-visible */
201#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020
202 /* Parent inode is watched by inotify */
203
204#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */
205#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080
206 /* Parent inode is watched by some fsnotify listener */
207 201
208#define DCACHE_CANT_MOUNT 0x0100 202#define DCACHE_CANT_MOUNT 0x0100
209#define DCACHE_GENOCIDE 0x0200 203#define DCACHE_GENOCIDE 0x0200
210 204
211#define DCACHE_OP_HASH 0x1000 205#define DCACHE_NFSFS_RENAMED 0x1000
212#define DCACHE_OP_COMPARE 0x2000 206 /* this dentry has been "silly renamed" and has to be deleted on the last
213#define DCACHE_OP_REVALIDATE 0x4000 207 * dput() */
214#define DCACHE_OP_DELETE 0x8000 208#define DCACHE_COOKIE 0x2000 /* For use by dcookie subsystem */
209#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x4000
210 /* Parent inode is watched by some fsnotify listener */
215 211
216#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ 212#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */
217#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ 213#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 4427e045405..3fa1f3d90ce 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -208,6 +208,49 @@ struct dm_target_callbacks {
208int dm_register_target(struct target_type *t); 208int dm_register_target(struct target_type *t);
209void dm_unregister_target(struct target_type *t); 209void dm_unregister_target(struct target_type *t);
210 210
211/*
212 * Target argument parsing.
213 */
214struct dm_arg_set {
215 unsigned argc;
216 char **argv;
217};
218
219/*
220 * The minimum and maximum value of a numeric argument, together with
221 * the error message to use if the number is found to be outside that range.
222 */
223struct dm_arg {
224 unsigned min;
225 unsigned max;
226 char *error;
227};
228
229/*
230 * Validate the next argument, either returning it as *value or, if invalid,
231 * returning -EINVAL and setting *error.
232 */
233int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
234 unsigned *value, char **error);
235
236/*
237 * Process the next argument as the start of a group containing between
238 * arg->min and arg->max further arguments. Either return the size as
239 * *num_args or, if invalid, return -EINVAL and set *error.
240 */
241int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
242 unsigned *num_args, char **error);
243
244/*
245 * Return the current argument and shift to the next.
246 */
247const char *dm_shift_arg(struct dm_arg_set *as);
248
249/*
250 * Move through num_args arguments.
251 */
252void dm_consume_args(struct dm_arg_set *as, unsigned num_args);
253
211/*----------------------------------------------------------------- 254/*-----------------------------------------------------------------
212 * Functions for creating and manipulating mapped devices. 255 * Functions for creating and manipulating mapped devices.
213 * Drop the reference with dm_put when you finish with the object. 256 * Drop the reference with dm_put when you finish with the object.
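
The dm_arg helpers declared above centralize numeric-argument validation in target constructors. A sketch of a ctr-side use; the range and error string are invented for illustration:

#include <linux/device-mapper.h>

/* parse one numeric argument, e.g. an interval in seconds */
static int example_parse_interval(struct dm_arg_set *as, unsigned *interval,
				  char **error)
{
	static struct dm_arg _arg = {
		.min = 1,
		.max = 3600,
		.error = "interval out of range",
	};

	return dm_read_arg(&_arg, as, interval, error);
}
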
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 3708455ee6c..0cb8eff76bd 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
267#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 267#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
268 268
269#define DM_VERSION_MAJOR 4 269#define DM_VERSION_MAJOR 4
270#define DM_VERSION_MINOR 20 270#define DM_VERSION_MINOR 21
271#define DM_VERSION_PATCHLEVEL 0 271#define DM_VERSION_PATCHLEVEL 0
272#define DM_VERSION_EXTRA "-ioctl (2011-02-02)" 272#define DM_VERSION_EXTRA "-ioctl (2011-07-06)"
273 273
274/* Status bits */ 274/* Status bits */
275#define DM_READONLY_FLAG (1 << 0) /* In/Out */ 275#define DM_READONLY_FLAG (1 << 0) /* In/Out */
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index 298d587e349..5e54458e920 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -42,5 +42,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
42 unsigned num_dests, struct dm_io_region *dests, 42 unsigned num_dests, struct dm_io_region *dests,
43 unsigned flags, dm_kcopyd_notify_fn fn, void *context); 43 unsigned flags, dm_kcopyd_notify_fn fn, void *context);
44 44
45/*
46 * Prepare a callback and submit it via the kcopyd thread.
47 *
48 * dm_kcopyd_prepare_callback allocates a callback structure and returns it.
49 * It must not be called from interrupt context.
50 * The returned value should be passed into dm_kcopyd_do_callback.
51 *
52 * dm_kcopyd_do_callback submits the callback.
53 * It may be called from interrupt context.
54 * The callback is issued from the kcopyd thread.
55 */
56void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
57 dm_kcopyd_notify_fn fn, void *context);
58void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
59
45#endif /* __KERNEL__ */ 60#endif /* __KERNEL__ */
46#endif /* _LINUX_DM_KCOPYD_H */ 61#endif /* _LINUX_DM_KCOPYD_H */
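
The dm_kcopyd_prepare_callback/do_callback pair above splits completion into an allocation stage (process context only) and a firing stage (interrupt-safe). A sketch with invented demo_* names:

#include <linux/dm-kcopyd.h>

static void demo_done(int read_err, unsigned long write_err, void *context)
{
	/* invoked from the kcopyd thread, like a normal copy completion */
}

/* process context: allocate the deferred completion up front */
static void *demo_prepare(struct dm_kcopyd_client *kc, void *context)
{
	return dm_kcopyd_prepare_callback(kc, demo_done, context);
}

/* later, even from interrupt context: fire it */
static void demo_fire(void *job)
{
	dm_kcopyd_do_callback(job, 0 /* read_err */, 0 /* write_err */);
}
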
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec257269392..2362a0bc7f0 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -19,6 +19,7 @@
19#include <linux/rtc.h> 19#include <linux/rtc.h>
20#include <linux/ioport.h> 20#include <linux/ioport.h>
21#include <linux/pfn.h> 21#include <linux/pfn.h>
22#include <linux/pstore.h>
22 23
23#include <asm/page.h> 24#include <asm/page.h>
24#include <asm/system.h> 25#include <asm/system.h>
@@ -232,6 +233,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
232#define UV_SYSTEM_TABLE_GUID \ 233#define UV_SYSTEM_TABLE_GUID \
233 EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 ) 234 EFI_GUID( 0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )
234 235
236#define LINUX_EFI_CRASH_GUID \
237 EFI_GUID( 0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 )
238
235typedef struct { 239typedef struct {
236 efi_guid_t guid; 240 efi_guid_t guid;
237 unsigned long table; 241 unsigned long table;
@@ -458,6 +462,8 @@ struct efivars {
458 struct kset *kset; 462 struct kset *kset;
459 struct bin_attribute *new_var, *del_var; 463 struct bin_attribute *new_var, *del_var;
460 const struct efivar_operations *ops; 464 const struct efivar_operations *ops;
465 struct efivar_entry *walk_entry;
466 struct pstore_info efi_pstore_info;
461}; 467};
462 468
463int register_efivars(struct efivars *efivars, 469int register_efivars(struct efivars *efivars,
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 3ff060ac781..c6f996f2abb 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -25,10 +25,6 @@ struct fault_attr {
25 unsigned long reject_end; 25 unsigned long reject_end;
26 26
27 unsigned long count; 27 unsigned long count;
28
29#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
30 struct dentry *dir;
31#endif
32}; 28};
33 29
34#define FAULT_ATTR_INITIALIZER { \ 30#define FAULT_ATTR_INITIALIZER { \
@@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size);
45 41
46#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS 42#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
47 43
48int init_fault_attr_dentries(struct fault_attr *attr, const char *name); 44struct dentry *fault_create_debugfs_attr(const char *name,
49void cleanup_fault_attr_dentries(struct fault_attr *attr); 45 struct dentry *parent, struct fault_attr *attr);
50 46
51#else /* CONFIG_FAULT_INJECTION_DEBUG_FS */ 47#else /* CONFIG_FAULT_INJECTION_DEBUG_FS */
52 48
53static inline int init_fault_attr_dentries(struct fault_attr *attr, 49static inline struct dentry *fault_create_debugfs_attr(const char *name,
54 const char *name) 50 struct dentry *parent, struct fault_attr *attr)
55{
56 return -ENODEV;
57}
58
59static inline void cleanup_fault_attr_dentries(struct fault_attr *attr)
60{ 51{
52 return ERR_PTR(-ENODEV);
61} 53}
62 54
63#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ 55#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
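
fault_create_debugfs_attr() above replaces the old init/cleanup pair and hands back the debugfs directory dentry (or an ERR_PTR). A sketch with an invented attribute name:

#include <linux/fault-inject.h>
#include <linux/debugfs.h>
#include <linux/err.h>

static DECLARE_FAULT_ATTR(demo_fail);
static struct dentry *demo_fail_dir;

static int __init demo_fault_init(void)
{
	demo_fail_dir = fault_create_debugfs_attr("demo_fail", NULL,
						  &demo_fail);
	return IS_ERR(demo_fail_dir) ? PTR_ERR(demo_fail_dir) : 0;
}
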
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f23bcb77260..178cdb4f1d4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -738,22 +738,54 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
738struct posix_acl; 738struct posix_acl;
739#define ACL_NOT_CACHED ((void *)(-1)) 739#define ACL_NOT_CACHED ((void *)(-1))
740 740
741#define IOP_FASTPERM 0x0001
742#define IOP_LOOKUP 0x0002
743#define IOP_NOFOLLOW 0x0004
744
745/*
746 * Keep mostly read-only and often accessed (especially for
747 * the RCU path lookup and 'stat' data) fields at the beginning
748 * of the 'struct inode'
749 */
741struct inode { 750struct inode {
742 /* RCU path lookup touches following: */
743 umode_t i_mode; 751 umode_t i_mode;
752 unsigned short i_opflags;
744 uid_t i_uid; 753 uid_t i_uid;
745 gid_t i_gid; 754 gid_t i_gid;
755 unsigned int i_flags;
756
757#ifdef CONFIG_FS_POSIX_ACL
758 struct posix_acl *i_acl;
759 struct posix_acl *i_default_acl;
760#endif
761
746 const struct inode_operations *i_op; 762 const struct inode_operations *i_op;
747 struct super_block *i_sb; 763 struct super_block *i_sb;
764 struct address_space *i_mapping;
748 765
749 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
750 unsigned int i_flags;
751 unsigned long i_state;
752#ifdef CONFIG_SECURITY 766#ifdef CONFIG_SECURITY
753 void *i_security; 767 void *i_security;
754#endif 768#endif
755 struct mutex i_mutex;
756 769
770 /* Stat data, not accessed from path walking */
771 unsigned long i_ino;
772 unsigned int i_nlink;
773 dev_t i_rdev;
774 loff_t i_size;
775 struct timespec i_atime;
776 struct timespec i_mtime;
777 struct timespec i_ctime;
778 unsigned int i_blkbits;
779 blkcnt_t i_blocks;
780
781#ifdef __NEED_I_SIZE_ORDERED
782 seqcount_t i_size_seqcount;
783#endif
784
785 /* Misc */
786 unsigned long i_state;
787 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
788 struct mutex i_mutex;
757 789
758 unsigned long dirtied_when; /* jiffies of first dirtying */ 790 unsigned long dirtied_when; /* jiffies of first dirtying */
759 791
@@ -765,25 +797,12 @@ struct inode {
765 struct list_head i_dentry; 797 struct list_head i_dentry;
766 struct rcu_head i_rcu; 798 struct rcu_head i_rcu;
767 }; 799 };
768 unsigned long i_ino;
769 atomic_t i_count; 800 atomic_t i_count;
770 unsigned int i_nlink;
771 dev_t i_rdev;
772 unsigned int i_blkbits;
773 u64 i_version; 801 u64 i_version;
774 loff_t i_size;
775#ifdef __NEED_I_SIZE_ORDERED
776 seqcount_t i_size_seqcount;
777#endif
778 struct timespec i_atime;
779 struct timespec i_mtime;
780 struct timespec i_ctime;
781 blkcnt_t i_blocks;
782 unsigned short i_bytes; 802 unsigned short i_bytes;
783 atomic_t i_dio_count; 803 atomic_t i_dio_count;
784 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 804 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
785 struct file_lock *i_flock; 805 struct file_lock *i_flock;
786 struct address_space *i_mapping;
787 struct address_space i_data; 806 struct address_space i_data;
788#ifdef CONFIG_QUOTA 807#ifdef CONFIG_QUOTA
789 struct dquot *i_dquot[MAXQUOTAS]; 808 struct dquot *i_dquot[MAXQUOTAS];
@@ -806,10 +825,6 @@ struct inode {
806 atomic_t i_readcount; /* struct files open RO */ 825 atomic_t i_readcount; /* struct files open RO */
807#endif 826#endif
808 atomic_t i_writecount; 827 atomic_t i_writecount;
809#ifdef CONFIG_FS_POSIX_ACL
810 struct posix_acl *i_acl;
811 struct posix_acl *i_default_acl;
812#endif
813 void *i_private; /* fs or device private pointer */ 828 void *i_private; /* fs or device private pointer */
814}; 829};
815 830
@@ -2317,11 +2332,18 @@ extern int should_remove_suid(struct dentry *);
2317extern int file_remove_suid(struct file *); 2332extern int file_remove_suid(struct file *);
2318 2333
2319extern void __insert_inode_hash(struct inode *, unsigned long hashval); 2334extern void __insert_inode_hash(struct inode *, unsigned long hashval);
2320extern void remove_inode_hash(struct inode *);
2321static inline void insert_inode_hash(struct inode *inode) 2335static inline void insert_inode_hash(struct inode *inode)
2322{ 2336{
2323 __insert_inode_hash(inode, inode->i_ino); 2337 __insert_inode_hash(inode, inode->i_ino);
2324} 2338}
2339
2340extern void __remove_inode_hash(struct inode *);
2341static inline void remove_inode_hash(struct inode *inode)
2342{
2343 if (!inode_unhashed(inode))
2344 __remove_inode_hash(inode);
2345}
2346
2325extern void inode_sb_list_add(struct inode *inode); 2347extern void inode_sb_list_add(struct inode *inode);
2326 2348
2327#ifdef CONFIG_BLOCK 2349#ifdef CONFIG_BLOCK
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index 5bbebda78b0..5e98eeb2af3 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -1,8 +1,26 @@
1/* 1/*
2 * Basic general purpose allocator for managing special purpose memory 2 * Basic general purpose allocator for managing special purpose
3 * not managed by the regular kmalloc/kfree interface. 3 * memory, for example, memory that is not managed by the regular
4 * Uses for this includes on-device special memory, uncached memory 4 * kmalloc/kfree interface. Uses for this includes on-device special
5 * etc. 5 * memory, uncached memory etc.
6 *
7 * It is safe to use the allocator in NMI handlers and other special
8 * unblockable contexts that could otherwise deadlock on locks. This
9 * is implemented by using atomic operations and retries on any
10 * conflicts. The disadvantage is that there may be livelocks in
11 * extreme cases. For better scalability, one allocator can be used
12 * for each CPU.
13 *
14 * The lockless operation only works if there is enough memory
 15 * available. If new memory is added to the pool, a lock still has
 16 * to be taken. So any user relying on locklessness has to ensure
17 * that sufficient memory is preallocated.
18 *
19 * The basic atomic operation of this allocator is cmpxchg on long.
 20 * On architectures that don't have an NMI-safe cmpxchg
 21 * implementation, the allocator can NOT be used in an NMI handler.
 22 * So code that uses the allocator in an NMI handler should depend
 23 * on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
6 * 24 *
7 * This source code is licensed under the GNU General Public License, 25 * This source code is licensed under the GNU General Public License,
8 * Version 2. See the file COPYING for more details. 26 * Version 2. See the file COPYING for more details.
@@ -15,7 +33,7 @@
15 * General purpose special memory pool descriptor. 33 * General purpose special memory pool descriptor.
16 */ 34 */
17struct gen_pool { 35struct gen_pool {
18 rwlock_t lock; 36 spinlock_t lock;
19 struct list_head chunks; /* list of chunks in this pool */ 37 struct list_head chunks; /* list of chunks in this pool */
20 int min_alloc_order; /* minimum allocation order */ 38 int min_alloc_order; /* minimum allocation order */
21}; 39};
@@ -24,8 +42,8 @@ struct gen_pool {
24 * General purpose special memory pool chunk descriptor. 42 * General purpose special memory pool chunk descriptor.
25 */ 43 */
26struct gen_pool_chunk { 44struct gen_pool_chunk {
27 spinlock_t lock;
28 struct list_head next_chunk; /* next chunk in pool */ 45 struct list_head next_chunk; /* next chunk in pool */
46 atomic_t avail;
29 phys_addr_t phys_addr; /* physical starting address of memory chunk */ 47 phys_addr_t phys_addr; /* physical starting address of memory chunk */
30 unsigned long start_addr; /* starting address of memory chunk */ 48 unsigned long start_addr; /* starting address of memory chunk */
31 unsigned long end_addr; /* ending address of memory chunk */ 49 unsigned long end_addr; /* ending address of memory chunk */
@@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
56extern void gen_pool_destroy(struct gen_pool *); 74extern void gen_pool_destroy(struct gen_pool *);
57extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); 75extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
58extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); 76extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
77extern void gen_pool_for_each_chunk(struct gen_pool *,
78 void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
79extern size_t gen_pool_avail(struct gen_pool *);
80extern size_t gen_pool_size(struct gen_pool *);
59#endif /* __GENALLOC_H__ */ 81#endif /* __GENALLOC_H__ */
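
A sketch of the usage pattern the reworked genalloc comment describes: the pool is populated in process context, then alloc/free run from restricted contexts. Addresses, the order, and the demo_* names are invented:

#include <linux/genalloc.h>

static struct gen_pool *demo_pool;

/* setup runs in process context; adding memory still takes the lock */
static int demo_pool_init(unsigned long start, size_t size)
{
	demo_pool = gen_pool_create(4, -1);	/* 16-byte granules, any node */
	if (!demo_pool)
		return -ENOMEM;
	return gen_pool_add(demo_pool, start, size, -1);
}

/* the alloc path is the lockless part */
static unsigned long demo_pool_grab(size_t bytes)
{
	return gen_pool_alloc(demo_pool, bytes);	/* 0 on failure */
}
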
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cb4089254f0..3a76faf6a3e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -92,7 +92,7 @@ struct vm_area_struct;
92 */ 92 */
93#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) 93#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
94 94
95#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ 95#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */
96#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 96#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
97 97
98/* This equals 0, but use constants in case they ever change */ 98/* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 13a801f3d02..255491cf522 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id);
146void ida_destroy(struct ida *ida); 146void ida_destroy(struct ida *ida);
147void ida_init(struct ida *ida); 147void ida_init(struct ida *ida);
148 148
149int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
150 gfp_t gfp_mask);
151void ida_simple_remove(struct ida *ida, unsigned int id);
152
149void __init idr_init_cache(void); 153void __init idr_init_cache(void);
150 154
151#endif /* __IDR_H__ */ 155#endif /* __IDR_H__ */
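
ida_simple_get()/ida_simple_remove() above wrap the usual ida_pre_get()/ida_get_new_above() retry loop behind a single call. A sketch with an invented IDA:

#include <linux/idr.h>
#include <linux/gfp.h>

static DEFINE_IDA(demo_ida);

static int demo_use_id(void)
{
	int id = ida_simple_get(&demo_ida, 0, 100, GFP_KERNEL);

	if (id < 0)
		return id;		/* -ENOMEM or -ENOSPC */
	/* ... use id ... */
	ida_simple_remove(&demo_ida, id);
	return 0;
}
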
diff --git a/include/linux/input.h b/include/linux/input.h
index 068784e1797..a637e781433 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -438,6 +438,8 @@ struct input_keymap_entry {
438#define KEY_WIMAX 246 438#define KEY_WIMAX 246
439#define KEY_RFKILL 247 /* Key that controls all radios */ 439#define KEY_RFKILL 247 /* Key that controls all radios */
440 440
441#define KEY_MICMUTE 248 /* Mute / unmute the microphone */
442
441/* Code 255 is reserved for special needs of AT keyboard driver */ 443/* Code 255 is reserved for special needs of AT keyboard driver */
442 444
443#define BTN_MISC 0x100 445#define BTN_MISC 0x100
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d087c2e7b2a..38f307b8c33 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1329,12 +1329,6 @@ extern int jbd_blocks_per_page(struct inode *inode);
1329#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) 1329#define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
1330#define JBUFFER_TRACE(jh, info) do {} while (0) 1330#define JBUFFER_TRACE(jh, info) do {} while (0)
1331 1331
1332/*
1333 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
1334 * tracing infrastructure to map a dev_t to a device name.
1335 */
1336extern const char *jbd2_dev_to_name(dev_t device);
1337
1338#endif /* __KERNEL__ */ 1332#endif /* __KERNEL__ */
1339 1333
1340#endif /* _LINUX_JBD2_H */ 1334#endif /* _LINUX_JBD2_H */
diff --git a/include/linux/llist.h b/include/linux/llist.h
new file mode 100644
index 00000000000..aa0c8b5b3cd
--- /dev/null
+++ b/include/linux/llist.h
@@ -0,0 +1,126 @@
1#ifndef LLIST_H
2#define LLIST_H
3/*
4 * Lock-less NULL terminated single linked list
5 *
 6 * If there are multiple producers and multiple consumers, llist_add
 7 * can be used in producers and llist_del_all can be used in
 8 * consumers. They can work simultaneously without a lock. But
 9 * llist_del_first cannot be used here, because llist_del_first
 10 * depends on list->first->next not changing while list->first is
 11 * unchanged during its operation; a llist_del_first, llist_add,
 12 * llist_add (or llist_del_all, llist_add, llist_add) sequence in
 13 * another consumer may violate that.
14 *
15 * If there are multiple producers and one consumer, llist_add can be
16 * used in producers and llist_del_all or llist_del_first can be used
17 * in the consumer.
18 *
 19 * This can be summarized as follows:
20 *
21 * | add | del_first | del_all
22 * add | - | - | -
23 * del_first | | L | L
24 * del_all | | | -
25 *
 26 * Where "-" means that no lock is needed, while "L" means that a
 27 * lock is needed.
28 *
 29 * The list entries deleted via llist_del_all can be traversed with
 30 * traversing functions such as llist_for_each etc. But the list
 31 * entries cannot be traversed safely before being deleted from the
 32 * list. The order of deleted entries is from the newest to the
 33 * oldest added one. If you want to traverse from the oldest to the
 34 * newest, you must reverse the order yourself before traversing.
35 *
 36 * The basic atomic operation of this list is cmpxchg on long. On
 37 * architectures that don't have an NMI-safe cmpxchg implementation,
 38 * the list can NOT be used in an NMI handler. So code that uses the
 39 * list in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
40 */
41
42struct llist_head {
43 struct llist_node *first;
44};
45
46struct llist_node {
47 struct llist_node *next;
48};
49
50#define LLIST_HEAD_INIT(name) { NULL }
51#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name)
52
53/**
54 * init_llist_head - initialize lock-less list head
55 * @head: the head for your lock-less list
56 */
57static inline void init_llist_head(struct llist_head *list)
58{
59 list->first = NULL;
60}
61
62/**
63 * llist_entry - get the struct of this entry
64 * @ptr: the &struct llist_node pointer.
65 * @type: the type of the struct this is embedded in.
66 * @member: the name of the llist_node within the struct.
67 */
68#define llist_entry(ptr, type, member) \
69 container_of(ptr, type, member)
70
71/**
72 * llist_for_each - iterate over some deleted entries of a lock-less list
73 * @pos: the &struct llist_node to use as a loop cursor
74 * @node: the first entry of deleted list entries
75 *
 76 * In general, some entries of the lock-less list can be traversed
 77 * safely only after being deleted from the list, so start with an
 78 * entry instead of the list head.
 79 *
 80 * If used on entries deleted from a lock-less list directly, the
 81 * traversal order is from the newest to the oldest added entry. If
 82 * you want to traverse from the oldest to the newest, you must
 83 * reverse the order yourself before traversing.
84 */
85#define llist_for_each(pos, node) \
86 for ((pos) = (node); pos; (pos) = (pos)->next)
87
88/**
89 * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type
90 * @pos: the type * to use as a loop cursor.
 91 * @node: the first entry of deleted list entries.
 92 * @member: the name of the llist_node within the struct.
93 *
 94 * In general, some entries of the lock-less list can be traversed
 95 * safely only after being removed from the list, so start with an
 96 * entry instead of the list head.
 97 *
 98 * If used on entries deleted from a lock-less list directly, the
 99 * traversal order is from the newest to the oldest added entry. If
 100 * you want to traverse from the oldest to the newest, you must
 101 * reverse the order yourself before traversing.
102 */
103#define llist_for_each_entry(pos, node, member) \
104 for ((pos) = llist_entry((node), typeof(*(pos)), member); \
105 &(pos)->member != NULL; \
106 (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
107
108/**
109 * llist_empty - tests whether a lock-less list is empty
110 * @head: the list to test
111 *
112 * Not guaranteed to be accurate or up to date. Just a quick way to
113 * test whether the list is empty without deleting something from the
114 * list.
115 */
116static inline int llist_empty(const struct llist_head *head)
117{
118 return ACCESS_ONCE(head->first) == NULL;
119}
120
121void llist_add(struct llist_node *new, struct llist_head *head);
122void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
123 struct llist_head *head);
124struct llist_node *llist_del_first(struct llist_head *head);
125struct llist_node *llist_del_all(struct llist_head *head);
126#endif /* LLIST_H */
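
A sketch of the multiple-producer/single-consumer pattern the header comment describes, with invented demo_* names:

#include <linux/llist.h>

struct demo_item {
	struct llist_node node;
	int payload;
};

static LLIST_HEAD(demo_list);

/* producers: any context, concurrently, no lock */
static void demo_produce(struct demo_item *item)
{
	llist_add(&item->node, &demo_list);
}

/* single consumer: detach everything, then walk newest-to-oldest */
static void demo_consume(void)
{
	struct llist_node *first = llist_del_all(&demo_list);
	struct demo_item *item;

	llist_for_each_entry(item, first, node) {
		/* handle item->payload */
	}
}
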
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b9660078691..3b535db00a9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void);
86 86
87extern void mem_cgroup_uncharge_page(struct page *page); 87extern void mem_cgroup_uncharge_page(struct page *page);
88extern void mem_cgroup_uncharge_cache_page(struct page *page); 88extern void mem_cgroup_uncharge_cache_page(struct page *page);
89extern int mem_cgroup_shmem_charge_fallback(struct page *page,
90 struct mm_struct *mm, gfp_t gfp_mask);
91 89
92extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); 90extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
93int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); 91int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
@@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
225{ 223{
226} 224}
227 225
228static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
229 struct mm_struct *mm, gfp_t gfp_mask)
230{
231 return 0;
232}
233
234static inline void mem_cgroup_add_lru_list(struct page *page, int lru) 226static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
235{ 227{
236} 228}
diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h
index 89212df0562..f7316c29bde 100644
--- a/include/linux/mfd/aat2870.h
+++ b/include/linux/mfd/aat2870.h
@@ -89,7 +89,7 @@ enum aat2870_id {
89 89
90/* Backlight current magnitude (mA) */ 90/* Backlight current magnitude (mA) */
91enum aat2870_current { 91enum aat2870_current {
92 AAT2870_CURRENT_0_45, 92 AAT2870_CURRENT_0_45 = 1,
93 AAT2870_CURRENT_0_90, 93 AAT2870_CURRENT_0_90,
94 AAT2870_CURRENT_1_80, 94 AAT2870_CURRENT_1_80,
95 AAT2870_CURRENT_2_70, 95 AAT2870_CURRENT_2_70,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3172a1c0f08..f2690cf4982 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1600,6 +1600,7 @@ enum mf_flags {
1600}; 1600};
1601extern void memory_failure(unsigned long pfn, int trapno); 1601extern void memory_failure(unsigned long pfn, int trapno);
1602extern int __memory_failure(unsigned long pfn, int trapno, int flags); 1602extern int __memory_failure(unsigned long pfn, int trapno, int flags);
1603extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
1603extern int unpoison_memory(unsigned long pfn); 1604extern int unpoison_memory(unsigned long pfn);
1604extern int sysctl_memory_failure_early_kill; 1605extern int sysctl_memory_failure_early_kill;
1605extern int sysctl_memory_failure_recovery; 1606extern int sysctl_memory_failure_recovery;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b96fb99072f..eaac770f886 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -569,12 +569,12 @@ extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
569extern int nfs3_proc_setacl(struct inode *inode, int type, 569extern int nfs3_proc_setacl(struct inode *inode, int type,
570 struct posix_acl *acl); 570 struct posix_acl *acl);
571extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, 571extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
572 mode_t mode); 572 umode_t mode);
573extern void nfs3_forget_cached_acls(struct inode *inode); 573extern void nfs3_forget_cached_acls(struct inode *inode);
574#else 574#else
575static inline int nfs3_proc_set_default_acl(struct inode *dir, 575static inline int nfs3_proc_set_default_acl(struct inode *dir,
576 struct inode *inode, 576 struct inode *inode,
577 mode_t mode) 577 umode_t mode)
578{ 578{
579 return 0; 579 return 0;
580} 580}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 569ea5b76fd..abd615d74a2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -773,6 +773,11 @@ struct nfs3_getaclres {
773 struct posix_acl * acl_default; 773 struct posix_acl * acl_default;
774}; 774};
775 775
776struct nfs4_string {
777 unsigned int len;
778 char *data;
779};
780
776#ifdef CONFIG_NFS_V4 781#ifdef CONFIG_NFS_V4
777 782
778typedef u64 clientid4; 783typedef u64 clientid4;
@@ -963,11 +968,6 @@ struct nfs4_server_caps_res {
963 struct nfs4_sequence_res seq_res; 968 struct nfs4_sequence_res seq_res;
964}; 969};
965 970
966struct nfs4_string {
967 unsigned int len;
968 char *data;
969};
970
971#define NFS4_PATHNAME_MAXCOMPONENTS 512 971#define NFS4_PATHNAME_MAXCOMPONENTS 512
972struct nfs4_pathname { 972struct nfs4_pathname {
973 unsigned int ncomponents; 973 unsigned int ncomponents;
diff --git a/include/linux/of.h b/include/linux/of.h
index bd716f8908d..0085bb01c04 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -196,12 +196,13 @@ extern struct property *of_find_property(const struct device_node *np,
196 const char *name, 196 const char *name,
197 int *lenp); 197 int *lenp);
198extern int of_property_read_u32_array(const struct device_node *np, 198extern int of_property_read_u32_array(const struct device_node *np,
199 char *propname, 199 const char *propname,
200 u32 *out_values, 200 u32 *out_values,
201 size_t sz); 201 size_t sz);
202 202
203extern int of_property_read_string(struct device_node *np, char *propname, 203extern int of_property_read_string(struct device_node *np,
204 const char **out_string); 204 const char *propname,
205 const char **out_string);
205extern int of_device_is_compatible(const struct device_node *device, 206extern int of_device_is_compatible(const struct device_node *device,
206 const char *); 207 const char *);
207extern int of_device_is_available(const struct device_node *device); 208extern int of_device_is_available(const struct device_node *device);
@@ -242,13 +243,15 @@ static inline bool of_have_populated_dt(void)
242} 243}
243 244
244static inline int of_property_read_u32_array(const struct device_node *np, 245static inline int of_property_read_u32_array(const struct device_node *np,
245 char *propname, u32 *out_values, size_t sz) 246 const char *propname,
247 u32 *out_values, size_t sz)
246{ 248{
247 return -ENOSYS; 249 return -ENOSYS;
248} 250}
249 251
250static inline int of_property_read_string(struct device_node *np, 252static inline int of_property_read_string(struct device_node *np,
251 char *propname, const char **out_string) 253 const char *propname,
254 const char **out_string)
252{ 255{
253 return -ENOSYS; 256 return -ENOSYS;
254} 257}
@@ -256,7 +259,7 @@ static inline int of_property_read_string(struct device_node *np,
256#endif /* CONFIG_OF */ 259#endif /* CONFIG_OF */
257 260
258static inline int of_property_read_u32(const struct device_node *np, 261static inline int of_property_read_u32(const struct device_node *np,
259 char *propname, 262 const char *propname,
260 u32 *out_value) 263 u32 *out_value)
261{ 264{
262 return of_property_read_u32_array(np, propname, out_value, 1); 265 return of_property_read_u32_array(np, propname, out_value, 1);
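As a usage sketch (not from this patch), the now const-correct helpers read typed properties straight off a device node; the property names below are illustrative:

    #include <linux/of.h>
    #include <linux/kernel.h>

    static void read_node_config(struct device_node *np)
    {
            u32 freq;
            const char *label;

            /* Both helpers return 0 on success; fall back if absent. */
            if (of_property_read_u32(np, "clock-frequency", &freq))
                    freq = 100000;
            if (of_property_read_string(np, "label", &label))
                    label = "unnamed";

            pr_info("%s: %u Hz\n", label, freq);
    }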
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b00c4ec5056..ae96bbe5451 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2709,6 +2709,16 @@
2709#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 2709#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60
2710#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 2710#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00
2711#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f 2711#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f
2712#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20
2713#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21
2714#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22
2715#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23
2716#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24
2717#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25
2718#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26
2719#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27
2720#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e
2721#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f
2712#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f 2722#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
2713#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 2723#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
2714#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 2724#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 9a53b99818e..b7681102a4b 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -9,6 +9,7 @@
9#define __LINUX_POSIX_ACL_H 9#define __LINUX_POSIX_ACL_H
10 10
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/rcupdate.h>
12 13
13#define ACL_UNDEFINED_ID (-1) 14#define ACL_UNDEFINED_ID (-1)
14 15
@@ -38,7 +39,10 @@ struct posix_acl_entry {
38}; 39};
39 40
40struct posix_acl { 41struct posix_acl {
41 atomic_t a_refcount; 42 union {
43 atomic_t a_refcount;
44 struct rcu_head a_rcu;
45 };
42 unsigned int a_count; 46 unsigned int a_count;
43 struct posix_acl_entry a_entries[0]; 47 struct posix_acl_entry a_entries[0];
44}; 48};
@@ -65,7 +69,7 @@ static inline void
65posix_acl_release(struct posix_acl *acl) 69posix_acl_release(struct posix_acl *acl)
66{ 70{
67 if (acl && atomic_dec_and_test(&acl->a_refcount)) 71 if (acl && atomic_dec_and_test(&acl->a_refcount))
68 kfree(acl); 72 kfree_rcu(acl, a_rcu);
69} 73}
70 74
71 75
@@ -75,29 +79,31 @@ extern void posix_acl_init(struct posix_acl *, int);
75extern struct posix_acl *posix_acl_alloc(int, gfp_t); 79extern struct posix_acl *posix_acl_alloc(int, gfp_t);
76extern int posix_acl_valid(const struct posix_acl *); 80extern int posix_acl_valid(const struct posix_acl *);
77extern int posix_acl_permission(struct inode *, const struct posix_acl *, int); 81extern int posix_acl_permission(struct inode *, const struct posix_acl *, int);
78extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t); 82extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
79extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *); 83extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
80extern int posix_acl_create(struct posix_acl **, gfp_t, mode_t *); 84extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
81extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t); 85extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
82 86
83extern struct posix_acl *get_posix_acl(struct inode *, int); 87extern struct posix_acl *get_posix_acl(struct inode *, int);
84extern int set_posix_acl(struct inode *, int, struct posix_acl *); 88extern int set_posix_acl(struct inode *, int, struct posix_acl *);
85 89
86#ifdef CONFIG_FS_POSIX_ACL 90#ifdef CONFIG_FS_POSIX_ACL
87static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) 91static inline struct posix_acl **acl_by_type(struct inode *inode, int type)
88{ 92{
89 struct posix_acl **p, *acl;
90 switch (type) { 93 switch (type) {
91 case ACL_TYPE_ACCESS: 94 case ACL_TYPE_ACCESS:
92 p = &inode->i_acl; 95 return &inode->i_acl;
93 break;
94 case ACL_TYPE_DEFAULT: 96 case ACL_TYPE_DEFAULT:
95 p = &inode->i_default_acl; 97 return &inode->i_default_acl;
96 break;
97 default: 98 default:
98 return ERR_PTR(-EINVAL); 99 BUG();
99 } 100 }
100 acl = ACCESS_ONCE(*p); 101}
102
103static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
104{
105 struct posix_acl **p = acl_by_type(inode, type);
106 struct posix_acl *acl = ACCESS_ONCE(*p);
101 if (acl) { 107 if (acl) {
102 spin_lock(&inode->i_lock); 108 spin_lock(&inode->i_lock);
103 acl = *p; 109 acl = *p;
@@ -108,41 +114,20 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type)
108 return acl; 114 return acl;
109} 115}
110 116
111static inline int negative_cached_acl(struct inode *inode, int type) 117static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
112{ 118{
113 struct posix_acl **p, *acl; 119 return rcu_dereference(*acl_by_type(inode, type));
114 switch (type) {
115 case ACL_TYPE_ACCESS:
116 p = &inode->i_acl;
117 break;
118 case ACL_TYPE_DEFAULT:
119 p = &inode->i_default_acl;
120 break;
121 default:
122 BUG();
123 }
124 acl = ACCESS_ONCE(*p);
125 if (acl)
126 return 0;
127 return 1;
128} 120}
129 121
130static inline void set_cached_acl(struct inode *inode, 122static inline void set_cached_acl(struct inode *inode,
131 int type, 123 int type,
132 struct posix_acl *acl) 124 struct posix_acl *acl)
133{ 125{
134 struct posix_acl *old = NULL; 126 struct posix_acl **p = acl_by_type(inode, type);
127 struct posix_acl *old;
135 spin_lock(&inode->i_lock); 128 spin_lock(&inode->i_lock);
136 switch (type) { 129 old = *p;
137 case ACL_TYPE_ACCESS: 130 rcu_assign_pointer(*p, posix_acl_dup(acl));
138 old = inode->i_acl;
139 inode->i_acl = posix_acl_dup(acl);
140 break;
141 case ACL_TYPE_DEFAULT:
142 old = inode->i_default_acl;
143 inode->i_default_acl = posix_acl_dup(acl);
144 break;
145 }
146 spin_unlock(&inode->i_lock); 131 spin_unlock(&inode->i_lock);
147 if (old != ACL_NOT_CACHED) 132 if (old != ACL_NOT_CACHED)
148 posix_acl_release(old); 133 posix_acl_release(old);
@@ -150,18 +135,11 @@ static inline void set_cached_acl(struct inode *inode,
150 135
151static inline void forget_cached_acl(struct inode *inode, int type) 136static inline void forget_cached_acl(struct inode *inode, int type)
152{ 137{
153 struct posix_acl *old = NULL; 138 struct posix_acl **p = acl_by_type(inode, type);
139 struct posix_acl *old;
154 spin_lock(&inode->i_lock); 140 spin_lock(&inode->i_lock);
155 switch (type) { 141 old = *p;
156 case ACL_TYPE_ACCESS: 142 *p = ACL_NOT_CACHED;
157 old = inode->i_acl;
158 inode->i_acl = ACL_NOT_CACHED;
159 break;
160 case ACL_TYPE_DEFAULT:
161 old = inode->i_default_acl;
162 inode->i_default_acl = ACL_NOT_CACHED;
163 break;
164 }
165 spin_unlock(&inode->i_lock); 143 spin_unlock(&inode->i_lock);
166 if (old != ACL_NOT_CACHED) 144 if (old != ACL_NOT_CACHED)
167 posix_acl_release(old); 145 posix_acl_release(old);
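The union of a_refcount and a_rcu above is a space-saving idiom worth spelling out: the refcount only matters while the object is live, and the rcu_head is only needed once the last reference is dropped, so the two can share storage. The same pattern in miniature (illustrative, not from this patch):

    #include <linux/slab.h>
    #include <linux/rcupdate.h>
    #include <linux/atomic.h>

    struct cached_obj {
            union {
                    atomic_t refcount;      /* used while object is live */
                    struct rcu_head rcu;    /* reused once refcount hits 0 */
            };
            int payload;
    };

    static void cached_obj_put(struct cached_obj *obj)
    {
            /* No new reference can appear once the count reaches zero,
             * so overwriting the counter with the rcu_head is safe. */
            if (obj && atomic_dec_and_test(&obj->refcount))
                    kfree_rcu(obj, rcu);
    }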
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 2455ef2683f..cc03bbf5c4b 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -38,9 +38,12 @@ struct pstore_info {
38 int (*open)(struct pstore_info *psi); 38 int (*open)(struct pstore_info *psi);
39 int (*close)(struct pstore_info *psi); 39 int (*close)(struct pstore_info *psi);
40 ssize_t (*read)(u64 *id, enum pstore_type_id *type, 40 ssize_t (*read)(u64 *id, enum pstore_type_id *type,
41 struct timespec *time); 41 struct timespec *time, struct pstore_info *psi);
42 u64 (*write)(enum pstore_type_id type, size_t size); 42 u64 (*write)(enum pstore_type_id type, unsigned int part,
43 int (*erase)(u64 id); 43 size_t size, struct pstore_info *psi);
44 int (*erase)(enum pstore_type_id type, u64 id,
45 struct pstore_info *psi);
46 void *data;
44}; 47};
45 48
46#ifdef CONFIG_PSTORE 49#ifdef CONFIG_PSTORE
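To see the revised ops in context, a skeletal backend might look like the sketch below. Only the callback signatures come from the hunk above; the backend struct, return values, and the pstore_register() call (assumed from the existing pstore core) are illustrative:

    struct my_backend { int next_record; };
    static struct my_backend my_backend_state;

    static ssize_t my_read(u64 *id, enum pstore_type_id *type,
                           struct timespec *time, struct pstore_info *psi)
    {
            struct my_backend *be = psi->data;  /* per-instance state */

            /* ... locate next record, fill *id, *type, *time ... */
            return 0;                           /* 0 bytes: no more records */
    }

    static u64 my_write(enum pstore_type_id type, unsigned int part,
                        size_t size, struct pstore_info *psi)
    {
            /* ... persist psi->buf; @part splits a large dump ... */
            return 0;                           /* record id */
    }

    static int my_erase(enum pstore_type_id type, u64 id,
                        struct pstore_info *psi)
    {
            return -ENOENT;                     /* nothing stored yet */
    }

    static struct pstore_info my_psinfo = {
            .read  = my_read,
            .write = my_write,
            .erase = my_erase,
            .data  = &my_backend_state,
    };
    /* registered with pstore_register(&my_psinfo) */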
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 23241c2fecc..9d4539c52e5 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -39,7 +39,15 @@
39 * when it is shrunk, before we rcu free the node. See shrink code for 39 * when it is shrunk, before we rcu free the node. See shrink code for
40 * details. 40 * details.
41 */ 41 */
42#define RADIX_TREE_INDIRECT_PTR 1 42#define RADIX_TREE_INDIRECT_PTR 1
43/*
44 * A common use of the radix tree is to store pointers to struct pages;
45 * but shmem/tmpfs needs also to store swap entries in the same tree:
46 * those are marked as exceptional entries to distinguish them.
47 * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.
48 */
49#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
50#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
43 51
44#define radix_tree_indirect_to_ptr(ptr) \ 52#define radix_tree_indirect_to_ptr(ptr) \
45 radix_tree_indirect_to_ptr((void __force *)(ptr)) 53 radix_tree_indirect_to_ptr((void __force *)(ptr))
@@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg)
174} 182}
175 183
176/** 184/**
185 * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry?
186 * @arg: value returned by radix_tree_deref_slot
187 * Returns: 0 if well-aligned pointer, non-0 if exceptional entry.
188 */
189static inline int radix_tree_exceptional_entry(void *arg)
190{
191 /* Not marked unlikely because radix_tree_exception() is often tested first */
192 return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
193}
194
195/**
196 * radix_tree_exception - radix_tree_deref_slot returned either exception?
197 * @arg: value returned by radix_tree_deref_slot
198 * Returns: 0 if well-aligned pointer, non-0 if either kind of exception.
199 */
200static inline int radix_tree_exception(void *arg)
201{
202 return unlikely((unsigned long)arg &
203 (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY));
204}
205
206/**
177 * radix_tree_replace_slot - replace item in a slot 207 * radix_tree_replace_slot - replace item in a slot
178 * @pslot: pointer to slot, returned by radix_tree_lookup_slot 208 * @pslot: pointer to slot, returned by radix_tree_lookup_slot
179 * @item: new item to store in the slot. 209 * @item: new item to store in the slot.
@@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
194unsigned int 224unsigned int
195radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 225radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
196 unsigned long first_index, unsigned int max_items); 226 unsigned long first_index, unsigned int max_items);
197unsigned int 227unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
198radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 228 void ***results, unsigned long *indices,
199 unsigned long first_index, unsigned int max_items); 229 unsigned long first_index, unsigned int max_items);
200unsigned long radix_tree_next_hole(struct radix_tree_root *root, 230unsigned long radix_tree_next_hole(struct radix_tree_root *root,
201 unsigned long index, unsigned long max_scan); 231 unsigned long index, unsigned long max_scan);
@@ -222,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
222 unsigned long nr_to_tag, 252 unsigned long nr_to_tag,
223 unsigned int fromtag, unsigned int totag); 253 unsigned int fromtag, unsigned int totag);
224int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); 254int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
255unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);
225 256
226static inline void radix_tree_preload_end(void) 257static inline void radix_tree_preload_end(void)
227{ 258{
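A short sketch of what the new exceptional-entry test enables when walking a tree that mixes page pointers and packed values (illustrative; shmem is the in-tree user):

    #include <linux/radix-tree.h>
    #include <linux/kernel.h>

    static void inspect_slot(struct radix_tree_root *tree, unsigned long index)
    {
            void *entry = radix_tree_lookup(tree, index);

            if (radix_tree_exceptional_entry(entry)) {
                    /* Not a pointer: the payload sits above the tag bit. */
                    unsigned long value =
                            (unsigned long)entry >> RADIX_TREE_EXCEPTIONAL_SHIFT;
                    pr_debug("exceptional entry, value %lu\n", value);
            } else if (entry) {
                    struct page *page = entry;  /* ordinary aligned pointer */
                    pr_debug("page entry %p\n", page);
            }
    }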
diff --git a/include/linux/random.h b/include/linux/random.h
index ce29a040c8d..d13059f3ea3 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -57,18 +57,6 @@ extern void add_interrupt_randomness(int irq);
57extern void get_random_bytes(void *buf, int nbytes); 57extern void get_random_bytes(void *buf, int nbytes);
58void generate_random_uuid(unsigned char uuid_out[16]); 58void generate_random_uuid(unsigned char uuid_out[16]);
59 59
60extern __u32 secure_ip_id(__be32 daddr);
61extern __u32 secure_ipv6_id(const __be32 daddr[4]);
62extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
63extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
64 __be16 dport);
65extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
66 __be16 sport, __be16 dport);
67extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
68 __be16 sport, __be16 dport);
69extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
70 __be16 sport, __be16 dport);
71
72#ifndef MODULE 60#ifndef MODULE
73extern const struct file_operations random_fops, urandom_fops; 61extern const struct file_operations random_fops, urandom_fops;
74#endif 62#endif
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 9e87c1cb727..26f6ea4444e 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -122,6 +122,9 @@ struct regulator;
122struct regulator_bulk_data { 122struct regulator_bulk_data {
123 const char *supply; 123 const char *supply;
124 struct regulator *consumer; 124 struct regulator *consumer;
125
126 /* Internal use */
127 int ret;
125}; 128};
126 129
127#if defined(CONFIG_REGULATOR) 130#if defined(CONFIG_REGULATOR)
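For reference, the bulk API this struct feeds into is typically used as below (illustrative sketch; the supply names are made up, and the new ->ret member is purely internal to the regulator core):

    #include <linux/regulator/consumer.h>
    #include <linux/device.h>
    #include <linux/kernel.h>

    static struct regulator_bulk_data supplies[] = {
            { .supply = "vdd" },
            { .supply = "vio" },
    };

    static int my_probe(struct device *dev)
    {
            int ret;

            ret = regulator_bulk_get(dev, ARRAY_SIZE(supplies), supplies);
            if (ret)
                    return ret;
            /* Enables every supply, unwinding on partial failure. */
            return regulator_bulk_enable(ARRAY_SIZE(supplies), supplies);
    }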
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 6c433b89c80..1a80bc77517 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -188,18 +188,16 @@ struct regulator_dev {
188 188
189 /* lists we belong to */ 189 /* lists we belong to */
190 struct list_head list; /* list of all regulators */ 190 struct list_head list; /* list of all regulators */
191 struct list_head slist; /* list of supplied regulators */
192 191
193 /* lists we own */ 192 /* lists we own */
194 struct list_head consumer_list; /* consumers we supply */ 193 struct list_head consumer_list; /* consumers we supply */
195 struct list_head supply_list; /* regulators we supply */
196 194
197 struct blocking_notifier_head notifier; 195 struct blocking_notifier_head notifier;
198 struct mutex mutex; /* consumer lock */ 196 struct mutex mutex; /* consumer lock */
199 struct module *owner; 197 struct module *owner;
200 struct device dev; 198 struct device dev;
201 struct regulation_constraints *constraints; 199 struct regulation_constraints *constraints;
202 struct regulator_dev *supply; /* for tree */ 200 struct regulator *supply; /* for tree */
203 201
204 void *reg_data; /* regulator_dev data */ 202 void *reg_data; /* regulator_dev data */
205 203
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index aa08fa8fd79..9291ac3cc62 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -8,22 +8,15 @@
8 8
9/* inode in-kernel data */ 9/* inode in-kernel data */
10 10
11#define SHMEM_NR_DIRECT 16
12
13#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t))
14
15struct shmem_inode_info { 11struct shmem_inode_info {
16 spinlock_t lock; 12 spinlock_t lock;
17 unsigned long flags; 13 unsigned long flags;
18 unsigned long alloced; /* data pages alloced to file */ 14 unsigned long alloced; /* data pages alloced to file */
19 unsigned long swapped; /* subtotal assigned to swap */
20 unsigned long next_index; /* highest alloced index + 1 */
21 struct shared_policy policy; /* NUMA memory alloc policy */
22 struct page *i_indirect; /* top indirect blocks page */
23 union { 15 union {
24 swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ 16 unsigned long swapped; /* subtotal assigned to swap */
25 char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; 17 char *symlink; /* unswappable short symlink */
26 }; 18 };
19 struct shared_policy policy; /* NUMA memory alloc policy */
27 struct list_head swaplist; /* chain of maybes on swap */ 20 struct list_head swaplist; /* chain of maybes on swap */
28 struct list_head xattr_list; /* list of shmem_xattr */ 21 struct list_head xattr_list; /* list of shmem_xattr */
29 struct inode vfs_inode; 22 struct inode vfs_inode;
@@ -49,7 +42,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
49/* 42/*
50 * Functions in mm/shmem.c called directly from elsewhere: 43 * Functions in mm/shmem.c called directly from elsewhere:
51 */ 44 */
52extern int init_tmpfs(void); 45extern int shmem_init(void);
53extern int shmem_fill_super(struct super_block *sb, void *data, int silent); 46extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
54extern struct file *shmem_file_setup(const char *name, 47extern struct file *shmem_file_setup(const char *name,
55 loff_t size, unsigned long flags); 48 loff_t size, unsigned long flags);
@@ -59,8 +52,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
59 pgoff_t index, gfp_t gfp_mask); 52 pgoff_t index, gfp_t gfp_mask);
60extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); 53extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
61extern int shmem_unuse(swp_entry_t entry, struct page *page); 54extern int shmem_unuse(swp_entry_t entry, struct page *page);
62extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
63 struct page **pagep, swp_entry_t *ent);
64 55
65static inline struct page *shmem_read_mapping_page( 56static inline struct page *shmem_read_mapping_page(
66 struct address_space *mapping, pgoff_t index) 57 struct address_space *mapping, pgoff_t index)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cd42e30b7c6..2189d3ffc85 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -1,3 +1,8 @@
1#ifndef _LINUX_SWAPOPS_H
2#define _LINUX_SWAPOPS_H
3
4#include <linux/radix-tree.h>
5
1/* 6/*
2 * swapcache pages are stored in the swapper_space radix tree. We want to 7 * swapcache pages are stored in the swapper_space radix tree. We want to
3 * get good packing density in that tree, so the index should be dense in 8 * get good packing density in that tree, so the index should be dense in
@@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
76 return __swp_entry_to_pte(arch_entry); 81 return __swp_entry_to_pte(arch_entry);
77} 82}
78 83
84static inline swp_entry_t radix_to_swp_entry(void *arg)
85{
86 swp_entry_t entry;
87
88 entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
89 return entry;
90}
91
92static inline void *swp_to_radix_entry(swp_entry_t entry)
93{
94 unsigned long value;
95
96 value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
97 return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
98}
99
79#ifdef CONFIG_MIGRATION 100#ifdef CONFIG_MIGRATION
80static inline swp_entry_t make_migration_entry(struct page *page, int write) 101static inline swp_entry_t make_migration_entry(struct page *page, int write)
81{ 102{
@@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry)
169 return 0; 190 return 0;
170} 191}
171#endif 192#endif
193
194#endif /* _LINUX_SWAPOPS_H */
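A tiny round-trip sketch (illustrative) of the helpers just added, showing that a swap entry survives being packed into a radix-tree slot and recovered again:

    static void swp_radix_roundtrip(void)
    {
            swp_entry_t entry = swp_entry(1, 42);   /* type 1, offset 42 */
            void *slot = swp_to_radix_entry(entry);

            BUG_ON(!radix_tree_exceptional_entry(slot));
            BUG_ON(radix_to_swp_entry(slot).val != entry.val);
    }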
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index d3ec89fb412..47b4a27e6e9 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -85,22 +85,6 @@ struct thermal_cooling_device {
85 ((long)t-2732+5)/10 : ((long)t-2732-5)/10) 85 ((long)t-2732+5)/10 : ((long)t-2732-5)/10)
86#define CELSIUS_TO_KELVIN(t) ((t)*10+2732) 86#define CELSIUS_TO_KELVIN(t) ((t)*10+2732)
87 87
88#if defined(CONFIG_THERMAL_HWMON)
89/* thermal zone devices with the same type share one hwmon device */
90struct thermal_hwmon_device {
91 char type[THERMAL_NAME_LENGTH];
92 struct device *device;
93 int count;
94 struct list_head tz_list;
95 struct list_head node;
96};
97
98struct thermal_hwmon_attr {
99 struct device_attribute attr;
100 char name[16];
101};
102#endif
103
104struct thermal_zone_device { 88struct thermal_zone_device {
105 int id; 89 int id;
106 char type[THERMAL_NAME_LENGTH]; 90 char type[THERMAL_NAME_LENGTH];
@@ -120,12 +104,6 @@ struct thermal_zone_device {
120 struct mutex lock; /* protect cooling devices list */ 104 struct mutex lock; /* protect cooling devices list */
121 struct list_head node; 105 struct list_head node;
122 struct delayed_work poll_queue; 106 struct delayed_work poll_queue;
123#if defined(CONFIG_THERMAL_HWMON)
124 struct list_head hwmon_node;
125 struct thermal_hwmon_device *hwmon;
126 struct thermal_hwmon_attr temp_input; /* hwmon sys attr */
127 struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */
128#endif
129}; 107};
130/* Adding event notification support elements */ 108/* Adding event notification support elements */
131#define THERMAL_GENL_FAMILY_NAME "thermal_event" 109#define THERMAL_GENL_FAMILY_NAME "thermal_event"
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 3b938743514..9808877c2ab 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -8,7 +8,7 @@
8 * have chosen to adopt the protocol and over the years it has become a 8 * have chosen to adopt the protocol and over the years it has become a
9 * de-facto standard for labeled networking. 9 * de-facto standard for labeled networking.
10 * 10 *
11 * Author: Paul Moore <paul.moore@hp.com> 11 * Author: Paul Moore <paul@paul-moore.com>
12 * 12 *
13 */ 13 */
14 14
diff --git a/include/net/dst.h b/include/net/dst.h
index 29e255796ce..13d507d69dd 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -37,7 +37,7 @@ struct dst_entry {
37 unsigned long _metrics; 37 unsigned long _metrics;
38 unsigned long expires; 38 unsigned long expires;
39 struct dst_entry *path; 39 struct dst_entry *path;
40 struct neighbour *_neighbour; 40 struct neighbour __rcu *_neighbour;
41#ifdef CONFIG_XFRM 41#ifdef CONFIG_XFRM
42 struct xfrm_state *xfrm; 42 struct xfrm_state *xfrm;
43#else 43#else
@@ -88,12 +88,17 @@ struct dst_entry {
88 88
89static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst) 89static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst)
90{ 90{
91 return dst->_neighbour; 91 return rcu_dereference(dst->_neighbour);
92}
93
94static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst)
95{
96 return rcu_dereference_raw(dst->_neighbour);
92} 97}
93 98
94static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) 99static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh)
95{ 100{
96 dst->_neighbour = neigh; 101 rcu_assign_pointer(dst->_neighbour, neigh);
97} 102}
98 103
99extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); 104extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
@@ -382,8 +387,12 @@ static inline void dst_rcu_free(struct rcu_head *head)
382static inline void dst_confirm(struct dst_entry *dst) 387static inline void dst_confirm(struct dst_entry *dst)
383{ 388{
384 if (dst) { 389 if (dst) {
385 struct neighbour *n = dst_get_neighbour(dst); 390 struct neighbour *n;
391
392 rcu_read_lock();
393 n = dst_get_neighbour(dst);
386 neigh_confirm(n); 394 neigh_confirm(n);
395 rcu_read_unlock();
387 } 396 }
388} 397}
389 398
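With _neighbour now RCU-protected, callers follow the same discipline as the dst_confirm() change above: take the read-side lock, fetch, use, release. A sketch (illustrative, assuming the usual nud_state check):

    static bool dst_neigh_is_valid(struct dst_entry *dst)
    {
            struct neighbour *n;
            bool valid;

            rcu_read_lock();
            n = dst_get_neighbour(dst);
            /* The pointer is only stable inside the read-side section. */
            valid = n && (n->nud_state & NUD_VALID);
            rcu_read_unlock();
            return valid;
    }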
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index f21a16ee370..f67440970d7 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -4,7 +4,7 @@
4 * The NetLabel system manages static and dynamic label mappings for network 4 * The NetLabel system manages static and dynamic label mappings for network
5 * protocols such as CIPSO and RIPSO. 5 * protocols such as CIPSO and RIPSO.
6 * 6 *
7 * Author: Paul Moore <paul.moore@hp.com> 7 * Author: Paul Moore <paul@paul-moore.com>
8 * 8 *
9 */ 9 */
10 10
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
new file mode 100644
index 00000000000..d97f6892c01
--- /dev/null
+++ b/include/net/secure_seq.h
@@ -0,0 +1,20 @@
1#ifndef _NET_SECURE_SEQ
2#define _NET_SECURE_SEQ
3
4#include <linux/types.h>
5
6extern __u32 secure_ip_id(__be32 daddr);
7extern __u32 secure_ipv6_id(const __be32 daddr[4]);
8extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
9extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
10 __be16 dport);
11extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
12 __be16 sport, __be16 dport);
13extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
14 __be16 sport, __be16 dport);
15extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
16 __be16 sport, __be16 dport);
17extern u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
18 __be16 sport, __be16 dport);
19
20#endif /* _NET_SECURE_SEQ */
diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h
new file mode 100644
index 00000000000..c5c5e008e6d
--- /dev/null
+++ b/include/scsi/osd_ore.h
@@ -0,0 +1,125 @@
1/*
2 * Copyright (C) 2011
3 * Boaz Harrosh <bharrosh@panasas.com>
4 *
5 * Public Declarations of the ORE API
6 *
7 * This file is part of the ORE (Object Raid Engine) library.
8 *
9 * ORE is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as published
11 * by the Free Software Foundation. (GPL v2)
12 *
13 * ORE is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with the ORE; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22#ifndef __ORE_H__
23#define __ORE_H__
24
25#include <scsi/osd_initiator.h>
26#include <scsi/osd_attributes.h>
27#include <scsi/osd_sec.h>
28#include <linux/pnfs_osd_xdr.h>
29
30struct ore_comp {
31 struct osd_obj_id obj;
32 u8 cred[OSD_CAP_LEN];
33};
34
35struct ore_layout {
36 /* Our way of looking at the data_map */
37 unsigned stripe_unit;
38 unsigned mirrors_p1;
39
40 unsigned group_width;
41 u64 group_depth;
42 unsigned group_count;
43};
44
45struct ore_components {
46 unsigned numdevs; /* Num of devices in array */
47 /* If @single_comp == EC_SINGLE_COMP, @comps points to a single
48 * component; otherwise there are @numdevs components.
49 */
50 enum EC_COMP_USAGE {
51 EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
52 } single_comp;
53 struct ore_comp *comps;
54 struct osd_dev **ods; /* osd_dev array */
55};
56
57struct ore_io_state;
58typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private);
59
60struct ore_io_state {
61 struct kref kref;
62
63 void *private;
64 ore_io_done_fn done;
65
66 struct ore_layout *layout;
67 struct ore_components *comps;
68
69 /* Global read/write IO */
70 loff_t offset;
71 unsigned long length;
72 void *kern_buff;
73
74 struct page **pages;
75 unsigned nr_pages;
76 unsigned pgbase;
77 unsigned pages_consumed;
78
79 /* Attributes */
80 unsigned in_attr_len;
81 struct osd_attr *in_attr;
82 unsigned out_attr_len;
83 struct osd_attr *out_attr;
84
85 bool reading;
86
87 /* Variable array of size numdevs */
88 unsigned numdevs;
89 struct ore_per_dev_state {
90 struct osd_request *or;
91 struct bio *bio;
92 loff_t offset;
93 unsigned length;
94 unsigned dev;
95 } per_dev[];
96};
97
98static inline unsigned ore_io_state_size(unsigned numdevs)
99{
100 return sizeof(struct ore_io_state) +
101 sizeof(struct ore_per_dev_state) * numdevs;
102}
103
104/* ore.c */
105int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
106 bool is_reading, u64 offset, u64 length,
107 struct ore_io_state **ios);
108int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
109 struct ore_io_state **ios);
110void ore_put_io_state(struct ore_io_state *ios);
111
112int ore_check_io(struct ore_io_state *ios, u64 *resid);
113
114int ore_create(struct ore_io_state *ios);
115int ore_remove(struct ore_io_state *ios);
116int ore_write(struct ore_io_state *ios);
117int ore_read(struct ore_io_state *ios);
118int ore_truncate(struct ore_layout *layout, struct ore_components *comps,
119 u64 size);
120
121int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr);
122
123extern const struct osd_attr g_attr_logical_length;
124
125#endif
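The declarations above compose into a simple read path roughly like this (a sketch under assumptions: layout/comps already populated by the caller, ore_read() treated as synchronous when no ->done callback is set, and error handling trimmed):

    static int ore_read_sketch(struct ore_layout *layout,
                               struct ore_components *comps,
                               u64 offset, u64 length,
                               struct page **pages, unsigned nr_pages)
    {
            struct ore_io_state *ios;
            int ret;

            ret = ore_get_rw_state(layout, comps, true, offset, length, &ios);
            if (ret)
                    return ret;

            ios->pages = pages;
            ios->nr_pages = nr_pages;

            ret = ore_read(ios);
            if (!ret)
                    ret = ore_check_io(ios, NULL);  /* any residual error? */

            ore_put_io_state(ios);
            return ret;
    }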
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6363193a341..b50a5473624 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -23,7 +23,7 @@ TRACE_EVENT(ext4_free_inode,
23 TP_STRUCT__entry( 23 TP_STRUCT__entry(
24 __field( dev_t, dev ) 24 __field( dev_t, dev )
25 __field( ino_t, ino ) 25 __field( ino_t, ino )
26 __field( umode_t, mode ) 26 __field( __u16, mode )
27 __field( uid_t, uid ) 27 __field( uid_t, uid )
28 __field( gid_t, gid ) 28 __field( gid_t, gid )
29 __field( __u64, blocks ) 29 __field( __u64, blocks )
@@ -52,7 +52,7 @@ TRACE_EVENT(ext4_request_inode,
52 TP_STRUCT__entry( 52 TP_STRUCT__entry(
53 __field( dev_t, dev ) 53 __field( dev_t, dev )
54 __field( ino_t, dir ) 54 __field( ino_t, dir )
55 __field( umode_t, mode ) 55 __field( __u16, mode )
56 ), 56 ),
57 57
58 TP_fast_assign( 58 TP_fast_assign(
@@ -75,7 +75,7 @@ TRACE_EVENT(ext4_allocate_inode,
75 __field( dev_t, dev ) 75 __field( dev_t, dev )
76 __field( ino_t, ino ) 76 __field( ino_t, ino )
77 __field( ino_t, dir ) 77 __field( ino_t, dir )
78 __field( umode_t, mode ) 78 __field( __u16, mode )
79 ), 79 ),
80 80
81 TP_fast_assign( 81 TP_fast_assign(
@@ -725,7 +725,7 @@ TRACE_EVENT(ext4_free_blocks,
725 TP_STRUCT__entry( 725 TP_STRUCT__entry(
726 __field( dev_t, dev ) 726 __field( dev_t, dev )
727 __field( ino_t, ino ) 727 __field( ino_t, ino )
728 __field( umode_t, mode ) 728 __field( __u16, mode )
729 __field( __u64, block ) 729 __field( __u64, block )
730 __field( unsigned long, count ) 730 __field( unsigned long, count )
731 __field( int, flags ) 731 __field( int, flags )
@@ -1012,7 +1012,7 @@ TRACE_EVENT(ext4_forget,
1012 TP_STRUCT__entry( 1012 TP_STRUCT__entry(
1013 __field( dev_t, dev ) 1013 __field( dev_t, dev )
1014 __field( ino_t, ino ) 1014 __field( ino_t, ino )
1015 __field( umode_t, mode ) 1015 __field( __u16, mode )
1016 __field( int, is_metadata ) 1016 __field( int, is_metadata )
1017 __field( __u64, block ) 1017 __field( __u64, block )
1018 ), 1018 ),
@@ -1039,7 +1039,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
1039 TP_STRUCT__entry( 1039 TP_STRUCT__entry(
1040 __field( dev_t, dev ) 1040 __field( dev_t, dev )
1041 __field( ino_t, ino ) 1041 __field( ino_t, ino )
1042 __field( umode_t, mode ) 1042 __field( __u16, mode )
1043 __field( __u64, i_blocks ) 1043 __field( __u64, i_blocks )
1044 __field( int, used_blocks ) 1044 __field( int, used_blocks )
1045 __field( int, reserved_data_blocks ) 1045 __field( int, reserved_data_blocks )
@@ -1076,7 +1076,7 @@ TRACE_EVENT(ext4_da_reserve_space,
1076 TP_STRUCT__entry( 1076 TP_STRUCT__entry(
1077 __field( dev_t, dev ) 1077 __field( dev_t, dev )
1078 __field( ino_t, ino ) 1078 __field( ino_t, ino )
1079 __field( umode_t, mode ) 1079 __field( __u16, mode )
1080 __field( __u64, i_blocks ) 1080 __field( __u64, i_blocks )
1081 __field( int, md_needed ) 1081 __field( int, md_needed )
1082 __field( int, reserved_data_blocks ) 1082 __field( int, reserved_data_blocks )
@@ -1110,7 +1110,7 @@ TRACE_EVENT(ext4_da_release_space,
1110 TP_STRUCT__entry( 1110 TP_STRUCT__entry(
1111 __field( dev_t, dev ) 1111 __field( dev_t, dev )
1112 __field( ino_t, ino ) 1112 __field( ino_t, ino )
1113 __field( umode_t, mode ) 1113 __field( __u16, mode )
1114 __field( __u64, i_blocks ) 1114 __field( __u64, i_blocks )
1115 __field( int, freed_blocks ) 1115 __field( int, freed_blocks )
1116 __field( int, reserved_data_blocks ) 1116 __field( int, reserved_data_blocks )
@@ -1518,6 +1518,77 @@ TRACE_EVENT(ext4_load_inode,
1518 (unsigned long) __entry->ino) 1518 (unsigned long) __entry->ino)
1519); 1519);
1520 1520
1521TRACE_EVENT(ext4_journal_start,
1522 TP_PROTO(struct super_block *sb, int nblocks, unsigned long IP),
1523
1524 TP_ARGS(sb, nblocks, IP),
1525
1526 TP_STRUCT__entry(
1527 __field( dev_t, dev )
1528 __field( int, nblocks )
1529 __field(unsigned long, ip )
1530 ),
1531
1532 TP_fast_assign(
1533 __entry->dev = sb->s_dev;
1534 __entry->nblocks = nblocks;
1535 __entry->ip = IP;
1536 ),
1537
1538 TP_printk("dev %d,%d nblocks %d caller %pF",
1539 MAJOR(__entry->dev), MINOR(__entry->dev),
1540 __entry->nblocks, (void *)__entry->ip)
1541);
1542
1543DECLARE_EVENT_CLASS(ext4__trim,
1544 TP_PROTO(struct super_block *sb,
1545 ext4_group_t group,
1546 ext4_grpblk_t start,
1547 ext4_grpblk_t len),
1548
1549 TP_ARGS(sb, group, start, len),
1550
1551 TP_STRUCT__entry(
1552 __field( int, dev_major )
1553 __field( int, dev_minor )
1554 __field( __u32, group )
1555 __field( int, start )
1556 __field( int, len )
1557 ),
1558
1559 TP_fast_assign(
1560 __entry->dev_major = MAJOR(sb->s_dev);
1561 __entry->dev_minor = MINOR(sb->s_dev);
1562 __entry->group = group;
1563 __entry->start = start;
1564 __entry->len = len;
1565 ),
1566
1567 TP_printk("dev %d,%d group %u, start %d, len %d",
1568 __entry->dev_major, __entry->dev_minor,
1569 __entry->group, __entry->start, __entry->len)
1570);
1571
1572DEFINE_EVENT(ext4__trim, ext4_trim_extent,
1573
1574 TP_PROTO(struct super_block *sb,
1575 ext4_group_t group,
1576 ext4_grpblk_t start,
1577 ext4_grpblk_t len),
1578
1579 TP_ARGS(sb, group, start, len)
1580);
1581
1582DEFINE_EVENT(ext4__trim, ext4_trim_all_free,
1583
1584 TP_PROTO(struct super_block *sb,
1585 ext4_group_t group,
1586 ext4_grpblk_t start,
1587 ext4_grpblk_t len),
1588
1589 TP_ARGS(sb, group, start, len)
1590);
1591
1521#endif /* _TRACE_EXT4_H */ 1592#endif /* _TRACE_EXT4_H */
1522 1593
1523/* This part must be outside protection */ 1594/* This part must be outside protection */
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index bf16545cc97..75964412ddb 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -26,8 +26,8 @@ TRACE_EVENT(jbd2_checkpoint,
26 __entry->result = result; 26 __entry->result = result;
27 ), 27 ),
28 28
29 TP_printk("dev %s result %d", 29 TP_printk("dev %d,%d result %d",
30 jbd2_dev_to_name(__entry->dev), __entry->result) 30 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->result)
31); 31);
32 32
33DECLARE_EVENT_CLASS(jbd2_commit, 33DECLARE_EVENT_CLASS(jbd2_commit,
@@ -48,9 +48,9 @@ DECLARE_EVENT_CLASS(jbd2_commit,
48 __entry->transaction = commit_transaction->t_tid; 48 __entry->transaction = commit_transaction->t_tid;
49 ), 49 ),
50 50
51 TP_printk("dev %s transaction %d sync %d", 51 TP_printk("dev %d,%d transaction %d sync %d",
52 jbd2_dev_to_name(__entry->dev), __entry->transaction, 52 MAJOR(__entry->dev), MINOR(__entry->dev),
53 __entry->sync_commit) 53 __entry->transaction, __entry->sync_commit)
54); 54);
55 55
56DEFINE_EVENT(jbd2_commit, jbd2_start_commit, 56DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@@ -100,9 +100,9 @@ TRACE_EVENT(jbd2_end_commit,
100 __entry->head = journal->j_tail_sequence; 100 __entry->head = journal->j_tail_sequence;
101 ), 101 ),
102 102
103 TP_printk("dev %s transaction %d sync %d head %d", 103 TP_printk("dev %d,%d transaction %d sync %d head %d",
104 jbd2_dev_to_name(__entry->dev), __entry->transaction, 104 MAJOR(__entry->dev), MINOR(__entry->dev),
105 __entry->sync_commit, __entry->head) 105 __entry->transaction, __entry->sync_commit, __entry->head)
106); 106);
107 107
108TRACE_EVENT(jbd2_submit_inode_data, 108TRACE_EVENT(jbd2_submit_inode_data,
@@ -120,8 +120,9 @@ TRACE_EVENT(jbd2_submit_inode_data,
120 __entry->ino = inode->i_ino; 120 __entry->ino = inode->i_ino;
121 ), 121 ),
122 122
123 TP_printk("dev %s ino %lu", 123 TP_printk("dev %d,%d ino %lu",
124 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) 124 MAJOR(__entry->dev), MINOR(__entry->dev),
125 (unsigned long) __entry->ino)
125); 126);
126 127
127TRACE_EVENT(jbd2_run_stats, 128TRACE_EVENT(jbd2_run_stats,
@@ -156,9 +157,9 @@ TRACE_EVENT(jbd2_run_stats,
156 __entry->blocks_logged = stats->rs_blocks_logged; 157 __entry->blocks_logged = stats->rs_blocks_logged;
157 ), 158 ),
158 159
159 TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u " 160 TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
160 "logging %u handle_count %u blocks %u blocks_logged %u", 161 "logging %u handle_count %u blocks %u blocks_logged %u",
161 jbd2_dev_to_name(__entry->dev), __entry->tid, 162 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
162 jiffies_to_msecs(__entry->wait), 163 jiffies_to_msecs(__entry->wait),
163 jiffies_to_msecs(__entry->running), 164 jiffies_to_msecs(__entry->running),
164 jiffies_to_msecs(__entry->locked), 165 jiffies_to_msecs(__entry->locked),
@@ -192,9 +193,9 @@ TRACE_EVENT(jbd2_checkpoint_stats,
192 __entry->dropped = stats->cs_dropped; 193 __entry->dropped = stats->cs_dropped;
193 ), 194 ),
194 195
195 TP_printk("dev %s tid %lu chp_time %u forced_to_close %u " 196 TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
196 "written %u dropped %u", 197 "written %u dropped %u",
197 jbd2_dev_to_name(__entry->dev), __entry->tid, 198 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
198 jiffies_to_msecs(__entry->chp_time), 199 jiffies_to_msecs(__entry->chp_time),
199 __entry->forced_to_close, __entry->written, __entry->dropped) 200 __entry->forced_to_close, __entry->written, __entry->dropped)
200); 201);
@@ -222,9 +223,10 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
222 __entry->freed = freed; 223 __entry->freed = freed;
223 ), 224 ),
224 225
225 TP_printk("dev %s from %u to %u offset %lu freed %lu", 226 TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
226 jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, 227 MAJOR(__entry->dev), MINOR(__entry->dev),
227 __entry->first_tid, __entry->block_nr, __entry->freed) 228 __entry->tail_sequence, __entry->first_tid,
229 __entry->block_nr, __entry->freed)
228); 230);
229 231
230#endif /* _TRACE_JBD2_H */ 232#endif /* _TRACE_JBD2_H */
diff --git a/init/main.c b/init/main.c
index d7211faed2a..9c51ee7adf3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -369,9 +369,12 @@ static noinline void __init_refok rest_init(void)
369 init_idle_bootup_task(current); 369 init_idle_bootup_task(current);
370 preempt_enable_no_resched(); 370 preempt_enable_no_resched();
371 schedule(); 371 schedule();
372 preempt_disable(); 372
373 /* At this point, we can enable user mode helper functionality */
374 usermodehelper_enable();
373 375
374 /* Call into cpu_idle with preempt disabled */ 376 /* Call into cpu_idle with preempt disabled */
377 preempt_disable();
375 cpu_idle(); 378 cpu_idle();
376} 379}
377 380
@@ -715,7 +718,7 @@ static void __init do_basic_setup(void)
715{ 718{
716 cpuset_init_smp(); 719 cpuset_init_smp();
717 usermodehelper_init(); 720 usermodehelper_init();
718 init_tmpfs(); 721 shmem_init();
719 driver_init(); 722 driver_init();
720 init_irq_proc(); 723 init_irq_proc();
721 do_ctors(); 724 do_ctors();
diff --git a/ipc/shm.c b/ipc/shm.c
index 9fb044f3b34..02ecf2c078f 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -105,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns)
105} 105}
106#endif 106#endif
107 107
108void __init shm_init (void) 108static int __init ipc_ns_init(void)
109{ 109{
110 shm_init_ns(&init_ipc_ns); 110 shm_init_ns(&init_ipc_ns);
111 return 0;
112}
113
114pure_initcall(ipc_ns_init);
115
116void __init shm_init (void)
117{
111 ipc_init_proc_interface("sysvipc/shm", 118 ipc_init_proc_interface("sysvipc/shm",
112#if BITS_PER_LONG <= 32 119#if BITS_PER_LONG <= 32
113 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", 120 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
@@ -294,7 +301,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
294void shm_destroy_orphaned(struct ipc_namespace *ns) 301void shm_destroy_orphaned(struct ipc_namespace *ns)
295{ 302{
296 down_write(&shm_ids(ns).rw_mutex); 303 down_write(&shm_ids(ns).rw_mutex);
297 if (&shm_ids(ns).in_use) 304 if (shm_ids(ns).in_use)
298 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); 305 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
299 up_write(&shm_ids(ns).rw_mutex); 306 up_write(&shm_ids(ns).rw_mutex);
300} 307}
@@ -304,9 +311,12 @@ void exit_shm(struct task_struct *task)
304{ 311{
305 struct ipc_namespace *ns = task->nsproxy->ipc_ns; 312 struct ipc_namespace *ns = task->nsproxy->ipc_ns;
306 313
314 if (shm_ids(ns).in_use == 0)
315 return;
316
307 /* Destroy all already created segments, but not mapped yet */ 317 /* Destroy all already created segments, but not mapped yet */
308 down_write(&shm_ids(ns).rw_mutex); 318 down_write(&shm_ids(ns).rw_mutex);
309 if (&shm_ids(ns).in_use) 319 if (shm_ids(ns).in_use)
310 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); 320 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
311 up_write(&shm_ids(ns).rw_mutex); 321 up_write(&shm_ids(ns).rw_mutex);
312} 322}
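The two hunks above fix the same one-character bug: taking the address of a member instead of reading its value. In miniature (illustrative):

    struct ipc_ids_like { int in_use; };

    static void always_vs_intended(struct ipc_ids_like *ids)
    {
            if (&ids->in_use)       /* address of a member: always true */
                    pr_debug("old test: unconditionally taken\n");
            if (ids->in_use)        /* intended: any segments in use? */
                    pr_debug("new test: taken only when non-zero\n");
    }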
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index a11db956dd6..34872482315 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -42,6 +42,8 @@
42/* Our I/O buffers. */ 42/* Our I/O buffers. */
43static char remcom_in_buffer[BUFMAX]; 43static char remcom_in_buffer[BUFMAX];
44static char remcom_out_buffer[BUFMAX]; 44static char remcom_out_buffer[BUFMAX];
45static int gdbstub_use_prev_in_buf;
46static int gdbstub_prev_in_buf_pos;
45 47
46/* Storage for the registers, in GDB format. */ 48/* Storage for the registers, in GDB format. */
47static unsigned long gdb_regs[(NUMREGBYTES + 49static unsigned long gdb_regs[(NUMREGBYTES +
@@ -58,6 +60,13 @@ static int gdbstub_read_wait(void)
58 int ret = -1; 60 int ret = -1;
59 int i; 61 int i;
60 62
63 if (unlikely(gdbstub_use_prev_in_buf)) {
64 if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf)
65 return remcom_in_buffer[gdbstub_prev_in_buf_pos++];
66 else
67 gdbstub_use_prev_in_buf = 0;
68 }
69
61 /* poll any additional I/O interfaces that are defined */ 70 /* poll any additional I/O interfaces that are defined */
62 while (ret < 0) 71 while (ret < 0)
63 for (i = 0; kdb_poll_funcs[i] != NULL; i++) { 72 for (i = 0; kdb_poll_funcs[i] != NULL; i++) {
@@ -109,7 +118,6 @@ static void get_packet(char *buffer)
109 buffer[count] = ch; 118 buffer[count] = ch;
110 count = count + 1; 119 count = count + 1;
111 } 120 }
112 buffer[count] = 0;
113 121
114 if (ch == '#') { 122 if (ch == '#') {
115 xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4; 123 xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4;
@@ -124,6 +132,7 @@ static void get_packet(char *buffer)
124 if (dbg_io_ops->flush) 132 if (dbg_io_ops->flush)
125 dbg_io_ops->flush(); 133 dbg_io_ops->flush();
126 } 134 }
135 buffer[count] = 0;
127 } while (checksum != xmitcsum); 136 } while (checksum != xmitcsum);
128} 137}
129 138
@@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
1082 case 'c': 1091 case 'c':
1083 strcpy(remcom_in_buffer, cmd); 1092 strcpy(remcom_in_buffer, cmd);
1084 return 0; 1093 return 0;
1085 case '?': 1094 case '$':
1086 gdb_cmd_status(ks); 1095 strcpy(remcom_in_buffer, cmd);
1087 break; 1096 gdbstub_use_prev_in_buf = strlen(remcom_in_buffer);
1088 case '\0': 1097 gdbstub_prev_in_buf_pos = 0;
1089 strcpy(remcom_out_buffer, ""); 1098 return 0;
1090 break;
1091 } 1099 }
1092 dbg_io_ops->write_char('+'); 1100 dbg_io_ops->write_char('+');
1093 put_packet(remcom_out_buffer); 1101 put_packet(remcom_out_buffer);
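The gdbstub_use_prev_in_buf machinery above implements a small replay buffer: input that kdb has already consumed can be handed back to the stub character by character before live polling resumes. The pattern in isolation (illustrative; read_live_char() is a hypothetical stand-in for real polled I/O):

    static char stash[64];          /* bytes already consumed elsewhere */
    static int stash_len, stash_pos;

    static int read_live_char(void)
    {
            return -1;              /* stand-in for real polled I/O */
    }

    static int next_char(void)
    {
            if (stash_len) {
                    if (stash_pos < stash_len)
                            return stash[stash_pos++];
                    stash_len = 0;  /* drained: fall back to live input */
            }
            return read_live_char();
    }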
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 2f62fe85f16..7179eac7b41 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv)
112 unsigned long addr; 112 unsigned long addr;
113 long offset; 113 long offset;
114 114
115 kdbgetintenv("BTARGS", &argcount); /* Arguments to print */ 115 /* Prompt after each proc in bta */
116 kdbgetintenv("BTAPROMPT", &btaprompt); /* Prompt after each 116 kdbgetintenv("BTAPROMPT", &btaprompt);
117 * proc in bta */
118 117
119 if (strcmp(argv[0], "bta") == 0) { 118 if (strcmp(argv[0], "bta") == 0) {
120 struct task_struct *g, *p; 119 struct task_struct *g, *p;
diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds
index 56c88e4db30..9834ad303ab 100644
--- a/kernel/debug/kdb/kdb_cmds
+++ b/kernel/debug/kdb/kdb_cmds
@@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging"
18endefcmd 18endefcmd
19 19
20defcmd dumpall "" "First line debugging" 20defcmd dumpall "" "First line debugging"
21 set BTSYMARG 1
22 set BTARGS 9
23 pid R 21 pid R
24 -dumpcommon 22 -dumpcommon
25 -bta 23 -bta
26endefcmd 24endefcmd
27 25
28defcmd dumpcpu "" "Same as dumpall but only tasks on cpus" 26defcmd dumpcpu "" "Same as dumpall but only tasks on cpus"
29 set BTSYMARG 1
30 set BTARGS 9
31 pid R 27 pid R
32 -dumpcommon 28 -dumpcommon
33 -btc 29 -btc
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index dd0b1b7dd02..d9ca9aa481e 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs);
30int kdb_poll_idx = 1; 30int kdb_poll_idx = 1;
31EXPORT_SYMBOL_GPL(kdb_poll_idx); 31EXPORT_SYMBOL_GPL(kdb_poll_idx);
32 32
33static struct kgdb_state *kdb_ks;
34
33int kdb_stub(struct kgdb_state *ks) 35int kdb_stub(struct kgdb_state *ks)
34{ 36{
35 int error = 0; 37 int error = 0;
@@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks)
39 kdb_dbtrap_t db_result = KDB_DB_NOBPT; 41 kdb_dbtrap_t db_result = KDB_DB_NOBPT;
40 int i; 42 int i;
41 43
44 kdb_ks = ks;
42 if (KDB_STATE(REENTRY)) { 45 if (KDB_STATE(REENTRY)) {
43 reason = KDB_REASON_SWITCH; 46 reason = KDB_REASON_SWITCH;
44 KDB_STATE_CLEAR(REENTRY); 47 KDB_STATE_CLEAR(REENTRY);
@@ -123,20 +126,8 @@ int kdb_stub(struct kgdb_state *ks)
123 KDB_STATE_CLEAR(PAGER); 126 KDB_STATE_CLEAR(PAGER);
124 kdbnearsym_cleanup(); 127 kdbnearsym_cleanup();
125 if (error == KDB_CMD_KGDB) { 128 if (error == KDB_CMD_KGDB) {
126 if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) { 129 if (KDB_STATE(DOING_KGDB))
127 /*
128 * This inteface glue which allows kdb to transition in into
129 * the gdb stub. In order to do this the '?' or '' gdb serial
130 * packet response is processed here. And then control is
131 * passed to the gdbstub.
132 */
133 if (KDB_STATE(DOING_KGDB))
134 gdbstub_state(ks, "?");
135 else
136 gdbstub_state(ks, "");
137 KDB_STATE_CLEAR(DOING_KGDB); 130 KDB_STATE_CLEAR(DOING_KGDB);
138 KDB_STATE_CLEAR(DOING_KGDB2);
139 }
140 return DBG_PASS_EVENT; 131 return DBG_PASS_EVENT;
141 } 132 }
142 kdb_bp_install(ks->linux_regs); 133 kdb_bp_install(ks->linux_regs);
@@ -166,3 +157,7 @@ int kdb_stub(struct kgdb_state *ks)
166 return kgdb_info[ks->cpu].ret_state; 157 return kgdb_info[ks->cpu].ret_state;
167} 158}
168 159
160void kdb_gdb_state_pass(char *buf)
161{
162 gdbstub_state(kdb_ks, buf);
163}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 96fdaac46a8..4802eb5840e 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN];
31 31
32int kdb_trap_printk; 32int kdb_trap_printk;
33 33
34static void kgdb_transition_check(char *buffer) 34static int kgdb_transition_check(char *buffer)
35{ 35{
36 int slen = strlen(buffer); 36 if (buffer[0] != '+' && buffer[0] != '$') {
37 if (strncmp(buffer, "$?#3f", slen) != 0 &&
38 strncmp(buffer, "$qSupported#37", slen) != 0 &&
39 strncmp(buffer, "+$qSupported#37", slen) != 0) {
40 KDB_STATE_SET(KGDB_TRANS); 37 KDB_STATE_SET(KGDB_TRANS);
41 kdb_printf("%s", buffer); 38 kdb_printf("%s", buffer);
39 } else {
40 int slen = strlen(buffer);
41 if (slen > 3 && buffer[slen - 3] == '#') {
42 kdb_gdb_state_pass(buffer);
43 strcpy(buffer, "kgdb");
44 KDB_STATE_SET(DOING_KGDB);
45 return 1;
46 }
42 } 47 }
48 return 0;
43} 49}
44 50
45static int kdb_read_get_key(char *buffer, size_t bufsize) 51static int kdb_read_get_key(char *buffer, size_t bufsize)
@@ -251,6 +257,10 @@ poll_again:
251 case 13: /* enter */ 257 case 13: /* enter */
252 *lastchar++ = '\n'; 258 *lastchar++ = '\n';
253 *lastchar++ = '\0'; 259 *lastchar++ = '\0';
260 if (!KDB_STATE(KGDB_TRANS)) {
261 KDB_STATE_SET(KGDB_TRANS);
262 kdb_printf("%s", buffer);
263 }
254 kdb_printf("\n"); 264 kdb_printf("\n");
255 return buffer; 265 return buffer;
256 case 4: /* Del */ 266 case 4: /* Del */
@@ -382,22 +392,26 @@ poll_again:
382 * printed characters if we think that 392 * printed characters if we think that
383 * kgdb is connecting, until the check 393 * kgdb is connecting, until the check
384 * fails */ 394 * fails */
385 if (!KDB_STATE(KGDB_TRANS)) 395 if (!KDB_STATE(KGDB_TRANS)) {
386 kgdb_transition_check(buffer); 396 if (kgdb_transition_check(buffer))
387 else 397 return buffer;
398 } else {
388 kdb_printf("%c", key); 399 kdb_printf("%c", key);
400 }
389 } 401 }
390 /* Special escape to kgdb */ 402 /* Special escape to kgdb */
391 if (lastchar - buffer >= 5 && 403 if (lastchar - buffer >= 5 &&
392 strcmp(lastchar - 5, "$?#3f") == 0) { 404 strcmp(lastchar - 5, "$?#3f") == 0) {
405 kdb_gdb_state_pass(lastchar - 5);
393 strcpy(buffer, "kgdb"); 406 strcpy(buffer, "kgdb");
394 KDB_STATE_SET(DOING_KGDB); 407 KDB_STATE_SET(DOING_KGDB);
395 return buffer; 408 return buffer;
396 } 409 }
397 if (lastchar - buffer >= 14 && 410 if (lastchar - buffer >= 11 &&
398 strcmp(lastchar - 14, "$qSupported#37") == 0) { 411 strcmp(lastchar - 11, "$qSupported") == 0) {
412 kdb_gdb_state_pass(lastchar - 11);
399 strcpy(buffer, "kgdb"); 413 strcpy(buffer, "kgdb");
400 KDB_STATE_SET(DOING_KGDB2); 414 KDB_STATE_SET(DOING_KGDB);
401 return buffer; 415 return buffer;
402 } 416 }
403 } 417 }
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index be14779bcef..63786e71a3c 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -145,7 +145,6 @@ static char *__env[] = {
145#endif 145#endif
146 "RADIX=16", 146 "RADIX=16",
147 "MDCOUNT=8", /* lines of md output */ 147 "MDCOUNT=8", /* lines of md output */
148 "BTARGS=9", /* 9 possible args in bt */
149 KDB_PLATFORM_ENV, 148 KDB_PLATFORM_ENV,
150 "DTABCOUNT=30", 149 "DTABCOUNT=30",
151 "NOSECT=1", 150 "NOSECT=1",
@@ -172,6 +171,7 @@ static char *__env[] = {
172 (char *)0, 171 (char *)0,
173 (char *)0, 172 (char *)0,
174 (char *)0, 173 (char *)0,
174 (char *)0,
175}; 175};
176 176
177static const int __nenv = (sizeof(__env) / sizeof(char *)); 177static const int __nenv = (sizeof(__env) / sizeof(char *));
@@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
1386 } 1386 }
1387 1387
1388 if (result == KDB_CMD_KGDB) { 1388 if (result == KDB_CMD_KGDB) {
1389 if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2))) 1389 if (!KDB_STATE(DOING_KGDB))
1390 kdb_printf("Entering please attach debugger " 1390 kdb_printf("Entering please attach debugger "
1391 "or use $D#44+ or $3#33\n"); 1391 "or use $D#44+ or $3#33\n");
1392 break; 1392 break;
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 35d69ed1dfb..e381d105b40 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -21,7 +21,6 @@
21#define KDB_CMD_SS (-1003) 21#define KDB_CMD_SS (-1003)
22#define KDB_CMD_SSB (-1004) 22#define KDB_CMD_SSB (-1004)
23#define KDB_CMD_KGDB (-1005) 23#define KDB_CMD_KGDB (-1005)
24#define KDB_CMD_KGDB2 (-1006)
25 24
26/* Internal debug flags */ 25/* Internal debug flags */
27#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */ 26#define KDB_DEBUG_FLAG_BP 0x0002 /* Breakpoint subsystem debug */
@@ -146,7 +145,6 @@ extern int kdb_state;
146 * keyboard on this cpu */ 145 * keyboard on this cpu */
147#define KDB_STATE_KEXEC 0x00040000 /* kexec issued */ 146#define KDB_STATE_KEXEC 0x00040000 /* kexec issued */
148#define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */ 147#define KDB_STATE_DOING_KGDB 0x00080000 /* kgdb enter now issued */
149#define KDB_STATE_DOING_KGDB2 0x00100000 /* kgdb enter now issued */
150#define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */ 148#define KDB_STATE_KGDB_TRANS 0x00200000 /* Transition to kgdb */
151#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch 149#define KDB_STATE_ARCH 0xff000000 /* Reserved for arch
152 * specific use */ 150 * specific use */
@@ -218,6 +216,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val);
218extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); 216extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
219extern void kdb_meminfo_proc_show(void); 217extern void kdb_meminfo_proc_show(void);
220extern char *kdb_getstr(char *, size_t, char *); 218extern char *kdb_getstr(char *, size_t, char *);
219extern void kdb_gdb_state_pass(char *buf);
221 220
222/* Defines for kdb_symbol_print */ 221/* Defines for kdb_symbol_print */
223#define KDB_SP_SPACEB 0x0001 /* Space before string */ 222#define KDB_SP_SPACEB 0x0001 /* Space before string */
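The kdb hunks above relax the kgdb hand-off: instead of demanding a complete gdb packet with its two-character checksum ("$qSupported#37"), kdb now matches the bare 11-byte "$qSupported" prefix, forwards the characters it already consumed to kgdb through the new kdb_gdb_state_pass(), and retires the now-redundant DOING_KGDB2 state (the __env table loses "BTARGS=9" and gains a spare slot in the same series). A minimal userspace sketch of the relaxed tail match -- the buffer contents are an illustrative assumption:

#include <stdio.h>
#include <string.h>

/* Does the accumulated input end in a gdb "qSupported" header?
 * Mirrors the patch's prefix test; no checksum bytes are required. */
static int ends_in_qsupported(const char *buffer, const char *lastchar)
{
        return lastchar - buffer >= 11 &&
               strcmp(lastchar - 11, "$qSupported") == 0;
}

int main(void)
{
        const char buf[] = "$qSupported";       /* checksum not yet typed */

        printf("%d\n", ends_in_qsupported(buf, buf + strlen(buf)));
        return 0;
}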
diff --git a/kernel/futex.c b/kernel/futex.c
index 0a308970c24..11cbe052b2e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -218,6 +218,8 @@ static void drop_futex_key_refs(union futex_key *key)
218 * @uaddr: virtual address of the futex 218 * @uaddr: virtual address of the futex
219 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 219 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
220 * @key: address where result is stored. 220 * @key: address where result is stored.
221 * @rw: mapping needs to be read/write (values: VERIFY_READ,
222 * VERIFY_WRITE)
221 * 223 *
222 * Returns a negative error code or 0 224 * Returns a negative error code or 0
223 * The key words are stored in *key on success. 225 * The key words are stored in *key on success.
@@ -229,12 +231,12 @@ static void drop_futex_key_refs(union futex_key *key)
229 * lock_page() might sleep, the caller should not hold a spinlock. 231 * lock_page() might sleep, the caller should not hold a spinlock.
230 */ 232 */
231static int 233static int
232get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) 234get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
233{ 235{
234 unsigned long address = (unsigned long)uaddr; 236 unsigned long address = (unsigned long)uaddr;
235 struct mm_struct *mm = current->mm; 237 struct mm_struct *mm = current->mm;
236 struct page *page, *page_head; 238 struct page *page, *page_head;
237 int err; 239 int err, ro = 0;
238 240
239 /* 241 /*
240 * The futex address must be "naturally" aligned. 242 * The futex address must be "naturally" aligned.
@@ -262,8 +264,18 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
262 264
263again: 265again:
264 err = get_user_pages_fast(address, 1, 1, &page); 266 err = get_user_pages_fast(address, 1, 1, &page);
267 /*
 268	 * If write access is not required (e.g. FUTEX_WAIT), try
 269	 * to get read-only access.
270 */
271 if (err == -EFAULT && rw == VERIFY_READ) {
272 err = get_user_pages_fast(address, 1, 0, &page);
273 ro = 1;
274 }
265 if (err < 0) 275 if (err < 0)
266 return err; 276 return err;
277 else
278 err = 0;
267 279
268#ifdef CONFIG_TRANSPARENT_HUGEPAGE 280#ifdef CONFIG_TRANSPARENT_HUGEPAGE
269 page_head = page; 281 page_head = page;
@@ -305,6 +317,13 @@ again:
305 if (!page_head->mapping) { 317 if (!page_head->mapping) {
306 unlock_page(page_head); 318 unlock_page(page_head);
307 put_page(page_head); 319 put_page(page_head);
320 /*
321 * ZERO_PAGE pages don't have a mapping. Avoid a busy loop
 322	 * trying to find one. An RW mapping would have COW'd (and thus
 323	 * would have a mapping), so this page is RO and won't ever change.
324 */
325 if ((page_head == ZERO_PAGE(address)))
326 return -EFAULT;
308 goto again; 327 goto again;
309 } 328 }
310 329
@@ -316,6 +335,15 @@ again:
316 * the object not the particular process. 335 * the object not the particular process.
317 */ 336 */
318 if (PageAnon(page_head)) { 337 if (PageAnon(page_head)) {
338 /*
 339	 * An RO anonymous page will never change and thus doesn't make
 340	 * sense for futex operations.
341 */
342 if (ro) {
343 err = -EFAULT;
344 goto out;
345 }
346
319 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ 347 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
320 key->private.mm = mm; 348 key->private.mm = mm;
321 key->private.address = address; 349 key->private.address = address;
@@ -327,9 +355,10 @@ again:
327 355
328 get_futex_key_refs(key); 356 get_futex_key_refs(key);
329 357
358out:
330 unlock_page(page_head); 359 unlock_page(page_head);
331 put_page(page_head); 360 put_page(page_head);
332 return 0; 361 return err;
333} 362}
334 363
335static inline void put_futex_key(union futex_key *key) 364static inline void put_futex_key(union futex_key *key)
@@ -940,7 +969,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
940 if (!bitset) 969 if (!bitset)
941 return -EINVAL; 970 return -EINVAL;
942 971
943 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); 972 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ);
944 if (unlikely(ret != 0)) 973 if (unlikely(ret != 0))
945 goto out; 974 goto out;
946 975
@@ -986,10 +1015,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
986 int ret, op_ret; 1015 int ret, op_ret;
987 1016
988retry: 1017retry:
989 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); 1018 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
990 if (unlikely(ret != 0)) 1019 if (unlikely(ret != 0))
991 goto out; 1020 goto out;
992 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); 1021 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
993 if (unlikely(ret != 0)) 1022 if (unlikely(ret != 0))
994 goto out_put_key1; 1023 goto out_put_key1;
995 1024
@@ -1243,10 +1272,11 @@ retry:
1243 pi_state = NULL; 1272 pi_state = NULL;
1244 } 1273 }
1245 1274
1246 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); 1275 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
1247 if (unlikely(ret != 0)) 1276 if (unlikely(ret != 0))
1248 goto out; 1277 goto out;
1249 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); 1278 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
1279 requeue_pi ? VERIFY_WRITE : VERIFY_READ);
1250 if (unlikely(ret != 0)) 1280 if (unlikely(ret != 0))
1251 goto out_put_key1; 1281 goto out_put_key1;
1252 1282
@@ -1790,7 +1820,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1790 * while the syscall executes. 1820 * while the syscall executes.
1791 */ 1821 */
1792retry: 1822retry:
1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); 1823 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ);
1794 if (unlikely(ret != 0)) 1824 if (unlikely(ret != 0))
1795 return ret; 1825 return ret;
1796 1826
@@ -1941,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1941 } 1971 }
1942 1972
1943retry: 1973retry:
1944 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); 1974 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE);
1945 if (unlikely(ret != 0)) 1975 if (unlikely(ret != 0))
1946 goto out; 1976 goto out;
1947 1977
@@ -2060,7 +2090,7 @@ retry:
2060 if ((uval & FUTEX_TID_MASK) != vpid) 2090 if ((uval & FUTEX_TID_MASK) != vpid)
2061 return -EPERM; 2091 return -EPERM;
2062 2092
2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); 2093 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE);
2064 if (unlikely(ret != 0)) 2094 if (unlikely(ret != 0))
2065 goto out; 2095 goto out;
2066 2096
@@ -2249,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2249 debug_rt_mutex_init_waiter(&rt_waiter); 2279 debug_rt_mutex_init_waiter(&rt_waiter);
2250 rt_waiter.task = NULL; 2280 rt_waiter.task = NULL;
2251 2281
2252 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); 2282 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
2253 if (unlikely(ret != 0)) 2283 if (unlikely(ret != 0))
2254 goto out; 2284 goto out;
2255 2285
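The get_futex_key() rework above lets read-only mappings back futexes for operations that only read the futex word: when the writable get_user_pages_fast() faults and the caller passed VERIFY_READ, the kernel retries without write access, while read-only anonymous pages are still rejected (they can never change, so waiting on them is meaningless). A hypothetical userspace demonstration -- the file path and its contents are assumptions, not part of the patch:

#include <fcntl.h>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
        struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
        uint32_t *uaddr;
        long ret;
        int fd;

        fd = open("/tmp/futex-demo", O_RDONLY); /* >= 4 bytes, hypothetical */
        if (fd < 0)
                return 1;
        uaddr = mmap(NULL, sizeof(*uaddr), PROT_READ, MAP_SHARED, fd, 0);
        if (uaddr == MAP_FAILED)
                return 1;

        /* FUTEX_WAIT is a VERIFY_READ caller: with this patch it can wait
         * on a mapping we may only read; previously this was -EFAULT. */
        ret = syscall(SYS_futex, uaddr, FUTEX_WAIT, *uaddr, &ts, NULL, 0);
        printf("futex(FUTEX_WAIT) returned %ld\n", ret);
        return 0;
}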
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 47613dfb7b2..ddc7644c130 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work)
274 * (used for preventing user land processes from being created after the user 274 * (used for preventing user land processes from being created after the user
275 * land has been frozen during a system-wide hibernation or suspend operation). 275 * land has been frozen during a system-wide hibernation or suspend operation).
276 */ 276 */
277static int usermodehelper_disabled; 277static int usermodehelper_disabled = 1;
278 278
279/* Number of helpers running */ 279/* Number of helpers running */
280static atomic_t running_helpers = ATOMIC_INIT(0); 280static atomic_t running_helpers = ATOMIC_INIT(0);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3956f5149e2..8c24294e477 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2468,7 +2468,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
2468 2468
2469 BUG_ON(usage_bit >= LOCK_USAGE_STATES); 2469 BUG_ON(usage_bit >= LOCK_USAGE_STATES);
2470 2470
2471 if (hlock_class(hlock)->key == &__lockdep_no_validate__) 2471 if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys)
2472 continue; 2472 continue;
2473 2473
2474 if (!mark_lock(curr, hlock, usage_bit)) 2474 if (!mark_lock(curr, hlock, usage_bit))
@@ -2485,23 +2485,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
2485{ 2485{
2486 struct task_struct *curr = current; 2486 struct task_struct *curr = current;
2487 2487
2488 if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
2489 return;
2490
2491 if (unlikely(curr->hardirqs_enabled)) {
2492 /*
2493 * Neither irq nor preemption are disabled here
2494 * so this is racy by nature but losing one hit
2495 * in a stat is not a big deal.
2496 */
2497 __debug_atomic_inc(redundant_hardirqs_on);
2498 return;
2499 }
2500 /* we'll do an OFF -> ON transition: */ 2488 /* we'll do an OFF -> ON transition: */
2501 curr->hardirqs_enabled = 1; 2489 curr->hardirqs_enabled = 1;
2502 2490
2503 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
2504 return;
2505 /* 2491 /*
2506 * We are going to turn hardirqs on, so set the 2492 * We are going to turn hardirqs on, so set the
2507 * usage bit for all held locks: 2493 * usage bit for all held locks:
@@ -2529,9 +2515,25 @@ void trace_hardirqs_on_caller(unsigned long ip)
2529 if (unlikely(!debug_locks || current->lockdep_recursion)) 2515 if (unlikely(!debug_locks || current->lockdep_recursion))
2530 return; 2516 return;
2531 2517
2518 if (unlikely(current->hardirqs_enabled)) {
2519 /*
2520 * Neither irq nor preemption are disabled here
2521 * so this is racy by nature but losing one hit
2522 * in a stat is not a big deal.
2523 */
2524 __debug_atomic_inc(redundant_hardirqs_on);
2525 return;
2526 }
2527
2532 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) 2528 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2533 return; 2529 return;
2534 2530
2531 if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
2532 return;
2533
2534 if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
2535 return;
2536
2535 current->lockdep_recursion = 1; 2537 current->lockdep_recursion = 1;
2536 __trace_hardirqs_on_caller(ip); 2538 __trace_hardirqs_on_caller(ip);
2537 current->lockdep_recursion = 0; 2539 current->lockdep_recursion = 0;
@@ -2872,10 +2874,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
2872void lockdep_init_map(struct lockdep_map *lock, const char *name, 2874void lockdep_init_map(struct lockdep_map *lock, const char *name,
2873 struct lock_class_key *key, int subclass) 2875 struct lock_class_key *key, int subclass)
2874{ 2876{
2875 int i; 2877 memset(lock, 0, sizeof(*lock));
2876
2877 for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
2878 lock->class_cache[i] = NULL;
2879 2878
2880#ifdef CONFIG_LOCK_STAT 2879#ifdef CONFIG_LOCK_STAT
2881 lock->cpu = raw_smp_processor_id(); 2880 lock->cpu = raw_smp_processor_id();
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d1db2880d1c..e19ce1454ee 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
291 if (!cpumask_subset(mask, cpu_possible_mask)) 291 if (!cpumask_subset(mask, cpu_possible_mask))
292 return -EINVAL; 292 return -EINVAL;
293 293
294 s = NULL;
295 if (isadd == REGISTER) { 294 if (isadd == REGISTER) {
296 for_each_cpu(cpu, mask) { 295 for_each_cpu(cpu, mask) {
297 if (!s) 296 s = kmalloc_node(sizeof(struct listener),
298 s = kmalloc_node(sizeof(struct listener), 297 GFP_KERNEL, cpu_to_node(cpu));
299 GFP_KERNEL, cpu_to_node(cpu));
300 if (!s) 298 if (!s)
301 goto cleanup; 299 goto cleanup;
300
302 s->pid = pid; 301 s->pid = pid;
303 INIT_LIST_HEAD(&s->list);
304 s->valid = 1; 302 s->valid = 1;
305 303
306 listeners = &per_cpu(listener_array, cpu); 304 listeners = &per_cpu(listener_array, cpu);
307 down_write(&listeners->sem); 305 down_write(&listeners->sem);
308 list_for_each_entry_safe(s2, tmp, &listeners->list, list) { 306 list_for_each_entry(s2, &listeners->list, list) {
309 if (s2->pid == pid) 307 if (s2->pid == pid && s2->valid)
310 goto next_cpu; 308 goto exists;
311 } 309 }
312 list_add(&s->list, &listeners->list); 310 list_add(&s->list, &listeners->list);
313 s = NULL; 311 s = NULL;
314next_cpu: 312exists:
315 up_write(&listeners->sem); 313 up_write(&listeners->sem);
314 kfree(s); /* nop if NULL */
316 } 315 }
317 kfree(s);
318 return 0; 316 return 0;
319 } 317 }
320 318
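The add_del_listener() rewrite above allocates a fresh listener for every CPU (so kmalloc_node() actually honors each CPU's node rather than recycling the first allocation), treats only valid entries as duplicates, and collapses the cleanup into a single kfree(), relying on kfree(NULL) being a no-op. A userspace sketch of the same ownership pattern; the types are illustrative:

#include <stdlib.h>

struct listener {
        int pid;
        struct listener *next;
};

/* Register pid on a singly linked list, allocating per call and
 * funnelling both the "added" and "already there" paths through one
 * free() -- free(NULL) is a no-op, as kfree(NULL) is in the kernel. */
static int add_listener(struct listener **head, int pid)
{
        struct listener *s = malloc(sizeof(*s));
        struct listener *s2;

        if (!s)
                return -1;
        s->pid = pid;

        for (s2 = *head; s2; s2 = s2->next)
                if (s2->pid == pid)
                        goto exists;
        s->next = *head;
        *head = s;
        s = NULL;               /* ownership moved to the list */
exists:
        free(s);                /* no-op when the node was consumed */
        return 0;
}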
diff --git a/lib/Kconfig b/lib/Kconfig
index 32f3e5ae2be..6c695ff9cab 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -276,4 +276,7 @@ config CORDIC
276 so its calculations are in fixed point. Modules can select this 276 so its calculations are in fixed point. Modules can select this
277 when they require this function. Module will be called cordic. 277 when they require this function. Module will be called cordic.
278 278
279config LLIST
280 bool
281
279endmenu 282endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 892f4e282ea..d5d175c8a6c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -10,7 +10,7 @@ endif
10lib-y := ctype.o string.o vsprintf.o cmdline.o \ 10lib-y := ctype.o string.o vsprintf.o cmdline.o \
11 rbtree.o radix-tree.o dump_stack.o timerqueue.o\ 11 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
12 idr.o int_sqrt.o extable.o prio_tree.o \ 12 idr.o int_sqrt.o extable.o prio_tree.o \
13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 13 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
14 proportions.o prio_heap.o ratelimit.o show_mem.o \ 14 proportions.o prio_heap.o ratelimit.o show_mem.o \
15 is_single_threaded.o plist.o decompress.o find_next_bit.o 15 is_single_threaded.o plist.o decompress.o find_next_bit.o
16 16
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
115 115
116obj-$(CONFIG_CORDIC) += cordic.o 116obj-$(CONFIG_CORDIC) += cordic.o
117 117
118obj-$(CONFIG_LLIST) += llist.o
119
118hostprogs-y := gen_crc32table 120hostprogs-y := gen_crc32table
119clean-files := crc32table.h 121clean-files := crc32table.h
120 122
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 37ef4b04879..2f4412e4d07 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
271} 271}
272EXPORT_SYMBOL(__bitmap_weight); 272EXPORT_SYMBOL(__bitmap_weight);
273 273
274#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
275
276void bitmap_set(unsigned long *map, int start, int nr) 274void bitmap_set(unsigned long *map, int start, int nr)
277{ 275{
278 unsigned long *p = map + BIT_WORD(start); 276 unsigned long *p = map + BIT_WORD(start);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 2577b121c7c..f193b779644 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
197 return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); 197 return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
198} 198}
199 199
200void cleanup_fault_attr_dentries(struct fault_attr *attr) 200struct dentry *fault_create_debugfs_attr(const char *name,
201{ 201 struct dentry *parent, struct fault_attr *attr)
202 debugfs_remove_recursive(attr->dir);
203}
204
205int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
206{ 202{
207 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 203 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
208 struct dentry *dir; 204 struct dentry *dir;
209 205
210 dir = debugfs_create_dir(name, NULL); 206 dir = debugfs_create_dir(name, parent);
211 if (!dir) 207 if (!dir)
212 return -ENOMEM; 208 return ERR_PTR(-ENOMEM);
213
214 attr->dir = dir;
215 209
216 if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) 210 if (!debugfs_create_ul("probability", mode, dir, &attr->probability))
217 goto fail; 211 goto fail;
@@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
243 237
244#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ 238#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
245 239
246 return 0; 240 return dir;
247fail: 241fail:
248 debugfs_remove_recursive(attr->dir); 242 debugfs_remove_recursive(dir);
249 243
250 return -ENOMEM; 244 return ERR_PTR(-ENOMEM);
251} 245}
252 246
253#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ 247#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
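fault_create_debugfs_attr() replaces the init_fault_attr_dentries()/cleanup_fault_attr_dentries() pair: it takes an optional parent, returns the created dentry (or an ERR_PTR), and callers clean up with plain debugfs_remove_recursive(), as the mm/failslab.c conversion further down shows. A sketch of a converted caller -- the attribute, knob, and directory names are assumptions:

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/fault-inject.h>
#include <linux/init.h>
#include <linux/stat.h>

static DECLARE_FAULT_ATTR(demo_fault_attr);     /* hypothetical attribute */
static u32 demo_filter;                         /* hypothetical extra knob */

static int __init demo_fault_debugfs_init(void)
{
        mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
        struct dentry *dir;

        dir = fault_create_debugfs_attr("demo-fault", NULL, &demo_fault_attr);
        if (IS_ERR(dir))
                return PTR_ERR(dir);

        /* extra knobs hang off the returned dentry */
        if (!debugfs_create_bool("demo-filter", mode, dir, &demo_filter))
                goto fail;
        return 0;
fail:
        debugfs_remove_recursive(dir);
        return -ENOMEM;
}
late_initcall(demo_fault_debugfs_init);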
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 577ddf80597..f352cc42f4f 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -1,8 +1,26 @@
1/* 1/*
2 * Basic general purpose allocator for managing special purpose memory 2 * Basic general purpose allocator for managing special purpose
3 * not managed by the regular kmalloc/kfree interface. 3 * memory, for example, memory that is not managed by the regular
4 * Uses for this includes on-device special memory, uncached memory 4 * kmalloc/kfree interface. Uses for this includes on-device special
5 * etc. 5 * memory, uncached memory etc.
6 *
7 * It is safe to use the allocator in NMI handlers and other special
8 * unblockable contexts that could otherwise deadlock on locks. This
9 * is implemented by using atomic operations and retries on any
10 * conflicts. The disadvantage is that there may be livelocks in
11 * extreme cases. For better scalability, one allocator can be used
12 * for each CPU.
13 *
14 * The lockless operation only works if there is enough memory
 15 * available. If new memory is added to the pool, a lock still has
 16 * to be taken. So any user relying on locklessness has to ensure
17 * that sufficient memory is preallocated.
18 *
19 * The basic atomic operation of this allocator is cmpxchg on long.
20 * On architectures that don't have NMI-safe cmpxchg implementation,
21 * the allocator can NOT be used in NMI handler. So code uses the
22 * allocator in NMI handler should depend on
23 * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
6 * 24 *
7 * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> 25 * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org>
8 * 26 *
@@ -13,8 +31,109 @@
13#include <linux/slab.h> 31#include <linux/slab.h>
14#include <linux/module.h> 32#include <linux/module.h>
15#include <linux/bitmap.h> 33#include <linux/bitmap.h>
34#include <linux/rculist.h>
35#include <linux/interrupt.h>
16#include <linux/genalloc.h> 36#include <linux/genalloc.h>
17 37
38static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set)
39{
40 unsigned long val, nval;
41
42 nval = *addr;
43 do {
44 val = nval;
45 if (val & mask_to_set)
46 return -EBUSY;
47 cpu_relax();
48 } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val);
49
50 return 0;
51}
52
53static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
54{
55 unsigned long val, nval;
56
57 nval = *addr;
58 do {
59 val = nval;
60 if ((val & mask_to_clear) != mask_to_clear)
61 return -EBUSY;
62 cpu_relax();
63 } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val);
64
65 return 0;
66}
67
68/*
69 * bitmap_set_ll - set the specified number of bits at the specified position
70 * @map: pointer to a bitmap
71 * @start: a bit position in @map
72 * @nr: number of bits to set
73 *
 74 * Set @nr bits starting from @start in @map lock-lessly. Several users
 75 * can set/clear the same bitmap simultaneously without a lock. If two
 76 * users set the same bit, one user will return the remaining bits,
 77 * otherwise return 0.
78 */
79static int bitmap_set_ll(unsigned long *map, int start, int nr)
80{
81 unsigned long *p = map + BIT_WORD(start);
82 const int size = start + nr;
83 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
84 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
85
86 while (nr - bits_to_set >= 0) {
87 if (set_bits_ll(p, mask_to_set))
88 return nr;
89 nr -= bits_to_set;
90 bits_to_set = BITS_PER_LONG;
91 mask_to_set = ~0UL;
92 p++;
93 }
94 if (nr) {
95 mask_to_set &= BITMAP_LAST_WORD_MASK(size);
96 if (set_bits_ll(p, mask_to_set))
97 return nr;
98 }
99
100 return 0;
101}
102
103/*
104 * bitmap_clear_ll - clear the specified number of bits at the specified position
105 * @map: pointer to a bitmap
106 * @start: a bit position in @map
 107 * @nr: number of bits to clear
108 *
 109 * Clear @nr bits starting from @start in @map lock-lessly. Several users
 110 * can set/clear the same bitmap simultaneously without a lock. If two
 111 * users clear the same bit, one user will return the remaining bits,
 112 * otherwise return 0.
113 */
114static int bitmap_clear_ll(unsigned long *map, int start, int nr)
115{
116 unsigned long *p = map + BIT_WORD(start);
117 const int size = start + nr;
118 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
119 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
120
121 while (nr - bits_to_clear >= 0) {
122 if (clear_bits_ll(p, mask_to_clear))
123 return nr;
124 nr -= bits_to_clear;
125 bits_to_clear = BITS_PER_LONG;
126 mask_to_clear = ~0UL;
127 p++;
128 }
129 if (nr) {
130 mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
131 if (clear_bits_ll(p, mask_to_clear))
132 return nr;
133 }
134
135 return 0;
136}
18 137
19/** 138/**
20 * gen_pool_create - create a new special memory pool 139 * gen_pool_create - create a new special memory pool
@@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
30 149
31 pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); 150 pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid);
32 if (pool != NULL) { 151 if (pool != NULL) {
33 rwlock_init(&pool->lock); 152 spin_lock_init(&pool->lock);
34 INIT_LIST_HEAD(&pool->chunks); 153 INIT_LIST_HEAD(&pool->chunks);
35 pool->min_alloc_order = min_alloc_order; 154 pool->min_alloc_order = min_alloc_order;
36 } 155 }
@@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
63 if (unlikely(chunk == NULL)) 182 if (unlikely(chunk == NULL))
64 return -ENOMEM; 183 return -ENOMEM;
65 184
66 spin_lock_init(&chunk->lock);
67 chunk->phys_addr = phys; 185 chunk->phys_addr = phys;
68 chunk->start_addr = virt; 186 chunk->start_addr = virt;
69 chunk->end_addr = virt + size; 187 chunk->end_addr = virt + size;
188 atomic_set(&chunk->avail, size);
70 189
71 write_lock(&pool->lock); 190 spin_lock(&pool->lock);
72 list_add(&chunk->next_chunk, &pool->chunks); 191 list_add_rcu(&chunk->next_chunk, &pool->chunks);
73 write_unlock(&pool->lock); 192 spin_unlock(&pool->lock);
74 193
75 return 0; 194 return 0;
76} 195}
@@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt);
85 */ 204 */
86phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) 205phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
87{ 206{
88 struct list_head *_chunk;
89 struct gen_pool_chunk *chunk; 207 struct gen_pool_chunk *chunk;
208 phys_addr_t paddr = -1;
90 209
91 read_lock(&pool->lock); 210 rcu_read_lock();
92 list_for_each(_chunk, &pool->chunks) { 211 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
93 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 212 if (addr >= chunk->start_addr && addr < chunk->end_addr) {
94 213 paddr = chunk->phys_addr + (addr - chunk->start_addr);
95 if (addr >= chunk->start_addr && addr < chunk->end_addr) 214 break;
96 return chunk->phys_addr + addr - chunk->start_addr; 215 }
97 } 216 }
98 read_unlock(&pool->lock); 217 rcu_read_unlock();
99 218
100 return -1; 219 return paddr;
101} 220}
102EXPORT_SYMBOL(gen_pool_virt_to_phys); 221EXPORT_SYMBOL(gen_pool_virt_to_phys);
103 222
@@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool)
115 int order = pool->min_alloc_order; 234 int order = pool->min_alloc_order;
116 int bit, end_bit; 235 int bit, end_bit;
117 236
118
119 list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { 237 list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
120 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 238 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
121 list_del(&chunk->next_chunk); 239 list_del(&chunk->next_chunk);
@@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy);
137 * @size: number of bytes to allocate from the pool 255 * @size: number of bytes to allocate from the pool
138 * 256 *
139 * Allocate the requested number of bytes from the specified pool. 257 * Allocate the requested number of bytes from the specified pool.
 140 * Uses a first-fit algorithm. 258 * Uses a first-fit algorithm. Cannot be used in an NMI handler on
 259 * architectures without an NMI-safe cmpxchg implementation.
141 */ 260 */
142unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) 261unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
143{ 262{
144 struct list_head *_chunk;
145 struct gen_pool_chunk *chunk; 263 struct gen_pool_chunk *chunk;
146 unsigned long addr, flags; 264 unsigned long addr = 0;
147 int order = pool->min_alloc_order; 265 int order = pool->min_alloc_order;
148 int nbits, start_bit, end_bit; 266 int nbits, start_bit = 0, end_bit, remain;
267
268#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
269 BUG_ON(in_nmi());
270#endif
149 271
150 if (size == 0) 272 if (size == 0)
151 return 0; 273 return 0;
152 274
153 nbits = (size + (1UL << order) - 1) >> order; 275 nbits = (size + (1UL << order) - 1) >> order;
154 276 rcu_read_lock();
155 read_lock(&pool->lock); 277 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
156 list_for_each(_chunk, &pool->chunks) { 278 if (size > atomic_read(&chunk->avail))
157 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 279 continue;
158 280
159 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 281 end_bit = (chunk->end_addr - chunk->start_addr) >> order;
160 282retry:
161 spin_lock_irqsave(&chunk->lock, flags); 283 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit,
162 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, 284 start_bit, nbits, 0);
163 nbits, 0); 285 if (start_bit >= end_bit)
164 if (start_bit >= end_bit) {
165 spin_unlock_irqrestore(&chunk->lock, flags);
166 continue; 286 continue;
287 remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
288 if (remain) {
289 remain = bitmap_clear_ll(chunk->bits, start_bit,
290 nbits - remain);
291 BUG_ON(remain);
292 goto retry;
167 } 293 }
168 294
169 addr = chunk->start_addr + ((unsigned long)start_bit << order); 295 addr = chunk->start_addr + ((unsigned long)start_bit << order);
170 296 size = nbits << order;
171 bitmap_set(chunk->bits, start_bit, nbits); 297 atomic_sub(size, &chunk->avail);
172 spin_unlock_irqrestore(&chunk->lock, flags); 298 break;
173 read_unlock(&pool->lock);
174 return addr;
175 } 299 }
176 read_unlock(&pool->lock); 300 rcu_read_unlock();
177 return 0; 301 return addr;
178} 302}
179EXPORT_SYMBOL(gen_pool_alloc); 303EXPORT_SYMBOL(gen_pool_alloc);
180 304
@@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc);
184 * @addr: starting address of memory to free back to pool 308 * @addr: starting address of memory to free back to pool
185 * @size: size in bytes of memory to free 309 * @size: size in bytes of memory to free
186 * 310 *
187 * Free previously allocated special memory back to the specified pool. 311 * Free previously allocated special memory back to the specified
 312 * pool. Cannot be used in an NMI handler on architectures without
 313 * an NMI-safe cmpxchg implementation.
188 */ 314 */
189void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) 315void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
190{ 316{
191 struct list_head *_chunk;
192 struct gen_pool_chunk *chunk; 317 struct gen_pool_chunk *chunk;
193 unsigned long flags;
194 int order = pool->min_alloc_order; 318 int order = pool->min_alloc_order;
195 int bit, nbits; 319 int start_bit, nbits, remain;
196 320
197 nbits = (size + (1UL << order) - 1) >> order; 321#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
198 322 BUG_ON(in_nmi());
199 read_lock(&pool->lock); 323#endif
200 list_for_each(_chunk, &pool->chunks) {
201 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
202 324
325 nbits = (size + (1UL << order) - 1) >> order;
326 rcu_read_lock();
327 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
203 if (addr >= chunk->start_addr && addr < chunk->end_addr) { 328 if (addr >= chunk->start_addr && addr < chunk->end_addr) {
204 BUG_ON(addr + size > chunk->end_addr); 329 BUG_ON(addr + size > chunk->end_addr);
205 spin_lock_irqsave(&chunk->lock, flags); 330 start_bit = (addr - chunk->start_addr) >> order;
206 bit = (addr - chunk->start_addr) >> order; 331 remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
207 while (nbits--) 332 BUG_ON(remain);
208 __clear_bit(bit++, chunk->bits); 333 size = nbits << order;
209 spin_unlock_irqrestore(&chunk->lock, flags); 334 atomic_add(size, &chunk->avail);
210 break; 335 rcu_read_unlock();
336 return;
211 } 337 }
212 } 338 }
213 BUG_ON(nbits > 0); 339 rcu_read_unlock();
214 read_unlock(&pool->lock); 340 BUG();
215} 341}
216EXPORT_SYMBOL(gen_pool_free); 342EXPORT_SYMBOL(gen_pool_free);
343
344/**
345 * gen_pool_for_each_chunk - call func for every chunk of generic memory pool
346 * @pool: the generic memory pool
347 * @func: func to call
348 * @data: additional data used by @func
349 *
 350 * Call @func for every chunk of the generic memory pool. @func is
351 * called with rcu_read_lock held.
352 */
353void gen_pool_for_each_chunk(struct gen_pool *pool,
354 void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data),
355 void *data)
356{
357 struct gen_pool_chunk *chunk;
358
359 rcu_read_lock();
360 list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk)
361 func(pool, chunk, data);
362 rcu_read_unlock();
363}
364EXPORT_SYMBOL(gen_pool_for_each_chunk);
365
366/**
367 * gen_pool_avail - get available free space of the pool
368 * @pool: pool to get available free space
369 *
370 * Return available free space of the specified pool.
371 */
372size_t gen_pool_avail(struct gen_pool *pool)
373{
374 struct gen_pool_chunk *chunk;
375 size_t avail = 0;
376
377 rcu_read_lock();
378 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
379 avail += atomic_read(&chunk->avail);
380 rcu_read_unlock();
381 return avail;
382}
383EXPORT_SYMBOL_GPL(gen_pool_avail);
384
385/**
386 * gen_pool_size - get size in bytes of memory managed by the pool
387 * @pool: pool to get size
388 *
389 * Return size in bytes of memory managed by the pool.
390 */
391size_t gen_pool_size(struct gen_pool *pool)
392{
393 struct gen_pool_chunk *chunk;
394 size_t size = 0;
395
396 rcu_read_lock();
397 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
398 size += chunk->end_addr - chunk->start_addr;
399 rcu_read_unlock();
400 return size;
401}
402EXPORT_SYMBOL_GPL(gen_pool_size);
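The genalloc conversion above keeps the gen_pool API but reimplements allocation with lock-less cmpxchg bitmap operations: chunks live on an RCU-protected list, each carries an atomic avail counter, and only adding memory still takes the pool spinlock. A minimal sketch of typical (unchanged) usage -- the granule size and backing buffer are assumptions:

#include <linux/genalloc.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/slab.h>

static int __init genpool_demo_init(void)
{
        struct gen_pool *pool;
        unsigned long addr;
        void *backing;

        pool = gen_pool_create(ilog2(32), -1);  /* 32-byte granules, any node */
        backing = kmalloc(PAGE_SIZE, GFP_KERNEL);
        if (!pool || !backing)
                return -ENOMEM;

        /* Adding memory still takes pool->lock ... */
        if (gen_pool_add(pool, (unsigned long)backing, PAGE_SIZE, -1))
                return -ENOMEM;

        /* ... but alloc/free now run lock-lessly (and are NMI-safe where
         * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG holds). */
        addr = gen_pool_alloc(pool, 64);
        if (addr)
                gen_pool_free(pool, addr, 64);

        pr_info("pool: %zu bytes managed, %zu available\n",
                gen_pool_size(pool), gen_pool_avail(pool));
        return 0;
}
late_initcall(genpool_demo_init);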
diff --git a/lib/idr.c b/lib/idr.c
index e15502e8b21..db040ce3fa7 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -34,8 +34,10 @@
34#include <linux/err.h> 34#include <linux/err.h>
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/idr.h> 36#include <linux/idr.h>
37#include <linux/spinlock.h>
37 38
38static struct kmem_cache *idr_layer_cache; 39static struct kmem_cache *idr_layer_cache;
40static DEFINE_SPINLOCK(simple_ida_lock);
39 41
40static struct idr_layer *get_from_free_list(struct idr *idp) 42static struct idr_layer *get_from_free_list(struct idr *idp)
41{ 43{
@@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida)
926EXPORT_SYMBOL(ida_destroy); 928EXPORT_SYMBOL(ida_destroy);
927 929
928/** 930/**
931 * ida_simple_get - get a new id.
932 * @ida: the (initialized) ida.
 933 * @start: the minimum id (inclusive, < 0x80000000)
 934 * @end: the maximum id (exclusive, < 0x80000000 or 0)
935 * @gfp_mask: memory allocation flags
936 *
937 * Allocates an id in the range start <= id < end, or returns -ENOSPC.
938 * On memory allocation failure, returns -ENOMEM.
939 *
940 * Use ida_simple_remove() to get rid of an id.
941 */
942int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
943 gfp_t gfp_mask)
944{
945 int ret, id;
946 unsigned int max;
947
948 BUG_ON((int)start < 0);
949 BUG_ON((int)end < 0);
950
951 if (end == 0)
952 max = 0x80000000;
953 else {
954 BUG_ON(end < start);
955 max = end - 1;
956 }
957
958again:
959 if (!ida_pre_get(ida, gfp_mask))
960 return -ENOMEM;
961
962 spin_lock(&simple_ida_lock);
963 ret = ida_get_new_above(ida, start, &id);
964 if (!ret) {
965 if (id > max) {
966 ida_remove(ida, id);
967 ret = -ENOSPC;
968 } else {
969 ret = id;
970 }
971 }
972 spin_unlock(&simple_ida_lock);
973
974 if (unlikely(ret == -EAGAIN))
975 goto again;
976
977 return ret;
978}
979EXPORT_SYMBOL(ida_simple_get);
980
981/**
982 * ida_simple_remove - remove an allocated id.
983 * @ida: the (initialized) ida.
984 * @id: the id returned by ida_simple_get.
985 */
986void ida_simple_remove(struct ida *ida, unsigned int id)
987{
988 BUG_ON((int)id < 0);
989 spin_lock(&simple_ida_lock);
990 ida_remove(ida, id);
991 spin_unlock(&simple_ida_lock);
992}
993EXPORT_SYMBOL(ida_simple_remove);
994
995/**
929 * ida_init - initialize ida handle 996 * ida_init - initialize ida handle
930 * @ida: ida handle 997 * @ida: ida handle
931 * 998 *
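ida_simple_get()/ida_simple_remove() wrap the usual ida_pre_get()/ida_get_new_above()/ida_remove() dance, including the -EAGAIN retry and a global spinlock, behind a malloc-like interface. A sketch of a hypothetical consumer handing out minor numbers:

#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(demo_minor_ida);      /* hypothetical id space */

static int demo_alloc_minor(void)
{
        /* ids in [0, 256); returns -ENOSPC when full, -ENOMEM on OOM */
        return ida_simple_get(&demo_minor_ida, 0, 256, GFP_KERNEL);
}

static void demo_free_minor(int minor)
{
        ida_simple_remove(&demo_minor_ida, minor);
}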
diff --git a/lib/llist.c b/lib/llist.c
new file mode 100644
index 00000000000..da445724fa1
--- /dev/null
+++ b/lib/llist.c
@@ -0,0 +1,129 @@
1/*
2 * Lock-less NULL terminated single linked list
3 *
4 * The basic atomic operation of this list is cmpxchg on long. On
5 * architectures that don't have NMI-safe cmpxchg implementation, the
 6 * list can NOT be used in an NMI handler. So code that uses the list
 7 * in an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
8 *
9 * Copyright 2010,2011 Intel Corp.
10 * Author: Huang Ying <ying.huang@intel.com>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version
14 * 2 as published by the Free Software Foundation;
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25#include <linux/kernel.h>
26#include <linux/module.h>
27#include <linux/interrupt.h>
28#include <linux/llist.h>
29
30#include <asm/system.h>
31
32/**
33 * llist_add - add a new entry
34 * @new: new entry to be added
35 * @head: the head for your lock-less list
36 */
37void llist_add(struct llist_node *new, struct llist_head *head)
38{
39 struct llist_node *entry, *old_entry;
40
41#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
42 BUG_ON(in_nmi());
43#endif
44
45 entry = head->first;
46 do {
47 old_entry = entry;
48 new->next = entry;
49 cpu_relax();
50 } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
51}
52EXPORT_SYMBOL_GPL(llist_add);
53
54/**
55 * llist_add_batch - add several linked entries in batch
56 * @new_first: first entry in batch to be added
57 * @new_last: last entry in batch to be added
58 * @head: the head for your lock-less list
59 */
60void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
61 struct llist_head *head)
62{
63 struct llist_node *entry, *old_entry;
64
65#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
66 BUG_ON(in_nmi());
67#endif
68
69 entry = head->first;
70 do {
71 old_entry = entry;
72 new_last->next = entry;
73 cpu_relax();
74 } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
75}
76EXPORT_SYMBOL_GPL(llist_add_batch);
77
78/**
79 * llist_del_first - delete the first entry of lock-less list
80 * @head: the head for your lock-less list
81 *
 82 * If the list is empty, return NULL; otherwise, return the first entry
 83 * deleted, which is the most recently added one.
 84 *
 85 * Only one llist_del_first user may run concurrently with multiple
 86 * llist_add users without a lock, because otherwise a llist_del_first,
 87 * llist_add, llist_add (or llist_del_all, llist_add, llist_add)
 88 * sequence in another user may change @head->first->next while
 89 * keeping @head->first. If multiple consumers are needed, please
 90 * use llist_del_all or use a lock between consumers.
91 */
92struct llist_node *llist_del_first(struct llist_head *head)
93{
94 struct llist_node *entry, *old_entry, *next;
95
96#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
97 BUG_ON(in_nmi());
98#endif
99
100 entry = head->first;
101 do {
102 if (entry == NULL)
103 return NULL;
104 old_entry = entry;
105 next = entry->next;
106 cpu_relax();
107 } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
108
109 return entry;
110}
111EXPORT_SYMBOL_GPL(llist_del_first);
112
113/**
114 * llist_del_all - delete all entries from lock-less list
115 * @head: the head of lock-less list to delete all entries
116 *
 117 * If the list is empty, return NULL; otherwise, delete all entries
 118 * and return a pointer to the first entry. Entries are returned in
 119 * order from the newest to the oldest added.
120 */
121struct llist_node *llist_del_all(struct llist_head *head)
122{
123#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
124 BUG_ON(in_nmi());
125#endif
126
127 return xchg(&head->first, NULL);
128}
129EXPORT_SYMBOL_GPL(llist_del_all);
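lib/llist.c is a new lock-less, NULL-terminated singly linked list: producers push with cmpxchg from almost any context, and a single consumer detaches the whole list at once with one xchg. A sketch of the intended pattern, assuming the companion include/linux/llist.h helpers (LLIST_HEAD, llist_entry); the work-item type is an illustration:

#include <linux/llist.h>
#include <linux/slab.h>

struct demo_work {
        struct llist_node node;
        int payload;
};

static LLIST_HEAD(demo_pending);

/* Producer side: safe from many contexts concurrently. */
static void demo_queue(struct demo_work *w)
{
        llist_add(&w->node, &demo_pending);
}

/* Single consumer: detach the whole list in one atomic xchg, then walk
 * it. Entries come back newest-first, per llist_del_all()'s contract. */
static void demo_drain(void)
{
        struct llist_node *n = llist_del_all(&demo_pending);

        while (n) {
                struct demo_work *w = llist_entry(n, struct demo_work, node);

                n = n->next;
                kfree(w);
        }
}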
diff --git a/lib/md5.c b/lib/md5.c
new file mode 100644
index 00000000000..c777180e1f2
--- /dev/null
+++ b/lib/md5.c
@@ -0,0 +1,95 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/cryptohash.h>
4
5#define F1(x, y, z) (z ^ (x & (y ^ z)))
6#define F2(x, y, z) F1(z, x, y)
7#define F3(x, y, z) (x ^ y ^ z)
8#define F4(x, y, z) (y ^ (x | ~z))
9
10#define MD5STEP(f, w, x, y, z, in, s) \
11 (w += f(x, y, z) + in, w = (w<<s | w>>(32-s)) + x)
12
13void md5_transform(__u32 *hash, __u32 const *in)
14{
15 u32 a, b, c, d;
16
17 a = hash[0];
18 b = hash[1];
19 c = hash[2];
20 d = hash[3];
21
22 MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
23 MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
24 MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
25 MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
26 MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
27 MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
28 MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
29 MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
30 MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
31 MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
32 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
33 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
34 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
35 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
36 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
37 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
38
39 MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
40 MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
41 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
42 MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
43 MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
44 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
45 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
46 MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
47 MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
48 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
49 MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
50 MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
51 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
52 MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
53 MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
54 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
55
56 MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
57 MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
58 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
59 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
60 MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
61 MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
62 MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
63 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
64 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
65 MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
66 MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
67 MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
68 MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
69 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
70 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
71 MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
72
73 MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
74 MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
75 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
76 MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
77 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
78 MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
79 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
80 MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
81 MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
82 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
83 MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
84 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
85 MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
86 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
87 MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
88 MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
89
90 hash[0] += a;
91 hash[1] += b;
92 hash[2] += c;
93 hash[3] += d;
94}
95EXPORT_SYMBOL(md5_transform);
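md5_transform() is deliberately bare: it folds one 64-byte block into a four-word state and does no padding or length encoding (it arrives in this merge for the network sequence-number generator, which only needs the mixing). A sketch, assuming the MD5_DIGEST_WORDS/MD5_MESSAGE_BYTES constants from linux/cryptohash.h:

#include <linux/cryptohash.h>
#include <linux/string.h>
#include <linux/types.h>

/* Mix one 64-byte block into an MD5 state. Without the standard
 * padding and length suffix this is NOT a full MD5 of the input. */
static void demo_md5_block(const void *block, u32 hash[MD5_DIGEST_WORDS])
{
        u32 in[MD5_MESSAGE_BYTES / sizeof(u32)];

        hash[0] = 0x67452301;   /* standard MD5 initial state */
        hash[1] = 0xefcdab89;
        hash[2] = 0x98badcfe;
        hash[3] = 0x10325476;

        memcpy(in, block, sizeof(in));
        md5_transform(hash, in);
}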
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7ea2e033d71..a2f9da59c19 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
823EXPORT_SYMBOL(radix_tree_prev_hole); 823EXPORT_SYMBOL(radix_tree_prev_hole);
824 824
825static unsigned int 825static unsigned int
826__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, 826__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
827 unsigned int max_items, unsigned long *next_index) 827 unsigned long index, unsigned int max_items, unsigned long *next_index)
828{ 828{
829 unsigned int nr_found = 0; 829 unsigned int nr_found = 0;
830 unsigned int shift, height; 830 unsigned int shift, height;
@@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
857 857
858 /* Bottom level: grab some items */ 858 /* Bottom level: grab some items */
859 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { 859 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
860 index++;
861 if (slot->slots[i]) { 860 if (slot->slots[i]) {
862 results[nr_found++] = &(slot->slots[i]); 861 results[nr_found] = &(slot->slots[i]);
863 if (nr_found == max_items) 862 if (indices)
863 indices[nr_found] = index;
864 if (++nr_found == max_items) {
865 index++;
864 goto out; 866 goto out;
867 }
865 } 868 }
869 index++;
866 } 870 }
867out: 871out:
868 *next_index = index; 872 *next_index = index;
@@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
918 922
919 if (cur_index > max_index) 923 if (cur_index > max_index)
920 break; 924 break;
921 slots_found = __lookup(node, (void ***)results + ret, cur_index, 925 slots_found = __lookup(node, (void ***)results + ret, NULL,
922 max_items - ret, &next_index); 926 cur_index, max_items - ret, &next_index);
923 nr_found = 0; 927 nr_found = 0;
924 for (i = 0; i < slots_found; i++) { 928 for (i = 0; i < slots_found; i++) {
925 struct radix_tree_node *slot; 929 struct radix_tree_node *slot;
@@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
944 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree 948 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
945 * @root: radix tree root 949 * @root: radix tree root
946 * @results: where the results of the lookup are placed 950 * @results: where the results of the lookup are placed
951 * @indices: where their indices should be placed (but usually NULL)
947 * @first_index: start the lookup from this key 952 * @first_index: start the lookup from this key
948 * @max_items: place up to this many items at *results 953 * @max_items: place up to this many items at *results
949 * 954 *
@@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
958 * protection, radix_tree_deref_slot may fail requiring a retry. 963 * protection, radix_tree_deref_slot may fail requiring a retry.
959 */ 964 */
960unsigned int 965unsigned int
961radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, 966radix_tree_gang_lookup_slot(struct radix_tree_root *root,
967 void ***results, unsigned long *indices,
962 unsigned long first_index, unsigned int max_items) 968 unsigned long first_index, unsigned int max_items)
963{ 969{
964 unsigned long max_index; 970 unsigned long max_index;
@@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
974 if (first_index > 0) 980 if (first_index > 0)
975 return 0; 981 return 0;
976 results[0] = (void **)&root->rnode; 982 results[0] = (void **)&root->rnode;
983 if (indices)
984 indices[0] = 0;
977 return 1; 985 return 1;
978 } 986 }
979 node = indirect_to_ptr(node); 987 node = indirect_to_ptr(node);
@@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
987 995
988 if (cur_index > max_index) 996 if (cur_index > max_index)
989 break; 997 break;
990 slots_found = __lookup(node, results + ret, cur_index, 998 slots_found = __lookup(node, results + ret,
991 max_items - ret, &next_index); 999 indices ? indices + ret : NULL,
1000 cur_index, max_items - ret, &next_index);
992 ret += slots_found; 1001 ret += slots_found;
993 if (next_index == 0) 1002 if (next_index == 0)
994 break; 1003 break;
@@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1194} 1203}
1195EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1204EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1196 1205
1206#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
1207#include <linux/sched.h> /* for cond_resched() */
1208
1209/*
1210 * This linear search is at present only useful to shmem_unuse_inode().
1211 */
1212static unsigned long __locate(struct radix_tree_node *slot, void *item,
1213 unsigned long index, unsigned long *found_index)
1214{
1215 unsigned int shift, height;
1216 unsigned long i;
1217
1218 height = slot->height;
1219 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1220
1221 for ( ; height > 1; height--) {
1222 i = (index >> shift) & RADIX_TREE_MAP_MASK;
1223 for (;;) {
1224 if (slot->slots[i] != NULL)
1225 break;
1226 index &= ~((1UL << shift) - 1);
1227 index += 1UL << shift;
1228 if (index == 0)
1229 goto out; /* 32-bit wraparound */
1230 i++;
1231 if (i == RADIX_TREE_MAP_SIZE)
1232 goto out;
1233 }
1234
1235 shift -= RADIX_TREE_MAP_SHIFT;
1236 slot = rcu_dereference_raw(slot->slots[i]);
1237 if (slot == NULL)
1238 goto out;
1239 }
1240
1241 /* Bottom level: check items */
1242 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
1243 if (slot->slots[i] == item) {
1244 *found_index = index + i;
1245 index = 0;
1246 goto out;
1247 }
1248 }
1249 index += RADIX_TREE_MAP_SIZE;
1250out:
1251 return index;
1252}
1253
1254/**
1255 * radix_tree_locate_item - search through radix tree for item
1256 * @root: radix tree root
1257 * @item: item to be found
1258 *
1259 * Returns index where item was found, or -1 if not found.
1260 * Caller must hold no lock (since this time-consuming function needs
1261 * to be preemptible), and must check afterwards if item is still there.
1262 */
1263unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1264{
1265 struct radix_tree_node *node;
1266 unsigned long max_index;
1267 unsigned long cur_index = 0;
1268 unsigned long found_index = -1;
1269
1270 do {
1271 rcu_read_lock();
1272 node = rcu_dereference_raw(root->rnode);
1273 if (!radix_tree_is_indirect_ptr(node)) {
1274 rcu_read_unlock();
1275 if (node == item)
1276 found_index = 0;
1277 break;
1278 }
1279
1280 node = indirect_to_ptr(node);
1281 max_index = radix_tree_maxindex(node->height);
1282 if (cur_index > max_index)
1283 break;
1284
1285 cur_index = __locate(node, item, cur_index, &found_index);
1286 rcu_read_unlock();
1287 cond_resched();
1288 } while (cur_index != 0 && cur_index <= max_index);
1289
1290 return found_index;
1291}
1292#else
1293unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1294{
1295 return -1;
1296}
1297#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1197 1298
1198/** 1299/**
1199 * radix_tree_shrink - shrink height of a radix tree to minimal 1300 * radix_tree_shrink - shrink height of a radix tree to minimal
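The radix-tree changes above give radix_tree_gang_lookup_slot() an optional indices array (existing callers pass NULL) so shmem can learn where each slot sits, and add radix_tree_locate_item(), a preemptible linear search for shmem_unuse_inode(). A sketch of the extended lookup; the tree and batch size are assumptions:

#include <linux/kernel.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

#define DEMO_BATCH 16

static void demo_walk(struct radix_tree_root *tree)
{
        void **slots[DEMO_BATCH];
        unsigned long indices[DEMO_BATCH];
        unsigned int i, n;

        rcu_read_lock();
        /* The new third argument reports the index of each slot found. */
        n = radix_tree_gang_lookup_slot(tree, slots, indices, 0, DEMO_BATCH);
        for (i = 0; i < n; i++)
                pr_info("index %lu -> %p\n", indices[i],
                        radix_tree_deref_slot(slots[i]));
        rcu_read_unlock();
}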
diff --git a/lib/sha1.c b/lib/sha1.c
index 4c45fd50e91..f33271dd00c 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -1,31 +1,72 @@
1/* 1/*
2 * SHA transform algorithm, originally taken from code written by 2 * SHA1 routine optimized to do word accesses rather than byte accesses,
3 * Peter Gutmann, and placed in the public domain. 3 * and to avoid unnecessary copies into the context array.
4 *
5 * This was based on the git SHA1 implementation.
4 */ 6 */
5 7
6#include <linux/kernel.h> 8#include <linux/kernel.h>
7#include <linux/module.h> 9#include <linux/module.h>
8#include <linux/cryptohash.h> 10#include <linux/bitops.h>
11#include <asm/unaligned.h>
9 12
10/* The SHA f()-functions. */ 13/*
14 * If you have 32 registers or more, the compiler can (and should)
15 * try to change the array[] accesses into registers. However, on
 16 * machines with fewer than ~25 registers, that won't really work,
17 * and at least gcc will make an unholy mess of it.
18 *
19 * So to avoid that mess which just slows things down, we force
20 * the stores to memory to actually happen (we might be better off
21 * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
22 * suggested by Artur Skawina - that will also make gcc unable to
23 * try to do the silly "optimize away loads" part because it won't
24 * see what the value will be).
25 *
26 * Ben Herrenschmidt reports that on PPC, the C version comes close
27 * to the optimized asm with this (ie on PPC you don't want that
28 * 'volatile', since there are lots of registers).
29 *
30 * On ARM we get the best code generation by forcing a full memory barrier
 31 * between each SHA_ROUND, otherwise gcc happily gets wild with spilling and
 32 * the stack frame size simply explodes and performance goes down the drain.
33 */
11 34
12#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */ 35#ifdef CONFIG_X86
13#define f2(x,y,z) (x ^ y ^ z) /* XOR */ 36 #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
14#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */ 37#elif defined(CONFIG_ARM)
38 #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
39#else
40 #define setW(x, val) (W(x) = (val))
41#endif
15 42
16/* The SHA Mysterious Constants */ 43/* This "rolls" over the 512-bit array */
44#define W(x) (array[(x)&15])
17 45
18#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ 46/*
19#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ 47 * Where do we get the source from? The first 16 iterations get it from
20#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ 48 * the input data, the next mix it from the 512-bit array.
21#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ 49 */
50#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
51#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
52
53#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
54 __u32 TEMP = input(t); setW(t, TEMP); \
55 E += TEMP + rol32(A,5) + (fn) + (constant); \
56 B = ror32(B, 2); } while (0)
57
58#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
59#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
60#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
61#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
62#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
22 63
23/** 64/**
24 * sha_transform - single block SHA1 transform 65 * sha_transform - single block SHA1 transform
25 * 66 *
26 * @digest: 160 bit digest to update 67 * @digest: 160 bit digest to update
27 * @data: 512 bits of data to hash 68 * @data: 512 bits of data to hash
28 * @W: 80 words of workspace (see note) 69 * @array: 16 words of workspace (see note)
29 * 70 *
30 * This function generates a SHA1 digest for a single 512-bit block. 71 * This function generates a SHA1 digest for a single 512-bit block.
31 * Be warned, it does not handle padding and message digest, do not 72 * Be warned, it does not handle padding and message digest, do not
@@ -36,47 +77,111 @@
36 * to clear the workspace. This is left to the caller to avoid 77 * to clear the workspace. This is left to the caller to avoid
37 * unnecessary clears between chained hashing operations. 78 * unnecessary clears between chained hashing operations.
38 */ 79 */
39void sha_transform(__u32 *digest, const char *in, __u32 *W) 80void sha_transform(__u32 *digest, const char *data, __u32 *array)
40{ 81{
41 __u32 a, b, c, d, e, t, i; 82 __u32 A, B, C, D, E;
42 83
43 for (i = 0; i < 16; i++) 84 A = digest[0];
44 W[i] = be32_to_cpu(((const __be32 *)in)[i]); 85 B = digest[1];
45 86 C = digest[2];
46 for (i = 0; i < 64; i++) 87 D = digest[3];
47 W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1); 88 E = digest[4];
48 89
49 a = digest[0]; 90 /* Round 1 - iterations 0-16 take their input from 'data' */
50 b = digest[1]; 91 T_0_15( 0, A, B, C, D, E);
51 c = digest[2]; 92 T_0_15( 1, E, A, B, C, D);
52 d = digest[3]; 93 T_0_15( 2, D, E, A, B, C);
53 e = digest[4]; 94 T_0_15( 3, C, D, E, A, B);
54 95 T_0_15( 4, B, C, D, E, A);
55 for (i = 0; i < 20; i++) { 96 T_0_15( 5, A, B, C, D, E);
56 t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i]; 97 T_0_15( 6, E, A, B, C, D);
57 e = d; d = c; c = rol32(b, 30); b = a; a = t; 98 T_0_15( 7, D, E, A, B, C);
58 } 99 T_0_15( 8, C, D, E, A, B);
59 100 T_0_15( 9, B, C, D, E, A);
60 for (; i < 40; i ++) { 101 T_0_15(10, A, B, C, D, E);
61 t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i]; 102 T_0_15(11, E, A, B, C, D);
62 e = d; d = c; c = rol32(b, 30); b = a; a = t; 103 T_0_15(12, D, E, A, B, C);
63 } 104 T_0_15(13, C, D, E, A, B);
64 105 T_0_15(14, B, C, D, E, A);
65 for (; i < 60; i ++) { 106 T_0_15(15, A, B, C, D, E);
66 t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i]; 107
67 e = d; d = c; c = rol32(b, 30); b = a; a = t; 108 /* Round 1 - tail. Input from 512-bit mixing array */
68 } 109 T_16_19(16, E, A, B, C, D);
69 110 T_16_19(17, D, E, A, B, C);
70 for (; i < 80; i ++) { 111 T_16_19(18, C, D, E, A, B);
71 t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i]; 112 T_16_19(19, B, C, D, E, A);
72 e = d; d = c; c = rol32(b, 30); b = a; a = t; 113
73 } 114 /* Round 2 */
74 115 T_20_39(20, A, B, C, D, E);
75 digest[0] += a; 116 T_20_39(21, E, A, B, C, D);
76 digest[1] += b; 117 T_20_39(22, D, E, A, B, C);
77 digest[2] += c; 118 T_20_39(23, C, D, E, A, B);
78 digest[3] += d; 119 T_20_39(24, B, C, D, E, A);
79 digest[4] += e; 120 T_20_39(25, A, B, C, D, E);
121 T_20_39(26, E, A, B, C, D);
122 T_20_39(27, D, E, A, B, C);
123 T_20_39(28, C, D, E, A, B);
124 T_20_39(29, B, C, D, E, A);
125 T_20_39(30, A, B, C, D, E);
126 T_20_39(31, E, A, B, C, D);
127 T_20_39(32, D, E, A, B, C);
128 T_20_39(33, C, D, E, A, B);
129 T_20_39(34, B, C, D, E, A);
130 T_20_39(35, A, B, C, D, E);
131 T_20_39(36, E, A, B, C, D);
132 T_20_39(37, D, E, A, B, C);
133 T_20_39(38, C, D, E, A, B);
134 T_20_39(39, B, C, D, E, A);
135
136 /* Round 3 */
137 T_40_59(40, A, B, C, D, E);
138 T_40_59(41, E, A, B, C, D);
139 T_40_59(42, D, E, A, B, C);
140 T_40_59(43, C, D, E, A, B);
141 T_40_59(44, B, C, D, E, A);
142 T_40_59(45, A, B, C, D, E);
143 T_40_59(46, E, A, B, C, D);
144 T_40_59(47, D, E, A, B, C);
145 T_40_59(48, C, D, E, A, B);
146 T_40_59(49, B, C, D, E, A);
147 T_40_59(50, A, B, C, D, E);
148 T_40_59(51, E, A, B, C, D);
149 T_40_59(52, D, E, A, B, C);
150 T_40_59(53, C, D, E, A, B);
151 T_40_59(54, B, C, D, E, A);
152 T_40_59(55, A, B, C, D, E);
153 T_40_59(56, E, A, B, C, D);
154 T_40_59(57, D, E, A, B, C);
155 T_40_59(58, C, D, E, A, B);
156 T_40_59(59, B, C, D, E, A);
157
158 /* Round 4 */
159 T_60_79(60, A, B, C, D, E);
160 T_60_79(61, E, A, B, C, D);
161 T_60_79(62, D, E, A, B, C);
162 T_60_79(63, C, D, E, A, B);
163 T_60_79(64, B, C, D, E, A);
164 T_60_79(65, A, B, C, D, E);
165 T_60_79(66, E, A, B, C, D);
166 T_60_79(67, D, E, A, B, C);
167 T_60_79(68, C, D, E, A, B);
168 T_60_79(69, B, C, D, E, A);
169 T_60_79(70, A, B, C, D, E);
170 T_60_79(71, E, A, B, C, D);
171 T_60_79(72, D, E, A, B, C);
172 T_60_79(73, C, D, E, A, B);
173 T_60_79(74, B, C, D, E, A);
174 T_60_79(75, A, B, C, D, E);
175 T_60_79(76, E, A, B, C, D);
176 T_60_79(77, D, E, A, B, C);
177 T_60_79(78, C, D, E, A, B);
178 T_60_79(79, B, C, D, E, A);
179
180 digest[0] += A;
181 digest[1] += B;
182 digest[2] += C;
183 digest[3] += D;
184 digest[4] += E;
80} 185}
81EXPORT_SYMBOL(sha_transform); 186EXPORT_SYMBOL(sha_transform);
82 187
@@ -92,4 +197,3 @@ void sha_init(__u32 *buf)
92 buf[3] = 0x10325476; 197 buf[3] = 0x10325476;
93 buf[4] = 0xc3d2e1f0; 198 buf[4] = 0xc3d2e1f0;
94} 199}
95
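
The new lib/sha1.c above replaces the old round loops, with their e = d; d = c; c = rol32(b, 30); b = a; a = t; shuffle, by fully unrolled T_* macros that permute the argument list instead of moving the registers. A minimal standalone sketch of that idiom follows; it is not taken from the patch (ROUND and the demo values are invented here, and the kernel's macros additionally fold in the workspace mixing that this sketch omits):

#include <stdint.h>
#include <stdio.h>

#define ROL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

/*
 * One round-2-style step (f = B^C^D, K = 0x6ed9eba1): E accumulates
 * the round value, B rotates in place; the caller's argument order
 * does the rest of the register shuffling.
 */
#define ROUND(w, A, B, C, D, E) do {				\
	(E) += ROL32((A), 5) + ((B) ^ (C) ^ (D))		\
	       + 0x6ed9eba1 + (w);				\
	(B) = ROL32((B), 30);					\
} while (0)

int main(void)
{
	uint32_t A = 0x67452301, B = 0xefcdab89, C = 0x98badcfe;
	uint32_t D = 0x10325476, E = 0xc3d2e1f0;
	uint32_t w = 0xdeadbeef;	/* stand-in for one message word */

	/* five unrolled steps: rotating the arguments replaces the
	 * old e = d; d = c; c = rol32(b, 30); b = a; a = t; shuffle */
	ROUND(w, A, B, C, D, E);
	ROUND(w, E, A, B, C, D);
	ROUND(w, D, E, A, B, C);
	ROUND(w, C, D, E, A, B);
	ROUND(w, B, C, D, E, A);

	printf("%08x %08x %08x %08x %08x\n", A, B, C, D, E);
	return 0;
}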
diff --git a/mm/failslab.c b/mm/failslab.c
index 1ce58c201dc..0dd7b8fec71 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab);
34#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS 34#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
35static int __init failslab_debugfs_init(void) 35static int __init failslab_debugfs_init(void)
36{ 36{
37 struct dentry *dir;
37 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 38 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
38 int err;
39 39
40 err = init_fault_attr_dentries(&failslab.attr, "failslab"); 40 dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
41 if (err) 41 if (IS_ERR(dir))
42 return err; 42 return PTR_ERR(dir);
43 43
44 if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, 44 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
45 &failslab.ignore_gfp_wait)) 45 &failslab.ignore_gfp_wait))
46 goto fail; 46 goto fail;
47 if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, 47 if (!debugfs_create_bool("cache-filter", mode, dir,
48 &failslab.cache_filter)) 48 &failslab.cache_filter))
49 goto fail; 49 goto fail;
50 50
51 return 0; 51 return 0;
52fail: 52fail:
53 cleanup_fault_attr_dentries(&failslab.attr); 53 debugfs_remove_recursive(dir);
54 54
55 return -ENOMEM; 55 return -ENOMEM;
56} 56}
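
This hunk, and the matching fail_page_alloc one further down, moves from an int-returning init_fault_attr_dentries() to fault_create_debugfs_attr(), which encodes failure in the returned dentry pointer itself, hence the IS_ERR()/PTR_ERR() tests. A userspace sketch of that ERR_PTR convention, assuming the kernel's 4095-value errno window; the helpers mirror the kernel names but create_attr_dir() is a made-up stand-in:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095	/* same window the kernel reserves */

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* errors live in the top MAX_ERRNO addresses */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* stand-in for fault_create_debugfs_attr(): pointer or encoded errno */
static void *create_attr_dir(int fail)
{
	static int dummy;
	return fail ? ERR_PTR(-ENOMEM) : (void *)&dummy;
}

int main(void)
{
	void *dir = create_attr_dir(1);

	if (IS_ERR(dir)) {
		printf("create failed: %ld\n", PTR_ERR(dir));
		return 1;
	}
	printf("created at %p\n", dir);
	return 0;
}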
diff --git a/mm/filemap.c b/mm/filemap.c
index 867d40222ec..645a080ba4d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,7 +33,6 @@
33#include <linux/cpuset.h> 33#include <linux/cpuset.h>
34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ 34#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
35#include <linux/memcontrol.h> 35#include <linux/memcontrol.h>
36#include <linux/mm_inline.h> /* for page_is_file_cache() */
37#include <linux/cleancache.h> 36#include <linux/cleancache.h>
38#include "internal.h" 37#include "internal.h"
39 38
@@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
462 int error; 461 int error;
463 462
464 VM_BUG_ON(!PageLocked(page)); 463 VM_BUG_ON(!PageLocked(page));
464 VM_BUG_ON(PageSwapBacked(page));
465 465
466 error = mem_cgroup_cache_charge(page, current->mm, 466 error = mem_cgroup_cache_charge(page, current->mm,
467 gfp_mask & GFP_RECLAIM_MASK); 467 gfp_mask & GFP_RECLAIM_MASK);
@@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
479 if (likely(!error)) { 479 if (likely(!error)) {
480 mapping->nrpages++; 480 mapping->nrpages++;
481 __inc_zone_page_state(page, NR_FILE_PAGES); 481 __inc_zone_page_state(page, NR_FILE_PAGES);
482 if (PageSwapBacked(page))
483 __inc_zone_page_state(page, NR_SHMEM);
484 spin_unlock_irq(&mapping->tree_lock); 482 spin_unlock_irq(&mapping->tree_lock);
485 } else { 483 } else {
486 page->mapping = NULL; 484 page->mapping = NULL;
@@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
502{ 500{
503 int ret; 501 int ret;
504 502
505 /*
506 * Splice_read and readahead add shmem/tmpfs pages into the page cache
507 * before shmem_readpage has a chance to mark them as SwapBacked: they
508 * need to go on the anon lru below, and mem_cgroup_cache_charge
509 * (called in add_to_page_cache) needs to know where they're going too.
510 */
511 if (mapping_cap_swap_backed(mapping))
512 SetPageSwapBacked(page);
513
514 ret = add_to_page_cache(page, mapping, offset, gfp_mask); 503 ret = add_to_page_cache(page, mapping, offset, gfp_mask);
515 if (ret == 0) { 504 if (ret == 0)
516 if (page_is_file_cache(page)) 505 lru_cache_add_file(page);
517 lru_cache_add_file(page);
518 else
519 lru_cache_add_anon(page);
520 }
521 return ret; 506 return ret;
522} 507}
523EXPORT_SYMBOL_GPL(add_to_page_cache_lru); 508EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@ -714,9 +699,16 @@ repeat:
714 page = radix_tree_deref_slot(pagep); 699 page = radix_tree_deref_slot(pagep);
715 if (unlikely(!page)) 700 if (unlikely(!page))
716 goto out; 701 goto out;
717 if (radix_tree_deref_retry(page)) 702 if (radix_tree_exception(page)) {
718 goto repeat; 703 if (radix_tree_deref_retry(page))
719 704 goto repeat;
705 /*
706 * Otherwise, shmem/tmpfs must be storing a swap entry
707 * here as an exceptional entry: so return it without
708 * attempting to raise page count.
709 */
710 goto out;
711 }
720 if (!page_cache_get_speculative(page)) 712 if (!page_cache_get_speculative(page))
721 goto repeat; 713 goto repeat;
722 714
@@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
753 745
754repeat: 746repeat:
755 page = find_get_page(mapping, offset); 747 page = find_get_page(mapping, offset);
756 if (page) { 748 if (page && !radix_tree_exception(page)) {
757 lock_page(page); 749 lock_page(page);
758 /* Has the page been truncated? */ 750 /* Has the page been truncated? */
759 if (unlikely(page->mapping != mapping)) { 751 if (unlikely(page->mapping != mapping)) {
@@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
840 rcu_read_lock(); 832 rcu_read_lock();
841restart: 833restart:
842 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, 834 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
843 (void ***)pages, start, nr_pages); 835 (void ***)pages, NULL, start, nr_pages);
844 ret = 0; 836 ret = 0;
845 for (i = 0; i < nr_found; i++) { 837 for (i = 0; i < nr_found; i++) {
846 struct page *page; 838 struct page *page;
@@ -849,13 +841,22 @@ repeat:
849 if (unlikely(!page)) 841 if (unlikely(!page))
850 continue; 842 continue;
851 843
852 /* 844 if (radix_tree_exception(page)) {
853 * This can only trigger when the entry at index 0 moves out 845 if (radix_tree_deref_retry(page)) {
854 * of or back to the root: none yet gotten, safe to restart. 846 /*
855 */ 847 * Transient condition which can only trigger
856 if (radix_tree_deref_retry(page)) { 848 * when entry at index 0 moves out of or back
857 WARN_ON(start | i); 849 * to root: none yet gotten, safe to restart.
858 goto restart; 850 */
851 WARN_ON(start | i);
852 goto restart;
853 }
854 /*
855 * Otherwise, shmem/tmpfs must be storing a swap entry
856 * here as an exceptional entry: so skip over it -
857 * we only reach this from invalidate_mapping_pages().
858 */
859 continue;
859 } 860 }
860 861
861 if (!page_cache_get_speculative(page)) 862 if (!page_cache_get_speculative(page))
@@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
903 rcu_read_lock(); 904 rcu_read_lock();
904restart: 905restart:
905 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, 906 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
906 (void ***)pages, index, nr_pages); 907 (void ***)pages, NULL, index, nr_pages);
907 ret = 0; 908 ret = 0;
908 for (i = 0; i < nr_found; i++) { 909 for (i = 0; i < nr_found; i++) {
909 struct page *page; 910 struct page *page;
@@ -912,12 +913,22 @@ repeat:
912 if (unlikely(!page)) 913 if (unlikely(!page))
913 continue; 914 continue;
914 915
915 /* 916 if (radix_tree_exception(page)) {
916 * This can only trigger when the entry at index 0 moves out 917 if (radix_tree_deref_retry(page)) {
917 * of or back to the root: none yet gotten, safe to restart. 918 /*
918 */ 919 * Transient condition which can only trigger
919 if (radix_tree_deref_retry(page)) 920 * when entry at index 0 moves out of or back
920 goto restart; 921 * to root: none yet gotten, safe to restart.
922 */
923 goto restart;
924 }
925 /*
926 * Otherwise, shmem/tmpfs must be storing a swap entry
927 * here as an exceptional entry: so stop looking for
928 * contiguous pages.
929 */
930 break;
931 }
921 932
922 if (!page_cache_get_speculative(page)) 933 if (!page_cache_get_speculative(page))
923 goto repeat; 934 goto repeat;
@@ -977,12 +988,21 @@ repeat:
977 if (unlikely(!page)) 988 if (unlikely(!page))
978 continue; 989 continue;
979 990
980 /* 991 if (radix_tree_exception(page)) {
981 * This can only trigger when the entry at index 0 moves out 992 if (radix_tree_deref_retry(page)) {
982 * of or back to the root: none yet gotten, safe to restart. 993 /*
983 */ 994 * Transient condition which can only trigger
984 if (radix_tree_deref_retry(page)) 995 * when entry at index 0 moves out of or back
985 goto restart; 996 * to root: none yet gotten, safe to restart.
997 */
998 goto restart;
999 }
1000 /*
1001 * This function is never used on a shmem/tmpfs
1002 * mapping, so a swap entry won't be found here.
1003 */
1004 BUG();
1005 }
986 1006
987 if (!page_cache_get_speculative(page)) 1007 if (!page_cache_get_speculative(page))
988 goto repeat; 1008 goto repeat;
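
The filemap.c changes above all follow one new rule: a page-cache slot may now hold a shmem swap entry, stored as a radix-tree "exceptional entry" with a low tag bit set, so every lookup must test radix_tree_exception() before treating the slot as a struct page pointer. A simplified userspace sketch of the tagging, assuming the bit layout the 3.1 radix tree uses (bit 1 marks exceptional entries; the real radix_tree_exception() also covers indirect-pointer bits that this sketch ignores). Real struct page pointers are word-aligned, so their low bits are naturally clear:

#include <stdint.h>
#include <stdio.h>

#define EXCEPTIONAL_ENTRY	2UL	/* bit 1, as in the 3.1 radix tree */
#define EXCEPTIONAL_SHIFT	2

static void *swp_to_radix_entry(unsigned long swp_val)
{
	return (void *)((swp_val << EXCEPTIONAL_SHIFT) | EXCEPTIONAL_ENTRY);
}

static int radix_tree_exceptional_entry(const void *entry)
{
	return ((unsigned long)entry & EXCEPTIONAL_ENTRY) != 0;
}

static unsigned long radix_to_swp_entry(const void *entry)
{
	return (unsigned long)entry >> EXCEPTIONAL_SHIFT;
}

int main(void)
{
	int page;			/* stands in for a struct page */
	void *slots[2] = { &page, swp_to_radix_entry(42) };

	for (int i = 0; i < 2; i++) {
		if (radix_tree_exceptional_entry(slots[i]))
			printf("slot %d: swap entry %lu\n", i,
			       radix_to_swp_entry(slots[i]));
		else
			printf("slot %d: page pointer\n", i);
	}
	return 0;
}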
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5f84d2351dd..f4ec4e7ca4c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,7 +35,6 @@
35#include <linux/limits.h> 35#include <linux/limits.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/rbtree.h> 37#include <linux/rbtree.h>
38#include <linux/shmem_fs.h>
39#include <linux/slab.h> 38#include <linux/slab.h>
40#include <linux/swap.h> 39#include <linux/swap.h>
41#include <linux/swapops.h> 40#include <linux/swapops.h>
@@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2873 return 0; 2872 return 0;
2874 if (PageCompound(page)) 2873 if (PageCompound(page))
2875 return 0; 2874 return 0;
2876 /*
2877 * Corner case handling. This is called from add_to_page_cache()
2878 * in usual. But some FS (shmem) precharges this page before calling it
2879 * and call add_to_page_cache() with GFP_NOWAIT.
2880 *
2881 * For GFP_NOWAIT case, the page may be pre-charged before calling
2882 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
2883 * charge twice. (It works but has to pay a bit larger cost.)
2884 * And when the page is SwapCache, it should take swap information
2885 * into account. This is under lock_page() now.
2886 */
2887 if (!(gfp_mask & __GFP_WAIT)) {
2888 struct page_cgroup *pc;
2889
2890 pc = lookup_page_cgroup(page);
2891 if (!pc)
2892 return 0;
2893 lock_page_cgroup(pc);
2894 if (PageCgroupUsed(pc)) {
2895 unlock_page_cgroup(pc);
2896 return 0;
2897 }
2898 unlock_page_cgroup(pc);
2899 }
2900 2875
2901 if (unlikely(!mm)) 2876 if (unlikely(!mm))
2902 mm = &init_mm; 2877 mm = &init_mm;
@@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
3486 cgroup_release_and_wakeup_rmdir(&mem->css); 3461 cgroup_release_and_wakeup_rmdir(&mem->css);
3487} 3462}
3488 3463
3489/*
3490 * A call to try to shrink memory usage on charge failure at shmem's swapin.
3491 * Calling hierarchical_reclaim is not enough because we should update
3492 * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
3493 * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
3494 * not from the memcg which this page would be charged to.
3495 * try_charge_swapin does all of these works properly.
3496 */
3497int mem_cgroup_shmem_charge_fallback(struct page *page,
3498 struct mm_struct *mm,
3499 gfp_t gfp_mask)
3500{
3501 struct mem_cgroup *mem;
3502 int ret;
3503
3504 if (mem_cgroup_disabled())
3505 return 0;
3506
3507 ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
3508 if (!ret)
3509 mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
3510
3511 return ret;
3512}
3513
3514#ifdef CONFIG_DEBUG_VM 3464#ifdef CONFIG_DEBUG_VM
3515static struct page_cgroup *lookup_page_cgroup_used(struct page *page) 3465static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
3516{ 3466{
@@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5330 pgoff = pte_to_pgoff(ptent); 5280 pgoff = pte_to_pgoff(ptent);
5331 5281
5332 /* page is moved even if it's not RSS of this task(page-faulted). */ 5282 /* page is moved even if it's not RSS of this task(page-faulted). */
5333 if (!mapping_cap_swap_backed(mapping)) { /* normal file */ 5283 page = find_get_page(mapping, pgoff);
5334 page = find_get_page(mapping, pgoff); 5284
5335 } else { /* shmem/tmpfs file. we should take account of swap too. */ 5285#ifdef CONFIG_SWAP
5336 swp_entry_t ent; 5286 /* shmem/tmpfs may report page out on swap: account for that too. */
5337 mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); 5287 if (radix_tree_exceptional_entry(page)) {
5288 swp_entry_t swap = radix_to_swp_entry(page);
5338 if (do_swap_account) 5289 if (do_swap_account)
5339 entry->val = ent.val; 5290 *entry = swap;
5291 page = find_get_page(&swapper_space, swap.val);
5340 } 5292 }
5341 5293#endif
5342 return page; 5294 return page;
5343} 5295}
5344 5296
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 740c4f52059..2b43ba051ac 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -53,6 +53,7 @@
53#include <linux/hugetlb.h> 53#include <linux/hugetlb.h>
54#include <linux/memory_hotplug.h> 54#include <linux/memory_hotplug.h>
55#include <linux/mm_inline.h> 55#include <linux/mm_inline.h>
56#include <linux/kfifo.h>
56#include "internal.h" 57#include "internal.h"
57 58
58int sysctl_memory_failure_early_kill __read_mostly = 0; 59int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno)
1178 __memory_failure(pfn, trapno, 0); 1179 __memory_failure(pfn, trapno, 0);
1179} 1180}
1180 1181
1182#define MEMORY_FAILURE_FIFO_ORDER 4
1183#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
1184
1185struct memory_failure_entry {
1186 unsigned long pfn;
1187 int trapno;
1188 int flags;
1189};
1190
1191struct memory_failure_cpu {
1192 DECLARE_KFIFO(fifo, struct memory_failure_entry,
1193 MEMORY_FAILURE_FIFO_SIZE);
1194 spinlock_t lock;
1195 struct work_struct work;
1196};
1197
1198static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);
1199
1200/**
1201 * memory_failure_queue - Schedule handling memory failure of a page.
1202 * @pfn: Page Number of the corrupted page
1203 * @trapno: Trap number reported in the signal to user space.
1204 * @flags: Flags for memory failure handling
1205 *
1206 * This function is called by the low level hardware error handler
1207 * when it detects hardware memory corruption of a page. It schedules
 1208 * the recovery of the error page, including dropping pages, killing
1209 * processes etc.
1210 *
1211 * The function is primarily of use for corruptions that
1212 * happen outside the current execution context (e.g. when
1213 * detected by a background scrubber)
1214 *
1215 * Can run in IRQ context.
1216 */
1217void memory_failure_queue(unsigned long pfn, int trapno, int flags)
1218{
1219 struct memory_failure_cpu *mf_cpu;
1220 unsigned long proc_flags;
1221 struct memory_failure_entry entry = {
1222 .pfn = pfn,
1223 .trapno = trapno,
1224 .flags = flags,
1225 };
1226
1227 mf_cpu = &get_cpu_var(memory_failure_cpu);
1228 spin_lock_irqsave(&mf_cpu->lock, proc_flags);
1229 if (kfifo_put(&mf_cpu->fifo, &entry))
1230 schedule_work_on(smp_processor_id(), &mf_cpu->work);
1231 else
 1232		pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n",
1233 pfn);
1234 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
1235 put_cpu_var(memory_failure_cpu);
1236}
1237EXPORT_SYMBOL_GPL(memory_failure_queue);
1238
1239static void memory_failure_work_func(struct work_struct *work)
1240{
1241 struct memory_failure_cpu *mf_cpu;
1242 struct memory_failure_entry entry = { 0, };
1243 unsigned long proc_flags;
1244 int gotten;
1245
1246 mf_cpu = &__get_cpu_var(memory_failure_cpu);
1247 for (;;) {
1248 spin_lock_irqsave(&mf_cpu->lock, proc_flags);
1249 gotten = kfifo_get(&mf_cpu->fifo, &entry);
1250 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
1251 if (!gotten)
1252 break;
1253 __memory_failure(entry.pfn, entry.trapno, entry.flags);
1254 }
1255}
1256
1257static int __init memory_failure_init(void)
1258{
1259 struct memory_failure_cpu *mf_cpu;
1260 int cpu;
1261
1262 for_each_possible_cpu(cpu) {
1263 mf_cpu = &per_cpu(memory_failure_cpu, cpu);
1264 spin_lock_init(&mf_cpu->lock);
1265 INIT_KFIFO(mf_cpu->fifo);
1266 INIT_WORK(&mf_cpu->work, memory_failure_work_func);
1267 }
1268
1269 return 0;
1270}
1271core_initcall(memory_failure_init);
1272
1181/** 1273/**
1182 * unpoison_memory - Unpoison a previously poisoned page 1274 * unpoison_memory - Unpoison a previously poisoned page
1183 * @pfn: Page number of the to be unpoisoned page 1275 * @pfn: Page number of the to be unpoisoned page
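
memory_failure_queue() above lets IRQ context hand corrupted pfns to process context through a small per-cpu kfifo that a work item drains. A plain-C sketch of that producer/consumer shape, deliberately not using the kfifo API: the ring here is a hand-rolled power-of-two buffer, and it omits the spinlock the kernel version needs because its producers run in IRQ context:

#include <stdio.h>

#define FIFO_ORDER	4
#define FIFO_SIZE	(1 << FIFO_ORDER)

struct entry { unsigned long pfn; int trapno; int flags; };

static struct entry fifo[FIFO_SIZE];
static unsigned int head, tail;		/* free-running indices */

static int fifo_put(const struct entry *e)
{
	if (head - tail >= FIFO_SIZE)
		return 0;		/* full: mirrors kfifo_put() == 0 */
	fifo[head++ & (FIFO_SIZE - 1)] = *e;
	return 1;
}

static int fifo_get(struct entry *e)
{
	if (head == tail)
		return 0;
	*e = fifo[tail++ & (FIFO_SIZE - 1)];
	return 1;
}

/* same drain-until-empty loop shape as memory_failure_work_func() */
static void work_func(void)
{
	struct entry e;

	while (fifo_get(&e))
		printf("handling pfn %lu\n", e.pfn);
}

int main(void)
{
	struct entry e = { .pfn = 123, .trapno = 0, .flags = 0 };

	if (!fifo_put(&e))
		fprintf(stderr, "buffer overflow\n");
	work_func();
	return 0;
}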
diff --git a/mm/mincore.c b/mm/mincore.c
index a4e6b9d75c7..636a86876ff 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
69 * file will not get a swp_entry_t in its pte, but rather it is like 69 * file will not get a swp_entry_t in its pte, but rather it is like
70 * any other file mapping (ie. marked !present and faulted in with 70 * any other file mapping (ie. marked !present and faulted in with
71 * tmpfs's .fault). So swapped out tmpfs mappings are tested here. 71 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
72 *
73 * However when tmpfs moves the page from pagecache and into swapcache,
74 * it is still in core, but the find_get_page below won't find it.
75 * No big deal, but make a note of it.
76 */ 72 */
77 page = find_get_page(mapping, pgoff); 73 page = find_get_page(mapping, pgoff);
74#ifdef CONFIG_SWAP
75 /* shmem/tmpfs may return swap: account for swapcache page too. */
76 if (radix_tree_exceptional_entry(page)) {
77 swp_entry_t swap = radix_to_swp_entry(page);
78 page = find_get_page(&swapper_space, swap.val);
79 }
80#endif
78 if (page) { 81 if (page) {
79 present = PageUptodate(page); 82 present = PageUptodate(page);
80 page_cache_release(page); 83 page_cache_release(page);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index eafff89b3dd..626303b52f3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
303 do_each_thread(g, p) { 303 do_each_thread(g, p) {
304 unsigned int points; 304 unsigned int points;
305 305
306 if (!p->mm) 306 if (p->exit_state)
307 continue; 307 continue;
308 if (oom_unkillable_task(p, mem, nodemask)) 308 if (oom_unkillable_task(p, mem, nodemask))
309 continue; 309 continue;
@@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
319 */ 319 */
320 if (test_tsk_thread_flag(p, TIF_MEMDIE)) 320 if (test_tsk_thread_flag(p, TIF_MEMDIE))
321 return ERR_PTR(-1UL); 321 return ERR_PTR(-1UL);
322 if (!p->mm)
323 continue;
322 324
323 if (p->flags & PF_EXITING) { 325 if (p->flags & PF_EXITING) {
324 /* 326 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1dbcf8888f1..6e8ecb6e021 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void)
1409{ 1409{
1410 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 1410 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
1411 struct dentry *dir; 1411 struct dentry *dir;
1412 int err;
1413 1412
1414 err = init_fault_attr_dentries(&fail_page_alloc.attr, 1413 dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
1415 "fail_page_alloc"); 1414 &fail_page_alloc.attr);
1416 if (err) 1415 if (IS_ERR(dir))
1417 return err; 1416 return PTR_ERR(dir);
1418
1419 dir = fail_page_alloc.attr.dir;
1420 1417
1421 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, 1418 if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
1422 &fail_page_alloc.ignore_gfp_wait)) 1419 &fail_page_alloc.ignore_gfp_wait))
@@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void)
1430 1427
1431 return 0; 1428 return 0;
1432fail: 1429fail:
1433 cleanup_fault_attr_dentries(&fail_page_alloc.attr); 1430 debugfs_remove_recursive(dir);
1434 1431
1435 return -ENOMEM; 1432 return -ENOMEM;
1436} 1433}
diff --git a/mm/shmem.c b/mm/shmem.c
index 5cc21f8b4cd..32f6763f16f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -6,7 +6,8 @@
6 * 2000-2001 Christoph Rohland 6 * 2000-2001 Christoph Rohland
7 * 2000-2001 SAP AG 7 * 2000-2001 SAP AG
8 * 2002 Red Hat Inc. 8 * 2002 Red Hat Inc.
9 * Copyright (C) 2002-2005 Hugh Dickins. 9 * Copyright (C) 2002-2011 Hugh Dickins.
10 * Copyright (C) 2011 Google Inc.
10 * Copyright (C) 2002-2005 VERITAS Software Corporation. 11 * Copyright (C) 2002-2005 VERITAS Software Corporation.
11 * Copyright (C) 2004 Andi Kleen, SuSE Labs 12 * Copyright (C) 2004 Andi Kleen, SuSE Labs
12 * 13 *
@@ -28,7 +29,6 @@
28#include <linux/file.h> 29#include <linux/file.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
30#include <linux/module.h> 31#include <linux/module.h>
31#include <linux/percpu_counter.h>
32#include <linux/swap.h> 32#include <linux/swap.h>
33 33
34static struct vfsmount *shm_mnt; 34static struct vfsmount *shm_mnt;
@@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt;
51#include <linux/shmem_fs.h> 51#include <linux/shmem_fs.h>
52#include <linux/writeback.h> 52#include <linux/writeback.h>
53#include <linux/blkdev.h> 53#include <linux/blkdev.h>
54#include <linux/pagevec.h>
55#include <linux/percpu_counter.h>
54#include <linux/splice.h> 56#include <linux/splice.h>
55#include <linux/security.h> 57#include <linux/security.h>
56#include <linux/swapops.h> 58#include <linux/swapops.h>
@@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt;
63#include <linux/magic.h> 65#include <linux/magic.h>
64 66
65#include <asm/uaccess.h> 67#include <asm/uaccess.h>
66#include <asm/div64.h>
67#include <asm/pgtable.h> 68#include <asm/pgtable.h>
68 69
69/*
70 * The maximum size of a shmem/tmpfs file is limited by the maximum size of
71 * its triple-indirect swap vector - see illustration at shmem_swp_entry().
72 *
73 * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel,
74 * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum
75 * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel,
76 * MAX_LFS_FILESIZE being then more restrictive than swap vector layout.
77 *
78 * We use / and * instead of shifts in the definitions below, so that the swap
79 * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE.
80 */
81#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
82#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
83
84#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
85#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT)
86
87#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE)
88#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT))
89
90#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) 70#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
91#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) 71#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
92 72
93/* info->flags needs VM_flags to handle pagein/truncate races efficiently */
94#define SHMEM_PAGEIN VM_READ
95#define SHMEM_TRUNCATE VM_WRITE
96
97/* Definition to limit shmem_truncate's steps between cond_rescheds */
98#define LATENCY_LIMIT 64
99
100/* Pretend that each entry is of this size in directory's i_size */ 73/* Pretend that each entry is of this size in directory's i_size */
101#define BOGO_DIRENT_SIZE 20 74#define BOGO_DIRENT_SIZE 20
102 75
76/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
77#define SHORT_SYMLINK_LEN 128
78
103struct shmem_xattr { 79struct shmem_xattr {
104 struct list_head list; /* anchored by shmem_inode_info->xattr_list */ 80 struct list_head list; /* anchored by shmem_inode_info->xattr_list */
105 char *name; /* xattr name */ 81 char *name; /* xattr name */
@@ -107,7 +83,7 @@ struct shmem_xattr {
107 char value[0]; 83 char value[0];
108}; 84};
109 85
110/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ 86/* Flag allocation requirements to shmem_getpage */
111enum sgp_type { 87enum sgp_type {
112 SGP_READ, /* don't exceed i_size, don't allocate page */ 88 SGP_READ, /* don't exceed i_size, don't allocate page */
113 SGP_CACHE, /* don't exceed i_size, may allocate page */ 89 SGP_CACHE, /* don't exceed i_size, may allocate page */
@@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index,
137 mapping_gfp_mask(inode->i_mapping), fault_type); 113 mapping_gfp_mask(inode->i_mapping), fault_type);
138} 114}
139 115
140static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
141{
142 /*
143 * The above definition of ENTRIES_PER_PAGE, and the use of
144 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
145 * might be reconsidered if it ever diverges from PAGE_SIZE.
146 *
147 * Mobility flags are masked out as swap vectors cannot move
148 */
149 return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
150 PAGE_CACHE_SHIFT-PAGE_SHIFT);
151}
152
153static inline void shmem_dir_free(struct page *page)
154{
155 __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT);
156}
157
158static struct page **shmem_dir_map(struct page *page)
159{
160 return (struct page **)kmap_atomic(page, KM_USER0);
161}
162
163static inline void shmem_dir_unmap(struct page **dir)
164{
165 kunmap_atomic(dir, KM_USER0);
166}
167
168static swp_entry_t *shmem_swp_map(struct page *page)
169{
170 return (swp_entry_t *)kmap_atomic(page, KM_USER1);
171}
172
173static inline void shmem_swp_balance_unmap(void)
174{
175 /*
176 * When passing a pointer to an i_direct entry, to code which
177 * also handles indirect entries and so will shmem_swp_unmap,
178 * we must arrange for the preempt count to remain in balance.
179 * What kmap_atomic of a lowmem page does depends on config
180 * and architecture, so pretend to kmap_atomic some lowmem page.
181 */
182 (void) kmap_atomic(ZERO_PAGE(0), KM_USER1);
183}
184
185static inline void shmem_swp_unmap(swp_entry_t *entry)
186{
187 kunmap_atomic(entry, KM_USER1);
188}
189
190static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) 116static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
191{ 117{
192 return sb->s_fs_info; 118 return sb->s_fs_info;
@@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
244static LIST_HEAD(shmem_swaplist); 170static LIST_HEAD(shmem_swaplist);
245static DEFINE_MUTEX(shmem_swaplist_mutex); 171static DEFINE_MUTEX(shmem_swaplist_mutex);
246 172
247static void shmem_free_blocks(struct inode *inode, long pages)
248{
249 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
250 if (sbinfo->max_blocks) {
251 percpu_counter_add(&sbinfo->used_blocks, -pages);
252 inode->i_blocks -= pages*BLOCKS_PER_PAGE;
253 }
254}
255
256static int shmem_reserve_inode(struct super_block *sb) 173static int shmem_reserve_inode(struct super_block *sb)
257{ 174{
258 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 175 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
@@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb)
279} 196}
280 197
281/** 198/**
282 * shmem_recalc_inode - recalculate the size of an inode 199 * shmem_recalc_inode - recalculate the block usage of an inode
283 * @inode: inode to recalc 200 * @inode: inode to recalc
284 * 201 *
285 * We have to calculate the free blocks since the mm can drop 202 * We have to calculate the free blocks since the mm can drop
@@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode)
297 214
298 freed = info->alloced - info->swapped - inode->i_mapping->nrpages; 215 freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
299 if (freed > 0) { 216 if (freed > 0) {
217 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
218 if (sbinfo->max_blocks)
219 percpu_counter_add(&sbinfo->used_blocks, -freed);
300 info->alloced -= freed; 220 info->alloced -= freed;
221 inode->i_blocks -= freed * BLOCKS_PER_PAGE;
301 shmem_unacct_blocks(info->flags, freed); 222 shmem_unacct_blocks(info->flags, freed);
302 shmem_free_blocks(inode, freed);
303 } 223 }
304} 224}
305 225
306/** 226/*
307 * shmem_swp_entry - find the swap vector position in the info structure 227 * Replace item expected in radix tree by a new item, while holding tree lock.
308 * @info: info structure for the inode
309 * @index: index of the page to find
310 * @page: optional page to add to the structure. Has to be preset to
311 * all zeros
312 *
313 * If there is no space allocated yet it will return NULL when
314 * page is NULL, else it will use the page for the needed block,
315 * setting it to NULL on return to indicate that it has been used.
316 *
317 * The swap vector is organized the following way:
318 *
319 * There are SHMEM_NR_DIRECT entries directly stored in the
 320 * shmem_inode_info structure. So small files do not need an additional
321 * allocation.
322 *
323 * For pages with index > SHMEM_NR_DIRECT there is the pointer
324 * i_indirect which points to a page which holds in the first half
325 * doubly indirect blocks, in the second half triple indirect blocks:
326 *
327 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
328 * following layout (for SHMEM_NR_DIRECT == 16):
329 *
330 * i_indirect -> dir --> 16-19
331 * | +-> 20-23
332 * |
333 * +-->dir2 --> 24-27
334 * | +-> 28-31
335 * | +-> 32-35
336 * | +-> 36-39
337 * |
338 * +-->dir3 --> 40-43
339 * +-> 44-47
340 * +-> 48-51
341 * +-> 52-55
342 */ 228 */
343static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) 229static int shmem_radix_tree_replace(struct address_space *mapping,
344{ 230 pgoff_t index, void *expected, void *replacement)
345 unsigned long offset; 231{
346 struct page **dir; 232 void **pslot;
347 struct page *subdir; 233 void *item = NULL;
348 234
349 if (index < SHMEM_NR_DIRECT) { 235 VM_BUG_ON(!expected);
350 shmem_swp_balance_unmap(); 236 pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
351 return info->i_direct+index; 237 if (pslot)
352 } 238 item = radix_tree_deref_slot_protected(pslot,
353 if (!info->i_indirect) { 239 &mapping->tree_lock);
354 if (page) { 240 if (item != expected)
355 info->i_indirect = *page; 241 return -ENOENT;
356 *page = NULL; 242 if (replacement)
357 } 243 radix_tree_replace_slot(pslot, replacement);
358 return NULL; /* need another page */ 244 else
359 } 245 radix_tree_delete(&mapping->page_tree, index);
360 246 return 0;
361 index -= SHMEM_NR_DIRECT; 247}
362 offset = index % ENTRIES_PER_PAGE;
363 index /= ENTRIES_PER_PAGE;
364 dir = shmem_dir_map(info->i_indirect);
365
366 if (index >= ENTRIES_PER_PAGE/2) {
367 index -= ENTRIES_PER_PAGE/2;
368 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
369 index %= ENTRIES_PER_PAGE;
370 subdir = *dir;
371 if (!subdir) {
372 if (page) {
373 *dir = *page;
374 *page = NULL;
375 }
376 shmem_dir_unmap(dir);
377 return NULL; /* need another page */
378 }
379 shmem_dir_unmap(dir);
380 dir = shmem_dir_map(subdir);
381 }
382 248
383 dir += index; 249/*
384 subdir = *dir; 250 * Like add_to_page_cache_locked, but error if expected item has gone.
385 if (!subdir) { 251 */
386 if (!page || !(subdir = *page)) { 252static int shmem_add_to_page_cache(struct page *page,
387 shmem_dir_unmap(dir); 253 struct address_space *mapping,
388 return NULL; /* need a page */ 254 pgoff_t index, gfp_t gfp, void *expected)
255{
256 int error = 0;
257
258 VM_BUG_ON(!PageLocked(page));
259 VM_BUG_ON(!PageSwapBacked(page));
260
261 if (!expected)
262 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
263 if (!error) {
264 page_cache_get(page);
265 page->mapping = mapping;
266 page->index = index;
267
268 spin_lock_irq(&mapping->tree_lock);
269 if (!expected)
270 error = radix_tree_insert(&mapping->page_tree,
271 index, page);
272 else
273 error = shmem_radix_tree_replace(mapping, index,
274 expected, page);
275 if (!error) {
276 mapping->nrpages++;
277 __inc_zone_page_state(page, NR_FILE_PAGES);
278 __inc_zone_page_state(page, NR_SHMEM);
279 spin_unlock_irq(&mapping->tree_lock);
280 } else {
281 page->mapping = NULL;
282 spin_unlock_irq(&mapping->tree_lock);
283 page_cache_release(page);
389 } 284 }
390 *dir = subdir; 285 if (!expected)
391 *page = NULL; 286 radix_tree_preload_end();
392 } 287 }
393 shmem_dir_unmap(dir); 288 if (error)
394 return shmem_swp_map(subdir) + offset; 289 mem_cgroup_uncharge_cache_page(page);
290 return error;
395} 291}
396 292
397static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) 293/*
294 * Like delete_from_page_cache, but substitutes swap for page.
295 */
296static void shmem_delete_from_page_cache(struct page *page, void *radswap)
398{ 297{
399 long incdec = value? 1: -1; 298 struct address_space *mapping = page->mapping;
299 int error;
400 300
401 entry->val = value; 301 spin_lock_irq(&mapping->tree_lock);
402 info->swapped += incdec; 302 error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
403 if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { 303 page->mapping = NULL;
404 struct page *page = kmap_atomic_to_page(entry); 304 mapping->nrpages--;
405 set_page_private(page, page_private(page) + incdec); 305 __dec_zone_page_state(page, NR_FILE_PAGES);
406 } 306 __dec_zone_page_state(page, NR_SHMEM);
307 spin_unlock_irq(&mapping->tree_lock);
308 page_cache_release(page);
309 BUG_ON(error);
407} 310}
408 311
409/** 312/*
410 * shmem_swp_alloc - get the position of the swap entry for the page. 313 * Like find_get_pages, but collecting swap entries as well as pages.
411 * @info: info structure for the inode
412 * @index: index of the page to find
413 * @sgp: check and recheck i_size? skip allocation?
414 * @gfp: gfp mask to use for any page allocation
415 *
416 * If the entry does not exist, allocate it.
417 */ 314 */
418static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, 315static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
419 unsigned long index, enum sgp_type sgp, gfp_t gfp) 316 pgoff_t start, unsigned int nr_pages,
420{ 317 struct page **pages, pgoff_t *indices)
421 struct inode *inode = &info->vfs_inode; 318{
422 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); 319 unsigned int i;
423 struct page *page = NULL; 320 unsigned int ret;
424 swp_entry_t *entry; 321 unsigned int nr_found;
425 322
426 if (sgp != SGP_WRITE && 323 rcu_read_lock();
427 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) 324restart:
428 return ERR_PTR(-EINVAL); 325 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
429 326 (void ***)pages, indices, start, nr_pages);
430 while (!(entry = shmem_swp_entry(info, index, &page))) { 327 ret = 0;
431 if (sgp == SGP_READ) 328 for (i = 0; i < nr_found; i++) {
432 return shmem_swp_map(ZERO_PAGE(0)); 329 struct page *page;
433 /* 330repeat:
434 * Test used_blocks against 1 less max_blocks, since we have 1 data 331 page = radix_tree_deref_slot((void **)pages[i]);
435 * page (and perhaps indirect index pages) yet to allocate: 332 if (unlikely(!page))
436 * a waste to allocate index if we cannot allocate data. 333 continue;
437 */ 334 if (radix_tree_exception(page)) {
438 if (sbinfo->max_blocks) { 335 if (radix_tree_deref_retry(page))
439 if (percpu_counter_compare(&sbinfo->used_blocks, 336 goto restart;
440 sbinfo->max_blocks - 1) >= 0) 337 /*
441 return ERR_PTR(-ENOSPC); 338 * Otherwise, we must be storing a swap entry
442 percpu_counter_inc(&sbinfo->used_blocks); 339 * here as an exceptional entry: so return it
443 inode->i_blocks += BLOCKS_PER_PAGE; 340 * without attempting to raise page count.
341 */
342 goto export;
444 } 343 }
344 if (!page_cache_get_speculative(page))
345 goto repeat;
445 346
446 spin_unlock(&info->lock); 347 /* Has the page moved? */
447 page = shmem_dir_alloc(gfp); 348 if (unlikely(page != *((void **)pages[i]))) {
448 spin_lock(&info->lock); 349 page_cache_release(page);
449 350 goto repeat;
450 if (!page) {
451 shmem_free_blocks(inode, 1);
452 return ERR_PTR(-ENOMEM);
453 }
454 if (sgp != SGP_WRITE &&
455 ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
456 entry = ERR_PTR(-EINVAL);
457 break;
458 } 351 }
459 if (info->next_index <= index) 352export:
460 info->next_index = index + 1; 353 indices[ret] = indices[i];
461 } 354 pages[ret] = page;
462 if (page) { 355 ret++;
463 /* another task gave its page, or truncated the file */ 356 }
464 shmem_free_blocks(inode, 1); 357 if (unlikely(!ret && nr_found))
465 shmem_dir_free(page); 358 goto restart;
466 } 359 rcu_read_unlock();
467 if (info->next_index <= index && !IS_ERR(entry)) 360 return ret;
468 info->next_index = index + 1;
469 return entry;
470} 361}
471 362
472/** 363/*
473 * shmem_free_swp - free some swap entries in a directory 364 * Remove swap entry from radix tree, free the swap and its page cache.
474 * @dir: pointer to the directory
475 * @edir: pointer after last entry of the directory
476 * @punch_lock: pointer to spinlock when needed for the holepunch case
477 */ 365 */
478static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, 366static int shmem_free_swap(struct address_space *mapping,
479 spinlock_t *punch_lock) 367 pgoff_t index, void *radswap)
480{ 368{
481 spinlock_t *punch_unlock = NULL; 369 int error;
482 swp_entry_t *ptr; 370
483 int freed = 0; 371 spin_lock_irq(&mapping->tree_lock);
484 372 error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
485 for (ptr = dir; ptr < edir; ptr++) { 373 spin_unlock_irq(&mapping->tree_lock);
486 if (ptr->val) { 374 if (!error)
487 if (unlikely(punch_lock)) { 375 free_swap_and_cache(radix_to_swp_entry(radswap));
488 punch_unlock = punch_lock; 376 return error;
489 punch_lock = NULL;
490 spin_lock(punch_unlock);
491 if (!ptr->val)
492 continue;
493 }
494 free_swap_and_cache(*ptr);
495 *ptr = (swp_entry_t){0};
496 freed++;
497 }
498 }
499 if (punch_unlock)
500 spin_unlock(punch_unlock);
501 return freed;
502}
503
504static int shmem_map_and_free_swp(struct page *subdir, int offset,
505 int limit, struct page ***dir, spinlock_t *punch_lock)
506{
507 swp_entry_t *ptr;
508 int freed = 0;
509
510 ptr = shmem_swp_map(subdir);
511 for (; offset < limit; offset += LATENCY_LIMIT) {
512 int size = limit - offset;
513 if (size > LATENCY_LIMIT)
514 size = LATENCY_LIMIT;
515 freed += shmem_free_swp(ptr+offset, ptr+offset+size,
516 punch_lock);
517 if (need_resched()) {
518 shmem_swp_unmap(ptr);
519 if (*dir) {
520 shmem_dir_unmap(*dir);
521 *dir = NULL;
522 }
523 cond_resched();
524 ptr = shmem_swp_map(subdir);
525 }
526 }
527 shmem_swp_unmap(ptr);
528 return freed;
529} 377}
530 378
531static void shmem_free_pages(struct list_head *next) 379/*
380 * Pagevec may contain swap entries, so shuffle up pages before releasing.
381 */
382static void shmem_pagevec_release(struct pagevec *pvec)
532{ 383{
533 struct page *page; 384 int i, j;
534 int freed = 0; 385
535 386 for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
536 do { 387 struct page *page = pvec->pages[i];
537 page = container_of(next, struct page, lru); 388 if (!radix_tree_exceptional_entry(page))
538 next = next->next; 389 pvec->pages[j++] = page;
539 shmem_dir_free(page); 390 }
540 freed++; 391 pvec->nr = j;
541 if (freed >= LATENCY_LIMIT) { 392 pagevec_release(pvec);
542 cond_resched();
543 freed = 0;
544 }
545 } while (next);
546} 393}
547 394
548void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) 395/*
396 * Remove range of pages and swap entries from radix tree, and free them.
397 */
398void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
549{ 399{
400 struct address_space *mapping = inode->i_mapping;
550 struct shmem_inode_info *info = SHMEM_I(inode); 401 struct shmem_inode_info *info = SHMEM_I(inode);
551 unsigned long idx; 402 pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
552 unsigned long size; 403 unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
553 unsigned long limit; 404 pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
554 unsigned long stage; 405 struct pagevec pvec;
555 unsigned long diroff; 406 pgoff_t indices[PAGEVEC_SIZE];
556 struct page **dir;
557 struct page *topdir;
558 struct page *middir;
559 struct page *subdir;
560 swp_entry_t *ptr;
561 LIST_HEAD(pages_to_free);
562 long nr_pages_to_free = 0;
563 long nr_swaps_freed = 0; 407 long nr_swaps_freed = 0;
564 int offset; 408 pgoff_t index;
565 int freed; 409 int i;
566 int punch_hole;
567 spinlock_t *needs_lock;
568 spinlock_t *punch_lock;
569 unsigned long upper_limit;
570 410
571 truncate_inode_pages_range(inode->i_mapping, start, end); 411 BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
572 412
573 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 413 pagevec_init(&pvec, 0);
574 idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 414 index = start;
575 if (idx >= info->next_index) 415 while (index <= end) {
576 return; 416 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
417 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
418 pvec.pages, indices);
419 if (!pvec.nr)
420 break;
421 mem_cgroup_uncharge_start();
422 for (i = 0; i < pagevec_count(&pvec); i++) {
423 struct page *page = pvec.pages[i];
577 424
578 spin_lock(&info->lock); 425 index = indices[i];
579 info->flags |= SHMEM_TRUNCATE; 426 if (index > end)
580 if (likely(end == (loff_t) -1)) { 427 break;
581 limit = info->next_index; 428
582 upper_limit = SHMEM_MAX_INDEX; 429 if (radix_tree_exceptional_entry(page)) {
583 info->next_index = idx; 430 nr_swaps_freed += !shmem_free_swap(mapping,
584 needs_lock = NULL; 431 index, page);
585 punch_hole = 0; 432 continue;
586 } else { 433 }
587 if (end + 1 >= inode->i_size) { /* we may free a little more */
588 limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
589 PAGE_CACHE_SHIFT;
590 upper_limit = SHMEM_MAX_INDEX;
591 } else {
592 limit = (end + 1) >> PAGE_CACHE_SHIFT;
593 upper_limit = limit;
594 }
595 needs_lock = &info->lock;
596 punch_hole = 1;
597 }
598 434
599 topdir = info->i_indirect; 435 if (!trylock_page(page))
600 if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { 436 continue;
601 info->i_indirect = NULL; 437 if (page->mapping == mapping) {
602 nr_pages_to_free++; 438 VM_BUG_ON(PageWriteback(page));
603 list_add(&topdir->lru, &pages_to_free); 439 truncate_inode_page(mapping, page);
440 }
441 unlock_page(page);
442 }
443 shmem_pagevec_release(&pvec);
444 mem_cgroup_uncharge_end();
445 cond_resched();
446 index++;
604 } 447 }
605 spin_unlock(&info->lock);
606 448
607 if (info->swapped && idx < SHMEM_NR_DIRECT) { 449 if (partial) {
608 ptr = info->i_direct; 450 struct page *page = NULL;
609 size = limit; 451 shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
610 if (size > SHMEM_NR_DIRECT) 452 if (page) {
611 size = SHMEM_NR_DIRECT; 453 zero_user_segment(page, partial, PAGE_CACHE_SIZE);
612 nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); 454 set_page_dirty(page);
455 unlock_page(page);
456 page_cache_release(page);
457 }
613 } 458 }
614 459
615 /* 460 index = start;
616 * If there are no indirect blocks or we are punching a hole 461 for ( ; ; ) {
617 * below indirect blocks, nothing to be done. 462 cond_resched();
618 */ 463 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
619 if (!topdir || limit <= SHMEM_NR_DIRECT) 464 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
620 goto done2; 465 pvec.pages, indices);
466 if (!pvec.nr) {
467 if (index == start)
468 break;
469 index = start;
470 continue;
471 }
472 if (index == start && indices[0] > end) {
473 shmem_pagevec_release(&pvec);
474 break;
475 }
476 mem_cgroup_uncharge_start();
477 for (i = 0; i < pagevec_count(&pvec); i++) {
478 struct page *page = pvec.pages[i];
621 479
622 /* 480 index = indices[i];
623 * The truncation case has already dropped info->lock, and we're safe 481 if (index > end)
624 * because i_size and next_index have already been lowered, preventing 482 break;
625 * access beyond. But in the punch_hole case, we still need to take
626 * the lock when updating the swap directory, because there might be
627 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
628 * shmem_writepage. However, whenever we find we can remove a whole
629 * directory page (not at the misaligned start or end of the range),
630 * we first NULLify its pointer in the level above, and then have no
631 * need to take the lock when updating its contents: needs_lock and
632 * punch_lock (either pointing to info->lock or NULL) manage this.
633 */
634 483
635 upper_limit -= SHMEM_NR_DIRECT; 484 if (radix_tree_exceptional_entry(page)) {
636 limit -= SHMEM_NR_DIRECT; 485 nr_swaps_freed += !shmem_free_swap(mapping,
637 idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; 486 index, page);
638 offset = idx % ENTRIES_PER_PAGE; 487 continue;
639 idx -= offset;
640
641 dir = shmem_dir_map(topdir);
642 stage = ENTRIES_PER_PAGEPAGE/2;
643 if (idx < ENTRIES_PER_PAGEPAGE/2) {
644 middir = topdir;
645 diroff = idx/ENTRIES_PER_PAGE;
646 } else {
647 dir += ENTRIES_PER_PAGE/2;
648 dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE;
649 while (stage <= idx)
650 stage += ENTRIES_PER_PAGEPAGE;
651 middir = *dir;
652 if (*dir) {
653 diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
654 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
655 if (!diroff && !offset && upper_limit >= stage) {
656 if (needs_lock) {
657 spin_lock(needs_lock);
658 *dir = NULL;
659 spin_unlock(needs_lock);
660 needs_lock = NULL;
661 } else
662 *dir = NULL;
663 nr_pages_to_free++;
664 list_add(&middir->lru, &pages_to_free);
665 } 488 }
666 shmem_dir_unmap(dir);
667 dir = shmem_dir_map(middir);
668 } else {
669 diroff = 0;
670 offset = 0;
671 idx = stage;
672 }
673 }
674 489
675 for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { 490 lock_page(page);
676 if (unlikely(idx == stage)) { 491 if (page->mapping == mapping) {
677 shmem_dir_unmap(dir); 492 VM_BUG_ON(PageWriteback(page));
678 dir = shmem_dir_map(topdir) + 493 truncate_inode_page(mapping, page);
679 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
680 while (!*dir) {
681 dir++;
682 idx += ENTRIES_PER_PAGEPAGE;
683 if (idx >= limit)
684 goto done1;
685 }
686 stage = idx + ENTRIES_PER_PAGEPAGE;
687 middir = *dir;
688 if (punch_hole)
689 needs_lock = &info->lock;
690 if (upper_limit >= stage) {
691 if (needs_lock) {
692 spin_lock(needs_lock);
693 *dir = NULL;
694 spin_unlock(needs_lock);
695 needs_lock = NULL;
696 } else
697 *dir = NULL;
698 nr_pages_to_free++;
699 list_add(&middir->lru, &pages_to_free);
700 } 494 }
701 shmem_dir_unmap(dir); 495 unlock_page(page);
702 cond_resched();
703 dir = shmem_dir_map(middir);
704 diroff = 0;
705 }
706 punch_lock = needs_lock;
707 subdir = dir[diroff];
708 if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
709 if (needs_lock) {
710 spin_lock(needs_lock);
711 dir[diroff] = NULL;
712 spin_unlock(needs_lock);
713 punch_lock = NULL;
714 } else
715 dir[diroff] = NULL;
716 nr_pages_to_free++;
717 list_add(&subdir->lru, &pages_to_free);
718 }
719 if (subdir && page_private(subdir) /* has swap entries */) {
720 size = limit - idx;
721 if (size > ENTRIES_PER_PAGE)
722 size = ENTRIES_PER_PAGE;
723 freed = shmem_map_and_free_swp(subdir,
724 offset, size, &dir, punch_lock);
725 if (!dir)
726 dir = shmem_dir_map(middir);
727 nr_swaps_freed += freed;
728 if (offset || punch_lock) {
729 spin_lock(&info->lock);
730 set_page_private(subdir,
731 page_private(subdir) - freed);
732 spin_unlock(&info->lock);
733 } else
734 BUG_ON(page_private(subdir) != freed);
735 } 496 }
736 offset = 0; 497 shmem_pagevec_release(&pvec);
737 } 498 mem_cgroup_uncharge_end();
738done1: 499 index++;
739 shmem_dir_unmap(dir);
740done2:
741 if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
742 /*
743 * Call truncate_inode_pages again: racing shmem_unuse_inode
744 * may have swizzled a page in from swap since
745 * truncate_pagecache or generic_delete_inode did it, before we
746 * lowered next_index. Also, though shmem_getpage checks
747 * i_size before adding to cache, no recheck after: so fix the
748 * narrow window there too.
749 */
750 truncate_inode_pages_range(inode->i_mapping, start, end);
751 } 500 }
752 501
753 spin_lock(&info->lock); 502 spin_lock(&info->lock);
754 info->flags &= ~SHMEM_TRUNCATE;
755 info->swapped -= nr_swaps_freed; 503 info->swapped -= nr_swaps_freed;
756 if (nr_pages_to_free)
757 shmem_free_blocks(inode, nr_pages_to_free);
758 shmem_recalc_inode(inode); 504 shmem_recalc_inode(inode);
759 spin_unlock(&info->lock); 505 spin_unlock(&info->lock);
760 506
761 /* 507 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
762 * Empty swap vector directory pages to be freed?
763 */
764 if (!list_empty(&pages_to_free)) {
765 pages_to_free.prev->next = NULL;
766 shmem_free_pages(pages_to_free.next);
767 }
768} 508}
769EXPORT_SYMBOL_GPL(shmem_truncate_range); 509EXPORT_SYMBOL_GPL(shmem_truncate_range);
770 510
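
shmem_pagevec_release(), introduced in the hunk above, filters swap codes out of a pagevec with an in-place shuffle before releasing the remaining pages. The same two-index idiom in a standalone form; values with bit 1 set play the role of exceptional entries here, and the array is a stand-in for pvec->pages:

#include <stdio.h>

/* bit 1 set marks a swap code rather than a page pointer */
static int is_exceptional(unsigned long v)
{
	return (v & 2UL) != 0;
}

int main(void)
{
	unsigned long vec[5] = { 0x1000, 0x6, 0x3000, 0xa, 0x5000 };
	int i, j, nr = 5;

	/* shuffle-up filter: keep only real pages, preserve order */
	for (i = 0, j = 0; i < nr; i++) {
		if (!is_exceptional(vec[i]))
			vec[j++] = vec[i];
	}
	nr = j;

	for (i = 0; i < nr; i++)
		printf("%#lx\n", vec[i]);
	return 0;
}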
@@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
780 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 520 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
781 loff_t oldsize = inode->i_size; 521 loff_t oldsize = inode->i_size;
782 loff_t newsize = attr->ia_size; 522 loff_t newsize = attr->ia_size;
783 struct page *page = NULL;
784 523
785 if (newsize < oldsize) {
786 /*
787 * If truncating down to a partial page, then
788 * if that page is already allocated, hold it
789 * in memory until the truncation is over, so
790 * truncate_partial_page cannot miss it were
791 * it assigned to swap.
792 */
793 if (newsize & (PAGE_CACHE_SIZE-1)) {
794 (void) shmem_getpage(inode,
795 newsize >> PAGE_CACHE_SHIFT,
796 &page, SGP_READ, NULL);
797 if (page)
798 unlock_page(page);
799 }
800 /*
801 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
802 * detect if any pages might have been added to cache
803 * after truncate_inode_pages. But we needn't bother
804 * if it's being fully truncated to zero-length: the
805 * nrpages check is efficient enough in that case.
806 */
807 if (newsize) {
808 struct shmem_inode_info *info = SHMEM_I(inode);
809 spin_lock(&info->lock);
810 info->flags &= ~SHMEM_PAGEIN;
811 spin_unlock(&info->lock);
812 }
813 }
814 if (newsize != oldsize) { 524 if (newsize != oldsize) {
815 i_size_write(inode, newsize); 525 i_size_write(inode, newsize);
816 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 526 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
@@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
822 /* unmap again to remove racily COWed private pages */ 532 /* unmap again to remove racily COWed private pages */
823 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); 533 unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
824 } 534 }
825 if (page)
826 page_cache_release(page);
827 } 535 }
828 536
829 setattr_copy(inode, attr); 537 setattr_copy(inode, attr);
@@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode)
848 list_del_init(&info->swaplist); 556 list_del_init(&info->swaplist);
849 mutex_unlock(&shmem_swaplist_mutex); 557 mutex_unlock(&shmem_swaplist_mutex);
850 } 558 }
851 } 559 } else
560 kfree(info->symlink);
852 561
853 list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { 562 list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
854 kfree(xattr->name); 563 kfree(xattr->name);
@@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode)
859 end_writeback(inode); 568 end_writeback(inode);
860} 569}
861 570
862static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) 571/*
863{ 572 * If swap found in inode, free it and move page from swapcache to filecache.
864 swp_entry_t *ptr; 573 */
865 574static int shmem_unuse_inode(struct shmem_inode_info *info,
866 for (ptr = dir; ptr < edir; ptr++) { 575 swp_entry_t swap, struct page *page)
867 if (ptr->val == entry.val)
868 return ptr - dir;
869 }
870 return -1;
871}
872
873static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
874{ 576{
875 struct address_space *mapping; 577 struct address_space *mapping = info->vfs_inode.i_mapping;
876 unsigned long idx; 578 void *radswap;
877 unsigned long size; 579 pgoff_t index;
878 unsigned long limit;
879 unsigned long stage;
880 struct page **dir;
881 struct page *subdir;
882 swp_entry_t *ptr;
883 int offset;
884 int error; 580 int error;
885 581
886 idx = 0; 582 radswap = swp_to_radix_entry(swap);
887 ptr = info->i_direct; 583 index = radix_tree_locate_item(&mapping->page_tree, radswap);
888 spin_lock(&info->lock); 584 if (index == -1)
889 if (!info->swapped) { 585 return 0;
890 list_del_init(&info->swaplist);
891 goto lost2;
892 }
893 limit = info->next_index;
894 size = limit;
895 if (size > SHMEM_NR_DIRECT)
896 size = SHMEM_NR_DIRECT;
897 offset = shmem_find_swp(entry, ptr, ptr+size);
898 if (offset >= 0) {
899 shmem_swp_balance_unmap();
900 goto found;
901 }
902 if (!info->i_indirect)
903 goto lost2;
904
905 dir = shmem_dir_map(info->i_indirect);
906 stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2;
907
908 for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) {
909 if (unlikely(idx == stage)) {
910 shmem_dir_unmap(dir-1);
911 if (cond_resched_lock(&info->lock)) {
912 /* check it has not been truncated */
913 if (limit > info->next_index) {
914 limit = info->next_index;
915 if (idx >= limit)
916 goto lost2;
917 }
918 }
919 dir = shmem_dir_map(info->i_indirect) +
920 ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE;
921 while (!*dir) {
922 dir++;
923 idx += ENTRIES_PER_PAGEPAGE;
924 if (idx >= limit)
925 goto lost1;
926 }
927 stage = idx + ENTRIES_PER_PAGEPAGE;
928 subdir = *dir;
929 shmem_dir_unmap(dir);
930 dir = shmem_dir_map(subdir);
931 }
932 subdir = *dir;
933 if (subdir && page_private(subdir)) {
934 ptr = shmem_swp_map(subdir);
935 size = limit - idx;
936 if (size > ENTRIES_PER_PAGE)
937 size = ENTRIES_PER_PAGE;
938 offset = shmem_find_swp(entry, ptr, ptr+size);
939 shmem_swp_unmap(ptr);
940 if (offset >= 0) {
941 shmem_dir_unmap(dir);
942 ptr = shmem_swp_map(subdir);
943 goto found;
944 }
945 }
946 }
947lost1:
948 shmem_dir_unmap(dir-1);
949lost2:
950 spin_unlock(&info->lock);
951 return 0;
952found:
953 idx += offset;
954 ptr += offset;
955 586
956 /* 587 /*
957 * Move _head_ to start search for next from here. 588 * Move _head_ to start search for next from here.
958 * But be careful: shmem_evict_inode checks list_empty without taking 589 * But be careful: shmem_evict_inode checks list_empty without taking
959 * mutex, and there's an instant in list_move_tail when info->swaplist 590 * mutex, and there's an instant in list_move_tail when info->swaplist
960 * would appear empty, if it were the only one on shmem_swaplist. We 591 * would appear empty, if it were the only one on shmem_swaplist.
961 * could avoid doing it if inode NULL; or use this minor optimization.
962 */ 592 */
963 if (shmem_swaplist.next != &info->swaplist) 593 if (shmem_swaplist.next != &info->swaplist)
964 list_move_tail(&shmem_swaplist, &info->swaplist); 594 list_move_tail(&shmem_swaplist, &info->swaplist);
@@ -968,29 +598,34 @@ found:
968 * but also to hold up shmem_evict_inode(): so inode cannot be freed 598 * but also to hold up shmem_evict_inode(): so inode cannot be freed
969 * beneath us (pagelock doesn't help until the page is in pagecache). 599 * beneath us (pagelock doesn't help until the page is in pagecache).
970 */ 600 */
971 mapping = info->vfs_inode.i_mapping; 601 error = shmem_add_to_page_cache(page, mapping, index,
972 error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); 602 GFP_NOWAIT, radswap);
973 /* which does mem_cgroup_uncharge_cache_page on error */ 603 /* which does mem_cgroup_uncharge_cache_page on error */
974 604
975 if (error != -ENOMEM) { 605 if (error != -ENOMEM) {
606 /*
607 * Truncation and eviction use free_swap_and_cache(), which
608 * only does trylock page: if we raced, best clean up here.
609 */
976 delete_from_swap_cache(page); 610 delete_from_swap_cache(page);
977 set_page_dirty(page); 611 set_page_dirty(page);
978 info->flags |= SHMEM_PAGEIN; 612 if (!error) {
979 shmem_swp_set(info, ptr, 0); 613 spin_lock(&info->lock);
980 swap_free(entry); 614 info->swapped--;
615 spin_unlock(&info->lock);
616 swap_free(swap);
617 }
981 error = 1; /* not an error, but entry was found */ 618 error = 1; /* not an error, but entry was found */
982 } 619 }
983 shmem_swp_unmap(ptr);
984 spin_unlock(&info->lock);
985 return error; 620 return error;
986} 621}
987 622
988/* 623/*
989 * shmem_unuse() searches for a possibly swapped-out shmem page. 624 * Search through swapped inodes to find and replace swap by page.
990 */ 625 */
991int shmem_unuse(swp_entry_t entry, struct page *page) 626int shmem_unuse(swp_entry_t swap, struct page *page)
992{ 627{
993 struct list_head *p, *next; 628 struct list_head *this, *next;
994 struct shmem_inode_info *info; 629 struct shmem_inode_info *info;
995 int found = 0; 630 int found = 0;
996 int error; 631 int error;
@@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
999 * Charge page using GFP_KERNEL while we can wait, before taking 634 * Charge page using GFP_KERNEL while we can wait, before taking
1000 * the shmem_swaplist_mutex which might hold up shmem_writepage(). 635 * the shmem_swaplist_mutex which might hold up shmem_writepage().
1001 * Charged back to the user (not to caller) when swap account is used. 636 * Charged back to the user (not to caller) when swap account is used.
1002 * add_to_page_cache() will be called with GFP_NOWAIT.
1003 */ 637 */
1004 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); 638 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
1005 if (error) 639 if (error)
1006 goto out; 640 goto out;
1007 /* 641 /* No radix_tree_preload: swap entry keeps a place for page in tree */
1008 * Try to preload while we can wait, to not make a habit of
1009 * draining atomic reserves; but don't latch on to this cpu,
1010 * it's okay if sometimes we get rescheduled after this.
1011 */
1012 error = radix_tree_preload(GFP_KERNEL);
1013 if (error)
1014 goto uncharge;
1015 radix_tree_preload_end();
1016 642
1017 mutex_lock(&shmem_swaplist_mutex); 643 mutex_lock(&shmem_swaplist_mutex);
1018 list_for_each_safe(p, next, &shmem_swaplist) { 644 list_for_each_safe(this, next, &shmem_swaplist) {
1019 info = list_entry(p, struct shmem_inode_info, swaplist); 645 info = list_entry(this, struct shmem_inode_info, swaplist);
1020 found = shmem_unuse_inode(info, entry, page); 646 if (info->swapped)
647 found = shmem_unuse_inode(info, swap, page);
648 else
649 list_del_init(&info->swaplist);
1021 cond_resched(); 650 cond_resched();
1022 if (found) 651 if (found)
1023 break; 652 break;
1024 } 653 }
1025 mutex_unlock(&shmem_swaplist_mutex); 654 mutex_unlock(&shmem_swaplist_mutex);
1026 655
1027uncharge:
1028 if (!found) 656 if (!found)
1029 mem_cgroup_uncharge_cache_page(page); 657 mem_cgroup_uncharge_cache_page(page);
1030 if (found < 0) 658 if (found < 0)
@@ -1041,10 +669,10 @@ out:
1041static int shmem_writepage(struct page *page, struct writeback_control *wbc) 669static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1042{ 670{
1043 struct shmem_inode_info *info; 671 struct shmem_inode_info *info;
1044 swp_entry_t *entry, swap;
1045 struct address_space *mapping; 672 struct address_space *mapping;
1046 unsigned long index;
1047 struct inode *inode; 673 struct inode *inode;
674 swp_entry_t swap;
675 pgoff_t index;
1048 676
1049 BUG_ON(!PageLocked(page)); 677 BUG_ON(!PageLocked(page));
1050 mapping = page->mapping; 678 mapping = page->mapping;
@@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
1073 701
1074 /* 702 /*
1075 * Add inode to shmem_unuse()'s list of swapped-out inodes, 703 * Add inode to shmem_unuse()'s list of swapped-out inodes,
1076 * if it's not already there. Do it now because we cannot take 704 * if it's not already there. Do it now before the page is
1077 * mutex while holding spinlock, and must do so before the page 705 * moved to swap cache, when its pagelock no longer protects
1078 * is moved to swap cache, when its pagelock no longer protects
1079 * the inode from eviction. But don't unlock the mutex until 706 * the inode from eviction. But don't unlock the mutex until
1080 * we've taken the spinlock, because shmem_unuse_inode() will 707 * we've incremented swapped, because shmem_unuse_inode() will
1081 * prune a !swapped inode from the swaplist under both locks. 708 * prune a !swapped inode from the swaplist under this mutex.
1082 */ 709 */
1083 mutex_lock(&shmem_swaplist_mutex); 710 mutex_lock(&shmem_swaplist_mutex);
1084 if (list_empty(&info->swaplist)) 711 if (list_empty(&info->swaplist))
1085 list_add_tail(&info->swaplist, &shmem_swaplist); 712 list_add_tail(&info->swaplist, &shmem_swaplist);
1086 713
1087 spin_lock(&info->lock);
1088 mutex_unlock(&shmem_swaplist_mutex);
1089
1090 if (index >= info->next_index) {
1091 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
1092 goto unlock;
1093 }
1094 entry = shmem_swp_entry(info, index, NULL);
1095 if (entry->val) {
1096 WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
1097 free_swap_and_cache(*entry);
1098 shmem_swp_set(info, entry, 0);
1099 }
1100 shmem_recalc_inode(inode);
1101
1102 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { 714 if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
1103 delete_from_page_cache(page);
1104 shmem_swp_set(info, entry, swap.val);
1105 shmem_swp_unmap(entry);
1106 swap_shmem_alloc(swap); 715 swap_shmem_alloc(swap);
716 shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
717
718 spin_lock(&info->lock);
719 info->swapped++;
720 shmem_recalc_inode(inode);
1107 spin_unlock(&info->lock); 721 spin_unlock(&info->lock);
722
723 mutex_unlock(&shmem_swaplist_mutex);
1108 BUG_ON(page_mapped(page)); 724 BUG_ON(page_mapped(page));
1109 swap_writepage(page, wbc); 725 swap_writepage(page, wbc);
1110 return 0; 726 return 0;
1111 } 727 }
1112 728
1113 shmem_swp_unmap(entry); 729 mutex_unlock(&shmem_swaplist_mutex);
1114unlock:
1115 spin_unlock(&info->lock);
1116 /*
1117 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
1118 * clear SWAP_HAS_CACHE flag.
1119 */
1120 swapcache_free(swap, NULL); 730 swapcache_free(swap, NULL);
1121redirty: 731redirty:
1122 set_page_dirty(page); 732 set_page_dirty(page);
@@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1153} 763}
1154#endif /* CONFIG_TMPFS */ 764#endif /* CONFIG_TMPFS */
1155 765
1156static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, 766static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1157 struct shmem_inode_info *info, unsigned long idx) 767 struct shmem_inode_info *info, pgoff_t index)
1158{ 768{
1159 struct mempolicy mpol, *spol; 769 struct mempolicy mpol, *spol;
1160 struct vm_area_struct pvma; 770 struct vm_area_struct pvma;
1161 struct page *page;
1162 771
1163 spol = mpol_cond_copy(&mpol, 772 spol = mpol_cond_copy(&mpol,
1164 mpol_shared_policy_lookup(&info->policy, idx)); 773 mpol_shared_policy_lookup(&info->policy, index));
1165 774
1166 /* Create a pseudo vma that just contains the policy */ 775 /* Create a pseudo vma that just contains the policy */
1167 pvma.vm_start = 0; 776 pvma.vm_start = 0;
1168 pvma.vm_pgoff = idx; 777 pvma.vm_pgoff = index;
1169 pvma.vm_ops = NULL; 778 pvma.vm_ops = NULL;
1170 pvma.vm_policy = spol; 779 pvma.vm_policy = spol;
1171 page = swapin_readahead(entry, gfp, &pvma, 0); 780 return swapin_readahead(swap, gfp, &pvma, 0);
1172 return page;
1173} 781}
1174 782
1175static struct page *shmem_alloc_page(gfp_t gfp, 783static struct page *shmem_alloc_page(gfp_t gfp,
1176 struct shmem_inode_info *info, unsigned long idx) 784 struct shmem_inode_info *info, pgoff_t index)
1177{ 785{
1178 struct vm_area_struct pvma; 786 struct vm_area_struct pvma;
1179 787
1180 /* Create a pseudo vma that just contains the policy */ 788 /* Create a pseudo vma that just contains the policy */
1181 pvma.vm_start = 0; 789 pvma.vm_start = 0;
1182 pvma.vm_pgoff = idx; 790 pvma.vm_pgoff = index;
1183 pvma.vm_ops = NULL; 791 pvma.vm_ops = NULL;
1184 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); 792 pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
1185 793
1186 /* 794 /*
1187 * alloc_page_vma() will drop the shared policy reference 795 * alloc_page_vma() will drop the shared policy reference
@@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp,
1190} 798}
1191#else /* !CONFIG_NUMA */ 799#else /* !CONFIG_NUMA */
1192#ifdef CONFIG_TMPFS 800#ifdef CONFIG_TMPFS
1193static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) 801static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
1194{ 802{
1195} 803}
1196#endif /* CONFIG_TMPFS */ 804#endif /* CONFIG_TMPFS */
1197 805
1198static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, 806static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
1199 struct shmem_inode_info *info, unsigned long idx) 807 struct shmem_inode_info *info, pgoff_t index)
1200{ 808{
1201 return swapin_readahead(entry, gfp, NULL, 0); 809 return swapin_readahead(swap, gfp, NULL, 0);
1202} 810}
1203 811
1204static inline struct page *shmem_alloc_page(gfp_t gfp, 812static inline struct page *shmem_alloc_page(gfp_t gfp,
1205 struct shmem_inode_info *info, unsigned long idx) 813 struct shmem_inode_info *info, pgoff_t index)
1206{ 814{
1207 return alloc_page(gfp); 815 return alloc_page(gfp);
1208} 816}
@@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
1222 * vm. If we swap it in we mark it dirty since we also free the swap 830 * vm. If we swap it in we mark it dirty since we also free the swap
1223 * entry since a page cannot live in both the swap and page cache 831 * entry since a page cannot live in both the swap and page cache
1224 */ 832 */
1225static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, 833static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
1226 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) 834 struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
1227{ 835{
1228 struct address_space *mapping = inode->i_mapping; 836 struct address_space *mapping = inode->i_mapping;
1229 struct shmem_inode_info *info = SHMEM_I(inode); 837 struct shmem_inode_info *info;
1230 struct shmem_sb_info *sbinfo; 838 struct shmem_sb_info *sbinfo;
1231 struct page *page; 839 struct page *page;
1232 struct page *prealloc_page = NULL;
1233 swp_entry_t *entry;
1234 swp_entry_t swap; 840 swp_entry_t swap;
1235 int error; 841 int error;
1236 int ret; 842 int once = 0;
1237 843
1238 if (idx >= SHMEM_MAX_INDEX) 844 if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
1239 return -EFBIG; 845 return -EFBIG;
1240repeat: 846repeat:
1241 page = find_lock_page(mapping, idx); 847 swap.val = 0;
1242 if (page) { 848 page = find_lock_page(mapping, index);
849 if (radix_tree_exceptional_entry(page)) {
850 swap = radix_to_swp_entry(page);
851 page = NULL;
852 }
853
854 if (sgp != SGP_WRITE &&
855 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
856 error = -EINVAL;
857 goto failed;
858 }
859
860 if (page || (sgp == SGP_READ && !swap.val)) {
1243 /* 861 /*
1244 * Once we can get the page lock, it must be uptodate: 862 * Once we can get the page lock, it must be uptodate:
1245 * if there were an error in reading back from swap, 863 * if there were an error in reading back from swap,
1246 * the page would not be inserted into the filecache. 864 * the page would not be inserted into the filecache.
1247 */ 865 */
1248 BUG_ON(!PageUptodate(page)); 866 BUG_ON(page && !PageUptodate(page));
1249 goto done; 867 *pagep = page;
868 return 0;
1250 } 869 }
1251 870
1252 /* 871 /*
1253 * Try to preload while we can wait, to not make a habit of 872 * Fast cache lookup did not find it:
1254 * draining atomic reserves; but don't latch on to this cpu. 873 * bring it back from swap or allocate.
1255 */ 874 */
1256 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); 875 info = SHMEM_I(inode);
1257 if (error) 876 sbinfo = SHMEM_SB(inode->i_sb);
1258 goto out;
1259 radix_tree_preload_end();
1260
1261 if (sgp != SGP_READ && !prealloc_page) {
1262 prealloc_page = shmem_alloc_page(gfp, info, idx);
1263 if (prealloc_page) {
1264 SetPageSwapBacked(prealloc_page);
1265 if (mem_cgroup_cache_charge(prealloc_page,
1266 current->mm, GFP_KERNEL)) {
1267 page_cache_release(prealloc_page);
1268 prealloc_page = NULL;
1269 }
1270 }
1271 }
1272
1273 spin_lock(&info->lock);
1274 shmem_recalc_inode(inode);
1275 entry = shmem_swp_alloc(info, idx, sgp, gfp);
1276 if (IS_ERR(entry)) {
1277 spin_unlock(&info->lock);
1278 error = PTR_ERR(entry);
1279 goto out;
1280 }
1281 swap = *entry;
1282 877
1283 if (swap.val) { 878 if (swap.val) {
1284 /* Look it up and read it in.. */ 879 /* Look it up and read it in.. */
1285 page = lookup_swap_cache(swap); 880 page = lookup_swap_cache(swap);
1286 if (!page) { 881 if (!page) {
1287 shmem_swp_unmap(entry);
1288 spin_unlock(&info->lock);
1289 /* here we actually do the io */ 882 /* here we actually do the io */
1290 if (fault_type) 883 if (fault_type)
1291 *fault_type |= VM_FAULT_MAJOR; 884 *fault_type |= VM_FAULT_MAJOR;
1292 page = shmem_swapin(swap, gfp, info, idx); 885 page = shmem_swapin(swap, gfp, info, index);
1293 if (!page) { 886 if (!page) {
1294 spin_lock(&info->lock); 887 error = -ENOMEM;
1295 entry = shmem_swp_alloc(info, idx, sgp, gfp); 888 goto failed;
1296 if (IS_ERR(entry))
1297 error = PTR_ERR(entry);
1298 else {
1299 if (entry->val == swap.val)
1300 error = -ENOMEM;
1301 shmem_swp_unmap(entry);
1302 }
1303 spin_unlock(&info->lock);
1304 if (error)
1305 goto out;
1306 goto repeat;
1307 } 889 }
1308 wait_on_page_locked(page);
1309 page_cache_release(page);
1310 goto repeat;
1311 } 890 }
1312 891
1313 /* We have to do this with page locked to prevent races */ 892 /* We have to do this with page locked to prevent races */
1314 if (!trylock_page(page)) { 893 lock_page(page);
1315 shmem_swp_unmap(entry);
1316 spin_unlock(&info->lock);
1317 wait_on_page_locked(page);
1318 page_cache_release(page);
1319 goto repeat;
1320 }
1321 if (PageWriteback(page)) {
1322 shmem_swp_unmap(entry);
1323 spin_unlock(&info->lock);
1324 wait_on_page_writeback(page);
1325 unlock_page(page);
1326 page_cache_release(page);
1327 goto repeat;
1328 }
1329 if (!PageUptodate(page)) { 894 if (!PageUptodate(page)) {
1330 shmem_swp_unmap(entry);
1331 spin_unlock(&info->lock);
1332 unlock_page(page);
1333 page_cache_release(page);
1334 error = -EIO; 895 error = -EIO;
1335 goto out; 896 goto failed;
1336 } 897 }
1337 898 wait_on_page_writeback(page);
1338 error = add_to_page_cache_locked(page, mapping, 899
1339 idx, GFP_NOWAIT); 900 /* Someone may have already done it for us */
1340 if (error) { 901 if (page->mapping) {
1341 shmem_swp_unmap(entry); 902 if (page->mapping == mapping &&
1342 spin_unlock(&info->lock); 903 page->index == index)
1343 if (error == -ENOMEM) { 904 goto done;
1344 /* 905 error = -EEXIST;
1345 * reclaim from proper memory cgroup and 906 goto failed;
1346 * call memcg's OOM if needed.
1347 */
1348 error = mem_cgroup_shmem_charge_fallback(
1349 page, current->mm, gfp);
1350 if (error) {
1351 unlock_page(page);
1352 page_cache_release(page);
1353 goto out;
1354 }
1355 }
1356 unlock_page(page);
1357 page_cache_release(page);
1358 goto repeat;
1359 } 907 }
1360 908
1361 info->flags |= SHMEM_PAGEIN; 909 error = mem_cgroup_cache_charge(page, current->mm,
1362 shmem_swp_set(info, entry, 0); 910 gfp & GFP_RECLAIM_MASK);
1363 shmem_swp_unmap(entry); 911 if (!error)
1364 delete_from_swap_cache(page); 912 error = shmem_add_to_page_cache(page, mapping, index,
913 gfp, swp_to_radix_entry(swap));
914 if (error)
915 goto failed;
916
917 spin_lock(&info->lock);
918 info->swapped--;
919 shmem_recalc_inode(inode);
1365 spin_unlock(&info->lock); 920 spin_unlock(&info->lock);
921
922 delete_from_swap_cache(page);
1366 set_page_dirty(page); 923 set_page_dirty(page);
1367 swap_free(swap); 924 swap_free(swap);
1368 925
1369 } else if (sgp == SGP_READ) { 926 } else {
1370 shmem_swp_unmap(entry); 927 if (shmem_acct_block(info->flags)) {
1371 page = find_get_page(mapping, idx); 928 error = -ENOSPC;
1372 if (page && !trylock_page(page)) { 929 goto failed;
1373 spin_unlock(&info->lock);
1374 wait_on_page_locked(page);
1375 page_cache_release(page);
1376 goto repeat;
1377 } 930 }
1378 spin_unlock(&info->lock);
1379
1380 } else if (prealloc_page) {
1381 shmem_swp_unmap(entry);
1382 sbinfo = SHMEM_SB(inode->i_sb);
1383 if (sbinfo->max_blocks) { 931 if (sbinfo->max_blocks) {
1384 if (percpu_counter_compare(&sbinfo->used_blocks, 932 if (percpu_counter_compare(&sbinfo->used_blocks,
1385 sbinfo->max_blocks) >= 0 || 933 sbinfo->max_blocks) >= 0) {
1386 shmem_acct_block(info->flags)) 934 error = -ENOSPC;
1387 goto nospace; 935 goto unacct;
936 }
1388 percpu_counter_inc(&sbinfo->used_blocks); 937 percpu_counter_inc(&sbinfo->used_blocks);
1389 inode->i_blocks += BLOCKS_PER_PAGE;
1390 } else if (shmem_acct_block(info->flags))
1391 goto nospace;
1392
1393 page = prealloc_page;
1394 prealloc_page = NULL;
1395
1396 entry = shmem_swp_alloc(info, idx, sgp, gfp);
1397 if (IS_ERR(entry))
1398 error = PTR_ERR(entry);
1399 else {
1400 swap = *entry;
1401 shmem_swp_unmap(entry);
1402 } 938 }
1403 ret = error || swap.val; 939
1404 if (ret) 940 page = shmem_alloc_page(gfp, info, index);
1405 mem_cgroup_uncharge_cache_page(page); 941 if (!page) {
1406 else 942 error = -ENOMEM;
1407 ret = add_to_page_cache_lru(page, mapping, 943 goto decused;
1408 idx, GFP_NOWAIT);
1409 /*
1410 * At add_to_page_cache_lru() failure,
1411 * uncharge will be done automatically.
1412 */
1413 if (ret) {
1414 shmem_unacct_blocks(info->flags, 1);
1415 shmem_free_blocks(inode, 1);
1416 spin_unlock(&info->lock);
1417 page_cache_release(page);
1418 if (error)
1419 goto out;
1420 goto repeat;
1421 } 944 }
1422 945
1423 info->flags |= SHMEM_PAGEIN; 946 SetPageSwapBacked(page);
947 __set_page_locked(page);
948 error = mem_cgroup_cache_charge(page, current->mm,
949 gfp & GFP_RECLAIM_MASK);
950 if (!error)
951 error = shmem_add_to_page_cache(page, mapping, index,
952 gfp, NULL);
953 if (error)
954 goto decused;
955 lru_cache_add_anon(page);
956
957 spin_lock(&info->lock);
1424 info->alloced++; 958 info->alloced++;
959 inode->i_blocks += BLOCKS_PER_PAGE;
960 shmem_recalc_inode(inode);
1425 spin_unlock(&info->lock); 961 spin_unlock(&info->lock);
962
1426 clear_highpage(page); 963 clear_highpage(page);
1427 flush_dcache_page(page); 964 flush_dcache_page(page);
1428 SetPageUptodate(page); 965 SetPageUptodate(page);
1429 if (sgp == SGP_DIRTY) 966 if (sgp == SGP_DIRTY)
1430 set_page_dirty(page); 967 set_page_dirty(page);
1431
1432 } else {
1433 spin_unlock(&info->lock);
1434 error = -ENOMEM;
1435 goto out;
1436 } 968 }
1437done: 969done:
1438 *pagep = page; 970 /* Perhaps the file has been truncated since we checked */
1439 error = 0; 971 if (sgp != SGP_WRITE &&
1440out: 972 ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1441 if (prealloc_page) { 973 error = -EINVAL;
1442 mem_cgroup_uncharge_cache_page(prealloc_page); 974 goto trunc;
1443 page_cache_release(prealloc_page);
1444 } 975 }
1445 return error; 976 *pagep = page;
977 return 0;
1446 978
1447nospace:
1448 /* 979 /*
1449 * Perhaps the page was brought in from swap between find_lock_page 980 * Error recovery.
1450 * and taking info->lock? We allow for that at add_to_page_cache_lru,
1451 * but must also avoid reporting a spurious ENOSPC while working on a
1452 * full tmpfs.
1453 */ 981 */
1454 page = find_get_page(mapping, idx); 982trunc:
983 ClearPageDirty(page);
984 delete_from_page_cache(page);
985 spin_lock(&info->lock);
986 info->alloced--;
987 inode->i_blocks -= BLOCKS_PER_PAGE;
1455 spin_unlock(&info->lock); 988 spin_unlock(&info->lock);
989decused:
990 if (sbinfo->max_blocks)
991 percpu_counter_add(&sbinfo->used_blocks, -1);
992unacct:
993 shmem_unacct_blocks(info->flags, 1);
994failed:
995 if (swap.val && error != -EINVAL) {
996 struct page *test = find_get_page(mapping, index);
997 if (test && !radix_tree_exceptional_entry(test))
998 page_cache_release(test);
999 /* Have another try if the entry has changed */
1000 if (test != swp_to_radix_entry(swap))
1001 error = -EEXIST;
1002 }
1456 if (page) { 1003 if (page) {
1004 unlock_page(page);
1457 page_cache_release(page); 1005 page_cache_release(page);
1006 }
1007 if (error == -ENOSPC && !once++) {
1008 info = SHMEM_I(inode);
1009 spin_lock(&info->lock);
1010 shmem_recalc_inode(inode);
1011 spin_unlock(&info->lock);
1458 goto repeat; 1012 goto repeat;
1459 } 1013 }
1460 error = -ENOSPC; 1014 if (error == -EEXIST)
1461 goto out; 1015 goto repeat;
1016 return error;
1462} 1017}
1463 1018
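A real struct page pointer is always at least word-aligned, so its low two bits are clear; the radix tree uses bit 1 to mark slots that carry an encoded value instead. That is the whole test behind radix_tree_exceptional_entry() in the repeat loop of shmem_getpage_gfp() above; a sketch of the predicate as this series defines it in include/linux/radix-tree.h:

    #define RADIX_TREE_EXCEPTIONAL_ENTRY    2
    #define RADIX_TREE_EXCEPTIONAL_SHIFT    2

    static inline int radix_tree_exceptional_entry(void *arg)
    {
            /* non-zero for a tagged value (e.g. a swap entry), zero for a page */
            return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
    }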
1464static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1019static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1467 int error; 1022 int error;
1468 int ret = VM_FAULT_LOCKED; 1023 int ret = VM_FAULT_LOCKED;
1469 1024
1470 if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
1471 return VM_FAULT_SIGBUS;
1472
1473 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); 1025 error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
1474 if (error) 1026 if (error)
1475 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); 1027 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1482} 1034}
1483 1035
1484#ifdef CONFIG_NUMA 1036#ifdef CONFIG_NUMA
1485static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 1037static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
1486{ 1038{
1487 struct inode *i = vma->vm_file->f_path.dentry->d_inode; 1039 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1488 return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); 1040 return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
1489} 1041}
1490 1042
1491static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, 1043static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
1492 unsigned long addr) 1044 unsigned long addr)
1493{ 1045{
1494 struct inode *i = vma->vm_file->f_path.dentry->d_inode; 1046 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1495 unsigned long idx; 1047 pgoff_t index;
1496 1048
1497 idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 1049 index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
1498 return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); 1050 return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
1499} 1051}
1500#endif 1052#endif
1501 1053
@@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
1593 1145
1594#ifdef CONFIG_TMPFS 1146#ifdef CONFIG_TMPFS
1595static const struct inode_operations shmem_symlink_inode_operations; 1147static const struct inode_operations shmem_symlink_inode_operations;
1596static const struct inode_operations shmem_symlink_inline_operations; 1148static const struct inode_operations shmem_short_symlink_operations;
1597 1149
1598static int 1150static int
1599shmem_write_begin(struct file *file, struct address_space *mapping, 1151shmem_write_begin(struct file *file, struct address_space *mapping,
@@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1626{ 1178{
1627 struct inode *inode = filp->f_path.dentry->d_inode; 1179 struct inode *inode = filp->f_path.dentry->d_inode;
1628 struct address_space *mapping = inode->i_mapping; 1180 struct address_space *mapping = inode->i_mapping;
1629 unsigned long index, offset; 1181 pgoff_t index;
1182 unsigned long offset;
1630 enum sgp_type sgp = SGP_READ; 1183 enum sgp_type sgp = SGP_READ;
1631 1184
1632 /* 1185 /*
@@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1642 1195
1643 for (;;) { 1196 for (;;) {
1644 struct page *page = NULL; 1197 struct page *page = NULL;
1645 unsigned long end_index, nr, ret; 1198 pgoff_t end_index;
1199 unsigned long nr, ret;
1646 loff_t i_size = i_size_read(inode); 1200 loff_t i_size = i_size_read(inode);
1647 1201
1648 end_index = i_size >> PAGE_CACHE_SHIFT; 1202 end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
1880 buf->f_namelen = NAME_MAX; 1434 buf->f_namelen = NAME_MAX;
1881 if (sbinfo->max_blocks) { 1435 if (sbinfo->max_blocks) {
1882 buf->f_blocks = sbinfo->max_blocks; 1436 buf->f_blocks = sbinfo->max_blocks;
1883 buf->f_bavail = buf->f_bfree = 1437 buf->f_bavail =
1884 sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); 1438 buf->f_bfree = sbinfo->max_blocks -
1439 percpu_counter_sum(&sbinfo->used_blocks);
1885 } 1440 }
1886 if (sbinfo->max_inodes) { 1441 if (sbinfo->max_inodes) {
1887 buf->f_files = sbinfo->max_inodes; 1442 buf->f_files = sbinfo->max_inodes;
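percpu_counter_sum() above takes the counter's lock and folds in every CPU's pending delta, giving an exact block count for statfs; percpu_counter_read() would be cheaper but can lag by up to batch * num_online_cpus(). A minimal sketch of the trade-off (hypothetical demo_* name):

    #include <linux/percpu_counter.h>

    static s64 demo_counter_gap(struct percpu_counter *fbc)
    {
            s64 fast  = percpu_counter_read(fbc);   /* lockless, may lag behind */
            s64 exact = percpu_counter_sum(fbc);    /* locks and folds all CPUs */

            return exact - fast;    /* bounded by batch * num_online_cpus() */
    }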
@@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
2055 1610
2056 info = SHMEM_I(inode); 1611 info = SHMEM_I(inode);
2057 inode->i_size = len-1; 1612 inode->i_size = len-1;
2058 if (len <= SHMEM_SYMLINK_INLINE_LEN) { 1613 if (len <= SHORT_SYMLINK_LEN) {
2059 /* do it inline */ 1614 info->symlink = kmemdup(symname, len, GFP_KERNEL);
2060 memcpy(info->inline_symlink, symname, len); 1615 if (!info->symlink) {
2061 inode->i_op = &shmem_symlink_inline_operations; 1616 iput(inode);
1617 return -ENOMEM;
1618 }
1619 inode->i_op = &shmem_short_symlink_operations;
2062 } else { 1620 } else {
2063 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); 1621 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2064 if (error) { 1622 if (error) {
@@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
2081 return 0; 1639 return 0;
2082} 1640}
2083 1641
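Short symlinks now live in a separate kmemdup()'d buffer rather than overlaying space inside the inode, which is why shmem_symlink() must iput() on allocation failure. kmemdup() itself is essentially kmalloc() plus memcpy(); a sketch mirroring its usual mm/util.c definition (hypothetical demo_ name):

    #include <linux/slab.h>
    #include <linux/string.h>

    static void *demo_kmemdup(const void *src, size_t len, gfp_t gfp)
    {
            void *p = kmalloc(len, gfp);

            if (p)
                    memcpy(p, src, len);
            return p;
    }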
2084static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) 1642static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
2085{ 1643{
2086 nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); 1644 nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
2087 return NULL; 1645 return NULL;
2088} 1646}
2089 1647
2090static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) 1648static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2091{ 1649{
2092 struct page *page = NULL; 1650 struct page *page = NULL;
2093 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); 1651 int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
2094 nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); 1652 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
2095 if (page) 1653 if (page)
2096 unlock_page(page); 1654 unlock_page(page);
2097 return page; 1655 return page;
@@ -2202,7 +1760,6 @@ out:
2202 return err; 1760 return err;
2203} 1761}
2204 1762
2205
2206static const struct xattr_handler *shmem_xattr_handlers[] = { 1763static const struct xattr_handler *shmem_xattr_handlers[] = {
2207#ifdef CONFIG_TMPFS_POSIX_ACL 1764#ifdef CONFIG_TMPFS_POSIX_ACL
2208 &generic_acl_access_handler, 1765 &generic_acl_access_handler,
@@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2332} 1889}
2333#endif /* CONFIG_TMPFS_XATTR */ 1890#endif /* CONFIG_TMPFS_XATTR */
2334 1891
2335static const struct inode_operations shmem_symlink_inline_operations = { 1892static const struct inode_operations shmem_short_symlink_operations = {
2336 .readlink = generic_readlink, 1893 .readlink = generic_readlink,
2337 .follow_link = shmem_follow_link_inline, 1894 .follow_link = shmem_follow_short_symlink,
2338#ifdef CONFIG_TMPFS_XATTR 1895#ifdef CONFIG_TMPFS_XATTR
2339 .setxattr = shmem_setxattr, 1896 .setxattr = shmem_setxattr,
2340 .getxattr = shmem_getxattr, 1897 .getxattr = shmem_getxattr,
@@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
2534 if (config.max_inodes < inodes) 2091 if (config.max_inodes < inodes)
2535 goto out; 2092 goto out;
2536 /* 2093 /*
2537 * Those tests also disallow limited->unlimited while any are in 2094 * Those tests disallow limited->unlimited while any are in use;
2538 * use, so i_blocks will always be zero when max_blocks is zero;
2539 * but we must separately disallow unlimited->limited, because 2095 * but we must separately disallow unlimited->limited, because
2540 * in that case we have no record of how much is already in use. 2096 * in that case we have no record of how much is already in use.
2541 */ 2097 */
@@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
2627 goto failed; 2183 goto failed;
2628 sbinfo->free_inodes = sbinfo->max_inodes; 2184 sbinfo->free_inodes = sbinfo->max_inodes;
2629 2185
2630 sb->s_maxbytes = SHMEM_MAX_BYTES; 2186 sb->s_maxbytes = MAX_LFS_FILESIZE;
2631 sb->s_blocksize = PAGE_CACHE_SIZE; 2187 sb->s_blocksize = PAGE_CACHE_SIZE;
2632 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 2188 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
2633 sb->s_magic = TMPFS_MAGIC; 2189 sb->s_magic = TMPFS_MAGIC;
@@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep;
2662 2218
2663static struct inode *shmem_alloc_inode(struct super_block *sb) 2219static struct inode *shmem_alloc_inode(struct super_block *sb)
2664{ 2220{
2665 struct shmem_inode_info *p; 2221 struct shmem_inode_info *info;
2666 p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); 2222 info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
2667 if (!p) 2223 if (!info)
2668 return NULL; 2224 return NULL;
2669 return &p->vfs_inode; 2225 return &info->vfs_inode;
2670} 2226}
2671 2227
2672static void shmem_i_callback(struct rcu_head *head) 2228static void shmem_destroy_callback(struct rcu_head *head)
2673{ 2229{
2674 struct inode *inode = container_of(head, struct inode, i_rcu); 2230 struct inode *inode = container_of(head, struct inode, i_rcu);
2675 INIT_LIST_HEAD(&inode->i_dentry); 2231 INIT_LIST_HEAD(&inode->i_dentry);
@@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head)
2678 2234
2679static void shmem_destroy_inode(struct inode *inode) 2235static void shmem_destroy_inode(struct inode *inode)
2680{ 2236{
2681 if ((inode->i_mode & S_IFMT) == S_IFREG) { 2237 if ((inode->i_mode & S_IFMT) == S_IFREG)
2682 /* only struct inode is valid if it's an inline symlink */
2683 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2238 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2684 } 2239 call_rcu(&inode->i_rcu, shmem_destroy_callback);
2685 call_rcu(&inode->i_rcu, shmem_i_callback);
2686} 2240}
2687 2241
2688static void init_once(void *foo) 2242static void shmem_init_inode(void *foo)
2689{ 2243{
2690 struct shmem_inode_info *p = (struct shmem_inode_info *) foo; 2244 struct shmem_inode_info *info = foo;
2691 2245 inode_init_once(&info->vfs_inode);
2692 inode_init_once(&p->vfs_inode);
2693} 2246}
2694 2247
2695static int init_inodecache(void) 2248static int shmem_init_inodecache(void)
2696{ 2249{
2697 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", 2250 shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
2698 sizeof(struct shmem_inode_info), 2251 sizeof(struct shmem_inode_info),
2699 0, SLAB_PANIC, init_once); 2252 0, SLAB_PANIC, shmem_init_inode);
2700 return 0; 2253 return 0;
2701} 2254}
2702 2255
2703static void destroy_inodecache(void) 2256static void shmem_destroy_inodecache(void)
2704{ 2257{
2705 kmem_cache_destroy(shmem_inode_cachep); 2258 kmem_cache_destroy(shmem_inode_cachep);
2706} 2259}
@@ -2797,21 +2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = {
2797#endif 2350#endif
2798}; 2351};
2799 2352
2800
2801static struct dentry *shmem_mount(struct file_system_type *fs_type, 2353static struct dentry *shmem_mount(struct file_system_type *fs_type,
2802 int flags, const char *dev_name, void *data) 2354 int flags, const char *dev_name, void *data)
2803{ 2355{
2804 return mount_nodev(fs_type, flags, data, shmem_fill_super); 2356 return mount_nodev(fs_type, flags, data, shmem_fill_super);
2805} 2357}
2806 2358
2807static struct file_system_type tmpfs_fs_type = { 2359static struct file_system_type shmem_fs_type = {
2808 .owner = THIS_MODULE, 2360 .owner = THIS_MODULE,
2809 .name = "tmpfs", 2361 .name = "tmpfs",
2810 .mount = shmem_mount, 2362 .mount = shmem_mount,
2811 .kill_sb = kill_litter_super, 2363 .kill_sb = kill_litter_super,
2812}; 2364};
2813 2365
2814int __init init_tmpfs(void) 2366int __init shmem_init(void)
2815{ 2367{
2816 int error; 2368 int error;
2817 2369
@@ -2819,18 +2371,18 @@ int __init init_tmpfs(void)
2819 if (error) 2371 if (error)
2820 goto out4; 2372 goto out4;
2821 2373
2822 error = init_inodecache(); 2374 error = shmem_init_inodecache();
2823 if (error) 2375 if (error)
2824 goto out3; 2376 goto out3;
2825 2377
2826 error = register_filesystem(&tmpfs_fs_type); 2378 error = register_filesystem(&shmem_fs_type);
2827 if (error) { 2379 if (error) {
2828 printk(KERN_ERR "Could not register tmpfs\n"); 2380 printk(KERN_ERR "Could not register tmpfs\n");
2829 goto out2; 2381 goto out2;
2830 } 2382 }
2831 2383
2832 shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, 2384 shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
2833 tmpfs_fs_type.name, NULL); 2385 shmem_fs_type.name, NULL);
2834 if (IS_ERR(shm_mnt)) { 2386 if (IS_ERR(shm_mnt)) {
2835 error = PTR_ERR(shm_mnt); 2387 error = PTR_ERR(shm_mnt);
2836 printk(KERN_ERR "Could not kern_mount tmpfs\n"); 2388 printk(KERN_ERR "Could not kern_mount tmpfs\n");
@@ -2839,9 +2391,9 @@ int __init init_tmpfs(void)
2839 return 0; 2391 return 0;
2840 2392
2841out1: 2393out1:
2842 unregister_filesystem(&tmpfs_fs_type); 2394 unregister_filesystem(&shmem_fs_type);
2843out2: 2395out2:
2844 destroy_inodecache(); 2396 shmem_destroy_inodecache();
2845out3: 2397out3:
2846 bdi_destroy(&shmem_backing_dev_info); 2398 bdi_destroy(&shmem_backing_dev_info);
2847out4: 2399out4:
@@ -2849,45 +2401,6 @@ out4:
2849 return error; 2401 return error;
2850} 2402}
2851 2403
2852#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2853/**
2854 * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
2855 * @inode: the inode to be searched
2856 * @pgoff: the offset to be searched
2857 * @pagep: the pointer for the found page to be stored
2858 * @ent: the pointer for the found swap entry to be stored
2859 *
2860 * If a page is found, its refcount is incremented. Callers should handle
2861 * that refcount.
2862 */
2863void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2864 struct page **pagep, swp_entry_t *ent)
2865{
2866 swp_entry_t entry = { .val = 0 }, *ptr;
2867 struct page *page = NULL;
2868 struct shmem_inode_info *info = SHMEM_I(inode);
2869
2870 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2871 goto out;
2872
2873 spin_lock(&info->lock);
2874 ptr = shmem_swp_entry(info, pgoff, NULL);
2875#ifdef CONFIG_SWAP
2876 if (ptr && ptr->val) {
2877 entry.val = ptr->val;
2878 page = find_get_page(&swapper_space, entry.val);
2879 } else
2880#endif
2881 page = find_get_page(inode->i_mapping, pgoff);
2882 if (ptr)
2883 shmem_swp_unmap(ptr);
2884 spin_unlock(&info->lock);
2885out:
2886 *pagep = page;
2887 *ent = entry;
2888}
2889#endif
2890
2891#else /* !CONFIG_SHMEM */ 2404#else /* !CONFIG_SHMEM */
2892 2405
2893/* 2406/*
@@ -2901,23 +2414,23 @@ out:
2901 2414
2902#include <linux/ramfs.h> 2415#include <linux/ramfs.h>
2903 2416
2904static struct file_system_type tmpfs_fs_type = { 2417static struct file_system_type shmem_fs_type = {
2905 .name = "tmpfs", 2418 .name = "tmpfs",
2906 .mount = ramfs_mount, 2419 .mount = ramfs_mount,
2907 .kill_sb = kill_litter_super, 2420 .kill_sb = kill_litter_super,
2908}; 2421};
2909 2422
2910int __init init_tmpfs(void) 2423int __init shmem_init(void)
2911{ 2424{
2912 BUG_ON(register_filesystem(&tmpfs_fs_type) != 0); 2425 BUG_ON(register_filesystem(&shmem_fs_type) != 0);
2913 2426
2914 shm_mnt = kern_mount(&tmpfs_fs_type); 2427 shm_mnt = kern_mount(&shmem_fs_type);
2915 BUG_ON(IS_ERR(shm_mnt)); 2428 BUG_ON(IS_ERR(shm_mnt));
2916 2429
2917 return 0; 2430 return 0;
2918} 2431}
2919 2432
2920int shmem_unuse(swp_entry_t entry, struct page *page) 2433int shmem_unuse(swp_entry_t swap, struct page *page)
2921{ 2434{
2922 return 0; 2435 return 0;
2923} 2436}
@@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
2927 return 0; 2440 return 0;
2928} 2441}
2929 2442
2930void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) 2443void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
2931{ 2444{
2932 truncate_inode_pages_range(inode->i_mapping, start, end); 2445 truncate_inode_pages_range(inode->i_mapping, lstart, lend);
2933} 2446}
2934EXPORT_SYMBOL_GPL(shmem_truncate_range); 2447EXPORT_SYMBOL_GPL(shmem_truncate_range);
2935 2448
2936#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2937/**
2938 * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
2939 * @inode: the inode to be searched
2940 * @pgoff: the offset to be searched
2941 * @pagep: the pointer for the found page to be stored
2942 * @ent: the pointer for the found swap entry to be stored
2943 *
2944 * If a page is found, its refcount is incremented. Callers should handle
2945 * that refcount.
2946 */
2947void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2948 struct page **pagep, swp_entry_t *ent)
2949{
2950 struct page *page = NULL;
2951
2952 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2953 goto out;
2954 page = find_get_page(inode->i_mapping, pgoff);
2955out:
2956 *pagep = page;
2957 *ent = (swp_entry_t){ .val = 0 };
2958}
2959#endif
2960
2961#define shmem_vm_ops generic_file_vm_ops 2449#define shmem_vm_ops generic_file_vm_ops
2962#define shmem_file_operations ramfs_file_operations 2450#define shmem_file_operations ramfs_file_operations
2963#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) 2451#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
2964#define shmem_acct_size(flags, size) 0 2452#define shmem_acct_size(flags, size) 0
2965#define shmem_unacct_size(flags, size) do {} while (0) 2453#define shmem_unacct_size(flags, size) do {} while (0)
2966#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE
2967 2454
2968#endif /* CONFIG_SHMEM */ 2455#endif /* CONFIG_SHMEM */
2969 2456
@@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
2987 if (IS_ERR(shm_mnt)) 2474 if (IS_ERR(shm_mnt))
2988 return (void *)shm_mnt; 2475 return (void *)shm_mnt;
2989 2476
2990 if (size < 0 || size > SHMEM_MAX_BYTES) 2477 if (size < 0 || size > MAX_LFS_FILESIZE)
2991 return ERR_PTR(-EINVAL); 2478 return ERR_PTR(-EINVAL);
2992 2479
2993 if (shmem_acct_size(flags, size)) 2480 if (shmem_acct_size(flags, size))
diff --git a/mm/slab.c b/mm/slab.c
index 95947400702..6d90a091fdc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -622,6 +622,51 @@ int slab_is_available(void)
622static struct lock_class_key on_slab_l3_key; 622static struct lock_class_key on_slab_l3_key;
623static struct lock_class_key on_slab_alc_key; 623static struct lock_class_key on_slab_alc_key;
624 624
625static struct lock_class_key debugobj_l3_key;
626static struct lock_class_key debugobj_alc_key;
627
628static void slab_set_lock_classes(struct kmem_cache *cachep,
629 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
630 int q)
631{
632 struct array_cache **alc;
633 struct kmem_list3 *l3;
634 int r;
635
636 l3 = cachep->nodelists[q];
637 if (!l3)
638 return;
639
640 lockdep_set_class(&l3->list_lock, l3_key);
641 alc = l3->alien;
642 /*
643 * FIXME: This check for BAD_ALIEN_MAGIC
644 * should go away when common slab code is taught to
645 * work even without alien caches.
646 * Currently, non-NUMA code returns BAD_ALIEN_MAGIC
647 * for alloc_alien_cache.
648 */
649 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
650 return;
651 for_each_node(r) {
652 if (alc[r])
653 lockdep_set_class(&alc[r]->lock, alc_key);
654 }
655}
656
657static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
658{
659 slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
660}
661
662static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
663{
664 int node;
665
666 for_each_online_node(node)
667 slab_set_debugobj_lock_classes_node(cachep, node);
668}
669
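slab_set_lock_classes() exists so that SLAB_DEBUG_OBJECTS caches can be given lockdep classes distinct from the on-slab ones, avoiding false-positive deadlock reports between unrelated locks that happen to share an init site. The underlying annotation in isolation (a sketch with hypothetical demo_* names):

    #include <linux/spinlock.h>

    static struct lock_class_key demo_key;
    static DEFINE_SPINLOCK(demo_lock);

    static void demo_annotate(void)
    {
            /* move demo_lock out of the lockdep class shared by every lock
             * initialized from the same DEFINE_SPINLOCK()/spin_lock_init() site */
            lockdep_set_class(&demo_lock, &demo_key);
    }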
625static void init_node_lock_keys(int q) 670static void init_node_lock_keys(int q)
626{ 671{
627 struct cache_sizes *s = malloc_sizes; 672 struct cache_sizes *s = malloc_sizes;
@@ -630,29 +675,14 @@ static void init_node_lock_keys(int q)
630 return; 675 return;
631 676
632 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { 677 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
633 struct array_cache **alc;
634 struct kmem_list3 *l3; 678 struct kmem_list3 *l3;
635 int r;
636 679
637 l3 = s->cs_cachep->nodelists[q]; 680 l3 = s->cs_cachep->nodelists[q];
638 if (!l3 || OFF_SLAB(s->cs_cachep)) 681 if (!l3 || OFF_SLAB(s->cs_cachep))
639 continue; 682 continue;
640 lockdep_set_class(&l3->list_lock, &on_slab_l3_key); 683
641 alc = l3->alien; 684 slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
642 /* 685 &on_slab_alc_key, q);
643 * FIXME: This check for BAD_ALIEN_MAGIC
644 * should go away when common slab code is taught to
645 * work even without alien caches.
646 * Currently, non-NUMA code returns BAD_ALIEN_MAGIC
647 * for alloc_alien_cache.
648 */
649 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
650 continue;
651 for_each_node(r) {
652 if (alc[r])
653 lockdep_set_class(&alc[r]->lock,
654 &on_slab_alc_key);
655 }
656 } 686 }
657} 687}
658 688
@@ -671,6 +701,14 @@ static void init_node_lock_keys(int q)
671static inline void init_lock_keys(void) 701static inline void init_lock_keys(void)
672{ 702{
673} 703}
704
705static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
706{
707}
708
709static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
710{
711}
674#endif 712#endif
675 713
676/* 714/*
@@ -1264,6 +1302,8 @@ static int __cpuinit cpuup_prepare(long cpu)
1264 spin_unlock_irq(&l3->list_lock); 1302 spin_unlock_irq(&l3->list_lock);
1265 kfree(shared); 1303 kfree(shared);
1266 free_alien_cache(alien); 1304 free_alien_cache(alien);
1305 if (cachep->flags & SLAB_DEBUG_OBJECTS)
1306 slab_set_debugobj_lock_classes_node(cachep, node);
1267 } 1307 }
1268 init_node_lock_keys(node); 1308 init_node_lock_keys(node);
1269 1309
@@ -1626,6 +1666,9 @@ void __init kmem_cache_init_late(void)
1626{ 1666{
1627 struct kmem_cache *cachep; 1667 struct kmem_cache *cachep;
1628 1668
1669 /* Annotate slab for lockdep -- annotate the malloc caches */
1670 init_lock_keys();
1671
1629 /* 6) resize the head arrays to their final sizes */ 1672 /* 6) resize the head arrays to their final sizes */
1630 mutex_lock(&cache_chain_mutex); 1673 mutex_lock(&cache_chain_mutex);
1631 list_for_each_entry(cachep, &cache_chain, next) 1674 list_for_each_entry(cachep, &cache_chain, next)
@@ -1636,9 +1679,6 @@ void __init kmem_cache_init_late(void)
1636 /* Done! */ 1679 /* Done! */
1637 g_cpucache_up = FULL; 1680 g_cpucache_up = FULL;
1638 1681
1639 /* Annotate slab for lockdep -- annotate the malloc caches */
1640 init_lock_keys();
1641
1642 /* 1682 /*
1643 * Register a cpu startup notifier callback that initializes 1683 * Register a cpu startup notifier callback that initializes
1644 * cpu_cache_get for all new cpus 1684 * cpu_cache_get for all new cpus
@@ -2426,6 +2466,16 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2426 goto oops; 2466 goto oops;
2427 } 2467 }
2428 2468
2469 if (flags & SLAB_DEBUG_OBJECTS) {
2470 /*
2471 * Would deadlock through slab_destroy()->call_rcu()->
2472 * debug_object_activate()->kmem_cache_alloc().
2473 */
2474 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
2475
2476 slab_set_debugobj_lock_classes(cachep);
2477 }
2478
2429 /* cache setup completed, link it into the list */ 2479 /* cache setup completed, link it into the list */
2430 list_add(&cachep->next, &cache_chain); 2480 list_add(&cachep->next, &cache_chain);
2431oops: 2481oops:
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1b8c3390724..17bc224bce6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
1924 1924
1925 /* 1925 /*
1926 * Find out how many pages are allowed for a single swap 1926 * Find out how many pages are allowed for a single swap
1927 * device. There are two limiting factors: 1) the number of 1927 * device. There are three limiting factors: 1) the number
1928 * bits for the swap offset in the swp_entry_t type and 1928 * of bits for the swap offset in the swp_entry_t type, and
1929 * 2) the number of bits in the a swap pte as defined by 1929 * 2) the number of bits in the swap pte as defined by
1930 * the different architectures. In order to find the 1930 * the different architectures, and 3) the number of free bits
1931 * largest possible bit mask a swap entry with swap type 0 1931 * in an exceptional radix_tree entry. In order to find the
1932 * largest possible bit mask, a swap entry with swap type 0
1932 * and swap offset ~0UL is created, encoded to a swap pte, 1933 * and swap offset ~0UL is created, encoded to a swap pte,
1933 * decoded to a swp_entry_t again and finally the swap 1934 * decoded to a swp_entry_t again, and finally the swap
1934 * offset is extracted. This will mask all the bits from 1935 * offset is extracted. This will mask all the bits from
1935 * the initial ~0UL mask that can't be encoded in either 1936 * the initial ~0UL mask that can't be encoded in either
1936 * the swp_entry_t or the architecture definition of a 1937 * the swp_entry_t or the architecture definition of a
1937 * swap pte. 1938 * swap pte. Then the same is done for a radix_tree entry.
1938 */ 1939 */
1939 maxpages = swp_offset(pte_to_swp_entry( 1940 maxpages = swp_offset(pte_to_swp_entry(
1940 swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; 1941 swp_entry_to_pte(swp_entry(0, ~0UL))));
1942 maxpages = swp_offset(radix_to_swp_entry(
1943 swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
1944
1941 if (maxpages > swap_header->info.last_page) { 1945 if (maxpages > swap_header->info.last_page) {
1942 maxpages = swap_header->info.last_page + 1; 1946 maxpages = swap_header->info.last_page + 1;
1943 /* p->max is an unsigned int: don't overflow it */ 1947 /* p->max is an unsigned int: don't overflow it */
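The double round-trip above is just "encode ~0UL, decode it back, and keep whatever bits survive". A userspace analogue with a hypothetical 20-bit offset field shows the masking:

    #include <stdio.h>

    #define OFFSET_BITS 20  /* hypothetical width of a pte's swap-offset field */

    static unsigned long encode(unsigned long off)
    {
            return off & ((1UL << OFFSET_BITS) - 1);        /* bits that fit */
    }

    static unsigned long decode(unsigned long val)
    {
            return val;
    }

    int main(void)
    {
            /* whatever survives the round trip is the largest offset; +1 pages */
            unsigned long maxpages = decode(encode(~0UL)) + 1;

            printf("maxpages = %lu\n", maxpages);   /* 1048576 == 2^20 */
            return 0;
    }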
diff --git a/mm/truncate.c b/mm/truncate.c
index 232eb2736a7..b40ac6d4e86 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
336 unsigned long count = 0; 336 unsigned long count = 0;
337 int i; 337 int i;
338 338
339 /*
340 * Note: this function may get called on a shmem/tmpfs mapping:
341 * pagevec_lookup() might then return 0 prematurely (because it
342 * got a gangful of swap entries); but it's hardly worth worrying
343 * about - it can rarely have anything to free from such a mapping
344 * (most pages are dirty), and already skips over any difficulties.
345 */
346
339 pagevec_init(&pvec, 0); 347 pagevec_init(&pvec, 0);
340 while (index <= end && pagevec_lookup(&pvec, mapping, index, 348 while (index <= end && pagevec_lookup(&pvec, mapping, index,
341 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 349 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 2252c2085da..52cfd0c3ea7 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
242 if (brdev->payload == p_bridged) { 242 if (brdev->payload == p_bridged) {
243 skb_push(skb, 2); 243 skb_push(skb, 2);
244 memset(skb->data, 0, 2); 244 memset(skb->data, 0, 2);
245 } else { /* p_routed */
246 skb_pull(skb, ETH_HLEN);
247 } 245 }
248 } 246 }
249 skb_debug(skb); 247 skb_debug(skb);
diff --git a/net/core/Makefile b/net/core/Makefile
index 8a04dd22cf7..0d357b1c4e5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
3# 3#
4 4
5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ 5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
6 gen_stats.o gen_estimator.o net_namespace.o 6 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
9 9
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
new file mode 100644
index 00000000000..45329d7c9dd
--- /dev/null
+++ b/net/core/secure_seq.c
@@ -0,0 +1,184 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/cryptohash.h>
4#include <linux/module.h>
5#include <linux/cache.h>
6#include <linux/random.h>
7#include <linux/hrtimer.h>
8#include <linux/ktime.h>
9#include <linux/string.h>
10
11#include <net/secure_seq.h>
12
13static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
14
15static int __init net_secret_init(void)
16{
17 get_random_bytes(net_secret, sizeof(net_secret));
18 return 0;
19}
20late_initcall(net_secret_init);
21
22static u32 seq_scale(u32 seq)
23{
24 /*
25 * As close as possible to RFC 793, which
26 * suggests using a 250 kHz clock.
27 * Further reading shows this assumes 2 Mb/s networks.
28 * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
29 * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but
30 * we also need to limit the resolution so that the u32 seq
31 * wraps around less than once per MSL (2 minutes).
32 * Choosing a clock of 64 ns period is OK (u32 wrap period: ~274 s).
33 */
34 return seq + (ktime_to_ns(ktime_get_real()) >> 6);
35}
36
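The ~274 s figure follows directly from the 64 ns tick chosen above: a u32 of 64 ns ticks wraps after 2^32 * 64 ns. A quick check:

    #include <stdio.h>

    int main(void)
    {
            double wrap_s = 4294967296.0 * 64e-9;   /* 2^32 ticks of 64 ns each */

            printf("u32 sequence clock wraps every %.1f s\n", wrap_s);  /* ~274.9 */
            return 0;
    }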
37#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
38__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
39 __be16 sport, __be16 dport)
40{
41 u32 secret[MD5_MESSAGE_BYTES / 4];
42 u32 hash[MD5_DIGEST_WORDS];
43 u32 i;
44
45 memcpy(hash, saddr, 16);
46 for (i = 0; i < 4; i++)
47 secret[i] = net_secret[i] + daddr[i];
48 secret[4] = net_secret[4] +
49 (((__force u16)sport << 16) + (__force u16)dport);
50 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
51 secret[i] = net_secret[i];
52
53 md5_transform(hash, secret);
54
55 return seq_scale(hash[0]);
56}
57EXPORT_SYMBOL(secure_tcpv6_sequence_number);
58
59u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
60 __be16 dport)
61{
62 u32 secret[MD5_MESSAGE_BYTES / 4];
63 u32 hash[MD5_DIGEST_WORDS];
64 u32 i;
65
66 memcpy(hash, saddr, 16);
67 for (i = 0; i < 4; i++)
68 secret[i] = net_secret[i] + (__force u32) daddr[i];
69 secret[4] = net_secret[4] + (__force u32)dport;
70 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
71 secret[i] = net_secret[i];
72
73 md5_transform(hash, secret);
74
75 return hash[0];
76}
77#endif
78
79#ifdef CONFIG_INET
80__u32 secure_ip_id(__be32 daddr)
81{
82 u32 hash[MD5_DIGEST_WORDS];
83
84 hash[0] = (__force __u32) daddr;
85 hash[1] = net_secret[13];
86 hash[2] = net_secret[14];
87 hash[3] = net_secret[15];
88
89 md5_transform(hash, net_secret);
90
91 return hash[0];
92}
93
94__u32 secure_ipv6_id(const __be32 daddr[4])
95{
96 __u32 hash[4];
97
98 memcpy(hash, daddr, 16);
99 md5_transform(hash, net_secret);
100
101 return hash[0];
102}
103
104__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
105 __be16 sport, __be16 dport)
106{
107 u32 hash[MD5_DIGEST_WORDS];
108
109 hash[0] = (__force u32)saddr;
110 hash[1] = (__force u32)daddr;
111 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
112 hash[3] = net_secret[15];
113
114 md5_transform(hash, net_secret);
115
116 return seq_scale(hash[0]);
117}
118
119u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
120{
121 u32 hash[MD5_DIGEST_WORDS];
122
123 hash[0] = (__force u32)saddr;
124 hash[1] = (__force u32)daddr;
125 hash[2] = (__force u32)dport ^ net_secret[14];
126 hash[3] = net_secret[15];
127
128 md5_transform(hash, net_secret);
129
130 return hash[0];
131}
132EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
133#endif
134
135#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
136u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
137 __be16 sport, __be16 dport)
138{
139 u32 hash[MD5_DIGEST_WORDS];
140 u64 seq;
141
142 hash[0] = (__force u32)saddr;
143 hash[1] = (__force u32)daddr;
144 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
145 hash[3] = net_secret[15];
146
147 md5_transform(hash, net_secret);
148
149 seq = hash[0] | (((u64)hash[1]) << 32);
150 seq += ktime_to_ns(ktime_get_real());
151 seq &= (1ull << 48) - 1;
152
153 return seq;
154}
155EXPORT_SYMBOL(secure_dccp_sequence_number);
156
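The final mask keeps the result inside DCCP's 48-bit sequence-number space (RFC 4340), unlike TCP's 32 bits. Sanity check of the constant:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long mask = (1ULL << 48) - 1;

            printf("0x%llx\n", mask);   /* 0xffffffffffff: 48-bit space */
            return 0;
    }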
157#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
158u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
159 __be16 sport, __be16 dport)
160{
161 u32 secret[MD5_MESSAGE_BYTES / 4];
162 u32 hash[MD5_DIGEST_WORDS];
163 u64 seq;
164 u32 i;
165
166 memcpy(hash, saddr, 16);
167 for (i = 0; i < 4; i++)
168 secret[i] = net_secret[i] + daddr[i];
169 secret[4] = net_secret[4] +
170 (((__force u16)sport << 16) + (__force u16)dport);
171 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
172 secret[i] = net_secret[i];
173
174 md5_transform(hash, secret);
175
176 seq = hash[0] | (((u64)hash[1]) << 32);
177 seq += ktime_to_ns(ktime_get_real());
178 seq &= (1ull << 48) - 1;
179
180 return seq;
181}
182EXPORT_SYMBOL(secure_dccpv6_sequence_number);
183#endif
184#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2beda824636..27002dffe7e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1369,8 +1369,21 @@ pull_pages:
1369} 1369}
1370EXPORT_SYMBOL(__pskb_pull_tail); 1370EXPORT_SYMBOL(__pskb_pull_tail);
1371 1371
1372/* Copy some data bits from skb to kernel buffer. */ 1372/**
1373 1373 * skb_copy_bits - copy bits from skb to kernel buffer
1374 * @skb: source skb
1375 * @offset: offset in source
1376 * @to: destination buffer
1377 * @len: number of bytes to copy
1378 *
1379 * Copy the specified number of bytes from the source skb to the
1380 * destination buffer.
1381 *
1382 * CAUTION:
1383 * If its prototype is ever changed,
1384 * check arch/{*}/net/{*}.S files,
1385 * since it is called from BPF assembly code.
1386 */
1374int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 1387int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1375{ 1388{
1376 int start = skb_headlen(skb); 1389 int start = skb_headlen(skb);
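The new kerneldoc warns about the BPF assembly callers; typical C callers use skb_copy_bits() to pull headers out of a possibly non-linear skb (head, frags, frag_list) instead of reading through a pointer into the head. A sketch, with a hypothetical helper name:

    #include <linux/skbuff.h>
    #include <linux/udp.h>

    static int demo_read_udp_header(const struct sk_buff *skb, struct udphdr *uh)
    {
            /* returns 0 on success, -EFAULT if the skb is shorter than asked */
            return skb_copy_bits(skb, skb_transport_offset(skb), uh, sizeof(*uh));
    }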
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8c36adfd191..332639b56f4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -26,6 +26,7 @@
26#include <net/timewait_sock.h> 26#include <net/timewait_sock.h>
27#include <net/tcp_states.h> 27#include <net/tcp_states.h>
28#include <net/xfrm.h> 28#include <net/xfrm.h>
29#include <net/secure_seq.h>
29 30
30#include "ackvec.h" 31#include "ackvec.h"
31#include "ccid.h" 32#include "ccid.h"
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8dc4348774a..b74f76117dc 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
29#include <net/transp_v6.h> 29#include <net/transp_v6.h>
30#include <net/ip6_checksum.h> 30#include <net/ip6_checksum.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32#include <net/secure_seq.h>
32 33
33#include "dccp.h" 34#include "dccp.h"
34#include "ipv6.h" 35#include "ipv6.h"
@@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 	dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
 }
 
-static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
-						  __be16 sport, __be16 dport )
-{
-	return secure_tcpv6_sequence_number(saddr, daddr, sport, dport);
-}
-
-static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
+static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
 {
 	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
 					     ipv6_hdr(skb)->saddr.s6_addr32,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f1d27f6c935..283c0a26e03 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 		pmc->sfcount[sfmode]--;
 		for (j=0; j<i; j++)
-			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
 #ifdef CONFIG_IP_MULTICAST
 		struct ip_sf_list *psf;
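The one-character change above fixes a classic partial-failure rollback bug: the undo loop must delete the entries that were already added (index j walking 0..i-1), not repeatedly delete the entry that failed (index i). The pattern in isolation, as a self-contained sketch with toy add/del helpers:

#include <stdio.h>

static int added[16];
static int n_added;

static int add_one(int v)
{
	if (n_added == 4)		/* simulate failure on the 5th entry */
		return -1;
	added[n_added++] = v;
	return 0;
}

static void del_one(int v)
{
	printf("rolling back %d\n", v);
}

/* Add count entries; on failure, undo exactly those already added. */
static int add_all(const int *src, int count)
{
	int i, j;

	for (i = 0; i < count; i++) {
		if (add_one(src[i]) < 0) {
			for (j = 0; j < i; j++)
				del_one(src[j]);	/* src[j], not src[i] */
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	int v[6] = { 1, 2, 3, 4, 5, 6 };

	return add_all(v, 6) ? 1 : 0;
}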
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3c0369a3a66..984ec656b03 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -21,6 +21,7 @@
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/secure_seq.h>
 #include <net/ip.h>
 
 /*
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e38213817d0..86f13c67ea8 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
 #include <linux/net.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
+#include <net/secure_seq.h>
 
 /*
  * Theory of operations.
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ccaaa851ab4..77d3eded665 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		skb = skb2;
 	}
 
+	rcu_read_lock();
 	neigh = dst_get_neighbour(dst);
-	if (neigh)
-		return neigh_output(neigh, skb);
+	if (neigh) {
+		int res = neigh_output(neigh, skb);
+
+		rcu_read_unlock();
+		return res;
+	}
+	rcu_read_unlock();
 
 	if (net_ratelimit())
 		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
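This hunk, and the matching IPv6 hunks later in the series, all enforce the same rule: a pointer obtained from dst_get_neighbour() is only valid inside an RCU read-side critical section, so every exit path, including the early return, must pass through rcu_read_unlock(). The general shape, sketched with hypothetical types:

#include <linux/rcupdate.h>
#include <linux/errno.h>

struct my_thing;
struct my_obj {
	struct my_thing __rcu *thing;
};

int use_thing(struct my_thing *t);	/* hypothetical consumer */

/* The dereferenced pointer must not be used (or returned) after
 * rcu_read_unlock(); unlock on every exit path. */
static int use_rcu_pointer(struct my_obj *obj)
{
	struct my_thing *t;
	int res = -ENOENT;

	rcu_read_lock();
	t = rcu_dereference(obj->thing);
	if (t)
		res = use_thing(t);
	rcu_read_unlock();

	return res;
}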
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 3e61faf23a9..f52d41ea069 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -12,6 +12,7 @@
 #include <linux/ip.h>
 
 #include <linux/netfilter.h>
+#include <net/secure_seq.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_rule.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1730689f560..e3dec1c9f09 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,6 +109,7 @@
 #include <linux/sysctl.h>
 #endif
 #include <net/atmclip.h>
+#include <net/secure_seq.h>
 
 #define RT_FL_TOS(oldflp4) \
 	((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
@@ -1628,16 +1629,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
 {
 	struct rtable *rt = (struct rtable *) dst;
 	__be32 orig_gw = rt->rt_gateway;
-	struct neighbour *n;
+	struct neighbour *n, *old_n;
 
 	dst_confirm(&rt->dst);
 
-	neigh_release(dst_get_neighbour(&rt->dst));
-	dst_set_neighbour(&rt->dst, NULL);
-
 	rt->rt_gateway = peer->redirect_learned.a4;
-	rt_bind_neighbour(rt);
-	n = dst_get_neighbour(&rt->dst);
+
+	n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
+	if (IS_ERR(n))
+		return PTR_ERR(n);
+	old_n = xchg(&rt->dst._neighbour, n);
+	if (old_n)
+		neigh_release(old_n);
 	if (!n || !(n->nud_state & NUD_VALID)) {
 		if (n)
 			neigh_event_send(n, NULL);
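The rewrite above also changes how the old neighbour is retired: the new pointer is published with an atomic xchg() first, and only afterwards is the displaced reference dropped, so there is no window in which the field points at released memory. The same publish-then-release shape in portable C11 atomics, with a toy reference count:

#include <stdatomic.h>
#include <stdlib.h>

struct thing {
	atomic_int refcnt;
};

static void thing_put(struct thing *t)
{
	if (t && atomic_fetch_sub(&t->refcnt, 1) == 1)
		free(t);
}

/* Swap the new object in atomically, then release the reference the
 * slot held on the old one; readers observe either old or new. */
static void replace_thing(_Atomic(struct thing *) *slot, struct thing *t)
{
	struct thing *old = atomic_exchange(slot, t);

	thing_put(old);
}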
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 955b8e65b69..1c12b8ec849 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,6 +72,7 @@
 #include <net/timewait_sock.h>
 #include <net/xfrm.h>
 #include <net/netdma.h>
+#include <net/secure_seq.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a55500cc0b2..f012ebd87b4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	 * layer address of our nexhop router
 	 */
 
-	if (dst_get_neighbour(&rt->dst) == NULL)
+	if (dst_get_neighbour_raw(&rt->dst) == NULL)
 		ifa->flags &= ~IFA_F_OPTIMISTIC;
 
 	ifa->idev = idev;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 16560336eb7..9ef1831746e 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -33,6 +33,11 @@
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
 
+static inline int ipv6_mapped_addr_any(const struct in6_addr *a)
+{
+	return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0));
+}
+
 int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
@@ -102,10 +107,12 @@ ipv4_connected:
 
 	ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
 
-	if (ipv6_addr_any(&np->saddr))
+	if (ipv6_addr_any(&np->saddr) ||
+	    ipv6_mapped_addr_any(&np->saddr))
 		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
 
-	if (ipv6_addr_any(&np->rcv_saddr)) {
+	if (ipv6_addr_any(&np->rcv_saddr) ||
+	    ipv6_mapped_addr_any(&np->rcv_saddr)) {
 		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
 				       &np->rcv_saddr);
 		if (sk->sk_prot->rehash)
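The new ipv6_mapped_addr_any() helper exists because ipv6_addr_any() only matches ::, while a v4-mapped socket can carry the equally unbound ::ffff:0.0.0.0; both now get their source rewritten on connect. The same predicate as a standalone userspace check:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* ::ffff:0.0.0.0 - an IPv4-mapped address whose IPv4 part is INADDR_ANY. */
static int mapped_addr_any(const struct in6_addr *a)
{
	return IN6_IS_ADDR_V4MAPPED(a) &&
	       memcmp(&a->s6_addr[12], "\0\0\0\0", 4) == 0;
}

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "::ffff:0.0.0.0", &a);
	printf("%d\n", mapped_addr_any(&a));	/* prints 1 */
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);
	printf("%d\n", mapped_addr_any(&a));	/* prints 0 */
	return 0;
}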
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b5319723370..73f1a00a96a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -20,6 +20,7 @@
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
+#include <net/secure_seq.h>
 #include <net/ip.h>
 
 int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 54a4678955b..320d91d20ad 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 			RT6_TRACE("aging clone %p\n", rt);
 			return -1;
 		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
-			   (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) {
+			   (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
 			RT6_TRACE("purging route %p via non-router but gateway\n",
 				  rt);
 			return -1;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 32e5339db0c..4c882cf4e8a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb)
 				skb->len);
 	}
 
+	rcu_read_lock();
 	neigh = dst_get_neighbour(dst);
-	if (neigh)
-		return neigh_output(neigh, skb);
+	if (neigh) {
+		int res = neigh_output(neigh, skb);
 
+		rcu_read_unlock();
+		return res;
+	}
+	rcu_read_unlock();
 	IP6_INC_STATS_BH(dev_net(dst->dev),
 			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
@@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	 * dst entry and replace it instead with the
 	 * dst entry of the nexthop router
 	 */
+	rcu_read_lock();
 	n = dst_get_neighbour(*dst);
 	if (n && !(n->nud_state & NUD_VALID)) {
 		struct inet6_ifaddr *ifp;
 		struct flowi6 fl_gw6;
 		int redirect;
 
+		rcu_read_unlock();
 		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
 				      (*dst)->dev, 1);
 
@@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 			if ((err = (*dst)->error))
 				goto out_err_release;
 		}
+	} else {
+		rcu_read_unlock();
 	}
 #endif
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e8987da0666..9e69eb0ec6d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -364,7 +364,7 @@ out:
 #ifdef CONFIG_IPV6_ROUTER_PREF
 static void rt6_probe(struct rt6_info *rt)
 {
-	struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
+	struct neighbour *neigh;
 	/*
 	 * Okay, this does not seem to be appropriate
 	 * for now, however, we need to check if it
@@ -373,8 +373,10 @@ static void rt6_probe(struct rt6_info *rt)
 	 * Router Reachability Probe MUST be rate-limited
 	 * to no more than one per minute.
 	 */
+	rcu_read_lock();
+	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
 	if (!neigh || (neigh->nud_state & NUD_VALID))
-		return;
+		goto out;
 	read_lock_bh(&neigh->lock);
 	if (!(neigh->nud_state & NUD_VALID) &&
 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -387,8 +389,11 @@ static void rt6_probe(struct rt6_info *rt)
 		target = (struct in6_addr *)&neigh->primary_key;
 		addrconf_addr_solict_mult(target, &mcaddr);
 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
-	} else
+	} else {
 		read_unlock_bh(&neigh->lock);
+	}
+out:
+	rcu_read_unlock();
 }
 #else
 static inline void rt6_probe(struct rt6_info *rt)
@@ -412,8 +417,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 
 static inline int rt6_check_neigh(struct rt6_info *rt)
 {
-	struct neighbour *neigh = dst_get_neighbour(&rt->dst);
+	struct neighbour *neigh;
 	int m;
+
+	rcu_read_lock();
+	neigh = dst_get_neighbour(&rt->dst);
 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 	    !(rt->rt6i_flags & RTF_GATEWAY))
 		m = 1;
@@ -430,6 +438,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
 		read_unlock_bh(&neigh->lock);
 	} else
 		m = 0;
+	rcu_read_unlock();
 	return m;
 }
 
@@ -769,7 +778,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
 		rt->dst.flags |= DST_HOST;
-		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
+		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 	}
 	return rt;
 }
@@ -803,7 +812,7 @@ restart:
 	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
-	if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 	else if (!(rt->dst.flags & DST_HOST))
 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -1587,7 +1596,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	dst_confirm(&rt->dst);
 
 	/* Duplicate redirect: silently ignore. */
-	if (neigh == dst_get_neighbour(&rt->dst))
+	if (neigh == dst_get_neighbour_raw(&rt->dst))
 		goto out;
 
 	nrt = ip6_rt_copy(rt, dest);
@@ -1682,7 +1691,7 @@ again:
 	   1. It is connected route. Action: COW
 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
 	 */
-	if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 		nrt = rt6_alloc_cow(rt, daddr, saddr);
 	else
 		nrt = rt6_alloc_clone(rt, daddr);
@@ -2326,6 +2335,7 @@ static int rt6_fill_node(struct net *net,
 	struct nlmsghdr *nlh;
 	long expires;
 	u32 table;
+	struct neighbour *n;
 
 	if (prefix) {	/* user wants prefix routes only */
 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2414,8 +2424,11 @@ static int rt6_fill_node(struct net *net,
 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
-	if (dst_get_neighbour(&rt->dst))
-		NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key);
+	rcu_read_lock();
+	n = dst_get_neighbour(&rt->dst);
+	if (n)
+		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
+	rcu_read_unlock();
 
 	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
@@ -2608,12 +2621,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 #else
 	seq_puts(m, "00000000000000000000000000000000 00 ");
 #endif
+	rcu_read_lock();
 	n = dst_get_neighbour(&rt->dst);
 	if (n) {
 		seq_printf(m, "%pi6", n->primary_key);
 	} else {
 		seq_puts(m, "00000000000000000000000000000000");
 	}
+	rcu_read_unlock();
 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
 		   rt->dst.__use, rt->rt6i_flags,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 78aa53492b3..d1fb63f4aeb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,6 +61,7 @@
 #include <net/timewait_sock.h>
 #include <net/netdma.h>
 #include <net/inet_common.h>
+#include <net/secure_seq.h>
 
 #include <asm/uaccess.h>
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index be43fd805bd..2b771dc708a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3771,6 +3771,7 @@ err_sock:
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
+	unregister_netdevice_notifier(&ip_vs_dst_notifier);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index ea750e9df65..d2732fc952e 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,8 +1,6 @@
 #
 # Makefile for the NetLabel subsystem.
 #
-# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
-#
 
 # base objects
 obj-y := netlabel_user.o netlabel_kapi.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index c0519139679..96b749dacc3 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -6,7 +6,7 @@
  * system manages static and dynamic label mappings for network protocols such
  * as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 2b9644e19de..fdbc1d2c735 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -6,7 +6,7 @@
  * system manages static and dynamic label mappings for network protocols such
  * as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index dd53a36d89a..6bf878335d9 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -5,7 +5,7 @@
  * NetLabel system manages static and dynamic label mappings for network
  * protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index af7f3355103..d24d774bfd6 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -5,7 +5,7 @@
  * NetLabel system manages static and dynamic label mappings for network
  * protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 2aa975e5452..7d8083cde34 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -6,7 +6,7 @@
  * system manages static and dynamic label mappings for network protocols such
  * as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 0261dda3f2d..bfcc0f7024c 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -6,7 +6,7 @@
  * system manages static and dynamic label mappings for network protocols such
  * as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index b528dd928d3..58107d06084 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -5,7 +5,7 @@
  * system manages static and dynamic label mappings for network protocols such
  * as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index dff8a080924..bfa55586977 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -5,7 +5,7 @@
  * NetLabel system manages static and dynamic label mappings for network
  * protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index 8db37f4c10f..5a9f31ce579 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -5,7 +5,7 @@
  * NetLabel system manages static and dynamic label mappings for network
  * protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index f1ecf848e3a..e6e823656f9 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -5,7 +5,7 @@
  * NetLabel system. The NetLabel system manages static and dynamic label
  * mappings for network protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index 0bc8dc3f9e3..700af49022a 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -5,7 +5,7 @@
  * NetLabel system. The NetLabel system manages static and dynamic label
  * mappings for network protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index a3fd75ac3fa..9fae63f1029 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -5,7 +5,7 @@
  * NetLabel system manages static and dynamic label mappings for network
  * protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index f4fc4c9ad56..81969785e27 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -5,7 +5,7 @@
  * NetLabel system manages static and dynamic label mappings for network
  * protocols such as CIPSO and RIPSO.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4536ee64383..4f5510e2bd6 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
 	 */
-	return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS;
+	if (qlen != slot->qlen)
+		return NET_XMIT_CN;
+
+	/* As we dropped a packet, better let upper stack know this */
+	qdisc_tree_decrease_qlen(sch, 1);
+	return NET_XMIT_SUCCESS;
 }
 
 static struct sk_buff *
diff --git a/net/socket.c b/net/socket.c
index b1cbbcd9255..24a77400b65 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
 #define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
 #define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
 
+struct used_address {
+	struct sockaddr_storage name;
+	unsigned int name_len;
+};
+
 static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
-			 struct msghdr *msg_sys, unsigned flags, int nosec)
+			 struct msghdr *msg_sys, unsigned flags,
+			 struct used_address *used_address)
 {
 	struct compat_msghdr __user *msg_compat =
 	    (struct compat_msghdr __user *)msg;
@@ -1953,8 +1959,28 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
 
 	if (sock->file->f_flags & O_NONBLOCK)
 		msg_sys->msg_flags |= MSG_DONTWAIT;
-	err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
-							  total_len);
+	/*
+	 * If this is sendmmsg() and current destination address is same as
+	 * previously succeeded address, omit asking LSM's decision.
+	 * used_address->name_len is initialized to UINT_MAX so that the first
+	 * destination address never matches.
+	 */
+	if (used_address && used_address->name_len == msg_sys->msg_namelen &&
+	    !memcmp(&used_address->name, msg->msg_name,
+		    used_address->name_len)) {
+		err = sock_sendmsg_nosec(sock, msg_sys, total_len);
+		goto out_freectl;
+	}
+	err = sock_sendmsg(sock, msg_sys, total_len);
+	/*
+	 * If this is sendmmsg() and sending to current destination address was
+	 * successful, remember it.
+	 */
+	if (used_address && err >= 0) {
+		used_address->name_len = msg_sys->msg_namelen;
+		memcpy(&used_address->name, msg->msg_name,
+		       used_address->name_len);
+	}
 
 out_freectl:
 	if (ctl_buf != ctl)
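The used_address logic is a one-entry memoization keyed on the exact destination: within one sendmmsg() call the expensive per-destination security check runs only when the address actually changes, and only successful sends populate the cache. Stripped of socket details, a sketch of the shape:

#include <limits.h>
#include <stdio.h>
#include <string.h>

struct used_address {
	char name[128];
	unsigned int name_len;		/* UINT_MAX: nothing cached yet */
};

/* Stand-in for the expensive per-destination permission check. */
static int expensive_check(const char *addr, unsigned int len)
{
	printf("checking %.*s\n", (int)len, addr);
	return 0;
}

static int checked_send(struct used_address *cache,
			const char *addr, unsigned int len)
{
	if (cache->name_len == len && !memcmp(cache->name, addr, len))
		return 0;			/* same target: skip check */
	if (expensive_check(addr, len) < 0)
		return -1;
	memcpy(cache->name, addr, len);		/* remember only on success */
	cache->name_len = len;
	return 0;
}

int main(void)
{
	struct used_address cache = { .name_len = UINT_MAX };

	checked_send(&cache, "hostA", 5);	/* checks */
	checked_send(&cache, "hostA", 5);	/* cached, no check */
	checked_send(&cache, "hostB", 5);	/* checks again */
	return 0;
}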
@@ -1979,7 +2005,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
 	if (!sock)
 		goto out;
 
-	err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
+	err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
 
 	fput_light(sock->file, fput_needed);
 out:
@@ -1998,6 +2024,10 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 	struct mmsghdr __user *entry;
 	struct compat_mmsghdr __user *compat_entry;
 	struct msghdr msg_sys;
+	struct used_address used_address;
+
+	if (vlen > UIO_MAXIOV)
+		vlen = UIO_MAXIOV;
 
 	datagrams = 0;
 
@@ -2005,27 +2035,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 	if (!sock)
 		return err;
 
-	err = sock_error(sock->sk);
-	if (err)
-		goto out_put;
-
+	used_address.name_len = UINT_MAX;
 	entry = mmsg;
 	compat_entry = (struct compat_mmsghdr __user *)mmsg;
+	err = 0;
 
 	while (datagrams < vlen) {
-		/*
-		 * No need to ask LSM for more than the first datagram.
-		 */
 		if (MSG_CMSG_COMPAT & flags) {
 			err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
-					    &msg_sys, flags, datagrams);
+					    &msg_sys, flags, &used_address);
 			if (err < 0)
 				break;
 			err = __put_user(err, &compat_entry->msg_len);
 			++compat_entry;
 		} else {
 			err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
-					    &msg_sys, flags, datagrams);
+					    &msg_sys, flags, &used_address);
 			if (err < 0)
 				break;
 			err = put_user(err, &entry->msg_len);
@@ -2037,29 +2062,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 		++datagrams;
 	}
 
-out_put:
 	fput_light(sock->file, fput_needed);
 
-	if (err == 0)
-		return datagrams;
-
-	if (datagrams != 0) {
-		/*
-		 * We may send less entries than requested (vlen) if the
-		 * sock is non blocking...
-		 */
-		if (err != -EAGAIN) {
-			/*
-			 * ... or if sendmsg returns an error after we
-			 * send some datagrams, where we record the
-			 * error to return on the next call or if the
-			 * app asks about it using getsockopt(SO_ERROR).
-			 */
-			sock->sk->sk_err = -err;
-		}
-
+	/* We only return an error if no datagrams were able to be sent */
+	if (datagrams != 0)
 		return datagrams;
-	}
 
 	return err;
 }
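After this cleanup the userspace contract is simple: sendmmsg(2) returns the number of datagrams actually sent, and -1 only when nothing went out; callers are expected to resume after a partial send rather than fish a stashed error out of SO_ERROR. A hedged sketch of such a send loop:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/socket.h>

/* Send all vlen messages, resuming after partial sends; sendmmsg()
 * returns how many were sent, or -1 only if no datagram went out. */
static int send_all(int fd, struct mmsghdr *msgs, unsigned int vlen)
{
	unsigned int done = 0;

	while (done < vlen) {
		int n = sendmmsg(fd, msgs + done, vlen - done, 0);

		if (n < 0) {
			perror("sendmmsg");
			return -1;
		}
		done += n;
	}
	return 0;
}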
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9b6a4d1ea8f..f4385e45a5f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
 /**
  * xprt_reserve_xprt - serialize write access to transports
  * @task: task that is requesting access to the transport
+ * @xprt: pointer to the target transport
  *
  * This prevents mixing the payload of separate requests, and prevents
  * transport connects from colliding with writes.  No congestion control
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 28d2aa109be..e83e7fee3bc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3464,7 +3464,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 				    tmp) {
 			enum ieee80211_band band = nla_type(attr);
 
-			if (band < 0 || band > IEEE80211_NUM_BANDS) {
+			if (band < 0 || band >= IEEE80211_NUM_BANDS) {
 				err = -EINVAL;
 				goto out_free;
 			}
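The fix is a textbook upper-bound check: IEEE80211_NUM_BANDS counts the valid bands, so the largest legal index is IEEE80211_NUM_BANDS - 1 and the guard must reject band >= IEEE80211_NUM_BANDS; with '>' alone, band == IEEE80211_NUM_BANDS slipped through and indexed one past the end. In miniature:

#include <stdio.h>

#define NUM_BANDS 2	/* valid indices: 0 .. NUM_BANDS - 1 */

static const char *band_name[NUM_BANDS] = { "2.4 GHz", "5 GHz" };

static const char *lookup(int band)
{
	if (band < 0 || band >= NUM_BANDS)	/* '>' alone lets band == NUM_BANDS through */
		return NULL;
	return band_name[band];
}

int main(void)
{
	printf("%s\n", lookup(1));		/* "5 GHz" */
	printf("%p\n", (void *)lookup(2));	/* NULL; '>' would have overrun */
	return 0;
}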
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 58064d9e565..791ab2e77f3 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -462,8 +462,8 @@ static struct xfrm_algo_desc ealg_list[] = {
 	.desc = {
 		.sadb_alg_id = SADB_X_EALG_AESCTR,
 		.sadb_alg_ivlen = 8,
-		.sadb_alg_minbits = 128,
-		.sadb_alg_maxbits = 256
+		.sadb_alg_minbits = 160,
+		.sadb_alg_maxbits = 288
 	}
 },
 };
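The new limits track how AES-CTR keying material is carried for IPsec: RFC 3686 transmits a 32-bit per-SA nonce appended to the AES key, so the key blob is the key length plus 32 bits, i.e. 128 + 32 = 160 at the minimum and 256 + 32 = 288 at the maximum. A trivial check of the arithmetic:

#include <stdio.h>

int main(void)
{
	const int nonce_bits = 32;	/* RFC 3686 per-SA nonce */
	const int keys[] = { 128, 192, 256 };

	for (int i = 0; i < 3; i++)
		printf("AES-%d-CTR keymat: %d bits\n",
		       keys[i], keys[i] + nonce_bits);
	return 0;	/* prints 160, 224, 288 */
}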
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a38316b2e3f..266a2292451 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -14,7 +14,7 @@
  * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
  *			    <dgoeddel@trustedcs.com>
  * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P.
- *	Paul Moore <paul.moore@hp.com>
+ *	Paul Moore <paul@paul-moore.com>
  * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd.
  *		       Yuichi Nakamura <ynakam@hitachisoft.jp>
  *
diff --git a/security/selinux/include/netif.h b/security/selinux/include/netif.h
index ce23edd128b..43d507242b4 100644
--- a/security/selinux/include/netif.h
+++ b/security/selinux/include/netif.h
@@ -8,7 +8,7 @@
  *
  * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
  * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
- *		      Paul Moore, <paul.moore@hp.com>
+ *		      Paul Moore <paul@paul-moore.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2,
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index cf2f628e6e2..8c59b8f150e 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -1,7 +1,7 @@
 /*
  * SELinux interface to the NetLabel subsystem
  *
- * Author : Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/security/selinux/include/netnode.h b/security/selinux/include/netnode.h
index 1b94450d11d..df7a5ed6c69 100644
--- a/security/selinux/include/netnode.h
+++ b/security/selinux/include/netnode.h
@@ -6,7 +6,7 @@
  * needed to reduce the lookup overhead since most of these queries happen on
  * a per-packet basis.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/security/selinux/include/netport.h b/security/selinux/include/netport.h
index 8991752eaf9..4d965b83d73 100644
--- a/security/selinux/include/netport.h
+++ b/security/selinux/include/netport.h
@@ -5,7 +5,7 @@
  * mapping is maintained as part of the normal policy but a fast cache is
  * needed to reduce the lookup overhead.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/security/selinux/netif.c b/security/selinux/netif.c
index 58cc481c93d..326f22cbe40 100644
--- a/security/selinux/netif.c
+++ b/security/selinux/netif.c
@@ -8,7 +8,7 @@
  *
  * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
  * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
- *		      Paul Moore <paul.moore@hp.com>
+ *		      Paul Moore <paul@paul-moore.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2,
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index c3bf3ed07b0..da4b8b23328 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -4,7 +4,7 @@
  * This file provides the necessary glue to tie NetLabel into the SELinux
  * subsystem.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  */
 
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 8b691a86318..3bf46abaa68 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -6,7 +6,7 @@
  * needed to reduce the lookup overhead since most of these queries happen on
  * a per-packet basis.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  * This code is heavily based on the "netif" concept originally developed by
  * James Morris <jmorris@redhat.com>
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
index ae76e298de7..0b62bd11246 100644
--- a/security/selinux/netport.c
+++ b/security/selinux/netport.c
@@ -5,7 +5,7 @@
  * mapping is maintained as part of the normal policy but a fast cache is
  * needed to reduce the lookup overhead.
  *
- * Author: Paul Moore <paul.moore@hp.com>
+ * Author: Paul Moore <paul@paul-moore.com>
  *
  * This code is heavily based on the "netif" concept originally developed by
  * James Morris <jmorris@redhat.com>
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index de7900ef53d..55d92cbb177 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2,7 +2,7 @@
  *
  * Added conditional policy language extensions
  *
- * Updated: Hewlett-Packard <paul.moore@hp.com>
+ * Updated: Hewlett-Packard <paul@paul-moore.com>
  *
  * Added support for the policy capability bitmap
  *
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index d42951fcbe8..30f119b1d1e 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -4,7 +4,7 @@
  * Author : Stephen Smalley, <sds@epoch.ncsc.mil>
  */
 /*
- * Updated: Hewlett-Packard <paul.moore@hp.com>
+ * Updated: Hewlett-Packard <paul@paul-moore.com>
  *
  * Added support to import/export the NetLabel category bitmap
  *
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index e96174216bc..fbf9c5816c7 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -11,7 +11,7 @@
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
 /*
- * Updated: Hewlett-Packard <paul.moore@hp.com>
+ * Updated: Hewlett-Packard <paul@paul-moore.com>
  *
  * Added support to import/export the MLS label from NetLabel
  *
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index 037bf9d82d4..e4369e3e636 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -11,7 +11,7 @@
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
 /*
- * Updated: Hewlett-Packard <paul.moore@hp.com>
+ * Updated: Hewlett-Packard <paul@paul-moore.com>
  *
  * Added support to import/export the MLS label from NetLabel
  *
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index d246aca3f4f..2381d0ded22 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -13,7 +13,7 @@
  *
  * Added conditional policy language extensions
  *
- * Updated: Hewlett-Packard <paul.moore@hp.com>
+ * Updated: Hewlett-Packard <paul@paul-moore.com>
  *
  * Added support for the policy capability bitmap
  *
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 973e00e34fa..f6917bc0aa0 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -13,7 +13,7 @@
  *
  * Added conditional policy language extensions
  *
- * Updated: Hewlett-Packard <paul.moore@hp.com>
+ * Updated: Hewlett-Packard <paul@paul-moore.com>
  *
  * Added support for NetLabel
  * Added support for the policy capability bitmap
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index f375eb2e195..b9c5e149903 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -9,7 +9,7 @@
  *
  * Copyright (C) 2007 Casey Schaufler <casey@schaufler-ca.com>
  * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
- *	Paul Moore <paul.moore@hp.com>
+ *	Paul Moore <paul@paul-moore.com>
  * Copyright (C) 2010 Nokia Corporation
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 5fb2e28e796..91cdf9435fe 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -342,7 +342,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream,
 			kfree(bufs);
 			return -EFAULT;
 		}
-		bufs[ch] = compat_ptr(ptr);
+		bufs[i] = compat_ptr(ptr);
 		bufptr++;
 	}
 	if (dir == SNDRV_PCM_STREAM_PLAYBACK)
diff --git a/sound/core/rtctimer.c b/sound/core/rtctimer.c
index 0851cd13e30..e85e72baff9 100644
--- a/sound/core/rtctimer.c
+++ b/sound/core/rtctimer.c
@@ -22,7 +22,7 @@
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/moduleparam.h>
+#include <linux/module.h>
 #include <linux/log2.h>
 #include <sound/core.h>
 #include <sound/timer.h>
diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c
index 3a7afa31c1d..71d32c868c9 100644
--- a/sound/pci/asihpi/hpidspcd.c
+++ b/sound/pci/asihpi/hpidspcd.c
@@ -43,6 +43,7 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code,
 	struct pci_dev *dev = os_data;
 	struct code_header header;
 	char fw_name[20];
+	short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND;
 	int err;
 
 	sprintf(fw_name, "asihpi/dsp%04x.bin", adapter);
@@ -85,8 +86,10 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code,
 
 	HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name);
 	dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL);
-	if (!dsp_code->pvt)
-		return HPI_ERROR_MEMORY_ALLOC;
+	if (!dsp_code->pvt) {
+		err_ret = HPI_ERROR_MEMORY_ALLOC;
+		goto error2;
+	}
 
 	dsp_code->pvt->dev = dev;
 	dsp_code->pvt->firmware = firmware;
@@ -99,7 +102,7 @@ error2:
 	release_firmware(firmware);
 error1:
 	dsp_code->block_length = 0;
-	return HPI_ERROR_DSP_FILE_NOT_FOUND;
+	return err_ret;
 }
 
 /*-------------------------------------------------------------------*/
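Introducing err_ret lets the allocation failure reuse the function's existing unwind labels instead of returning directly, which previously leaked the firmware reference. The goto-unwind idiom in isolation, as a small self-contained sketch:

#include <stdlib.h>

/* Each failure jumps to the label that releases exactly what has
 * been acquired so far; malloc() stands in for request_firmware(). */
static int do_work(void)
{
	int err = -1;
	char *fw, *pvt;

	fw = malloc(4096);
	if (!fw)
		goto out;

	pvt = malloc(64);
	if (!pvt)
		goto out_free_fw;	/* must not leak fw */

	/* ... use fw and pvt ... */
	err = 0;

	free(pvt);
out_free_fw:
	free(fw);
out:
	return err;
}

int main(void)
{
	return do_work();
}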
diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index 9683f84ecdc..a32502e796d 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -177,16 +177,21 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	} else {
 		u16 __user *ptr = NULL;
 		u32 size = 0;
-
+		u32 adapter_present;
 		/* -1=no data 0=read from user mem, 1=write to user mem */
 		int wrflag = -1;
-		u32 adapter = hm->h.adapter_index;
-		struct hpi_adapter *pa = &adapters[adapter];
+		struct hpi_adapter *pa;
+
+		if (hm->h.adapter_index < HPI_MAX_ADAPTERS) {
+			pa = &adapters[hm->h.adapter_index];
+			adapter_present = pa->type;
+		} else {
+			adapter_present = 0;
+		}
 
-		if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) {
-			hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER,
-				HPI_ADAPTER_OPEN,
-				HPI_ERROR_BAD_ADAPTER_NUMBER);
+		if (!adapter_present) {
+			hpi_init_response(&hr->r0, hm->h.object,
+				hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER);
 
 			uncopied_bytes =
 				copy_to_user(puhr, hr, sizeof(hr->h));
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index af130ee0c45..6edc67ced90 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -521,6 +521,7 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}");
 #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024)
 
 /* revisions >= 230 indicate AES32 card */
+#define HDSPM_MADI_ANCIENT_REV	204
 #define HDSPM_MADI_OLD_REV	207
 #define HDSPM_MADI_REV		210
 #define HDSPM_RAYDAT_REV	211
@@ -1217,6 +1218,22 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm)
 			rate = 0;
 			break;
 		}
+
+		/* QS and DS rates normally can not be detected
+		 * automatically by the card. Only exception is MADI
+		 * in 96k frame mode.
+		 *
+		 * So if we read SS values (32 .. 48k), check for
+		 * user-provided DS/QS bits in the control register
+		 * and multiply the base frequency accordingly.
+		 */
+		if (rate <= 48000) {
+			if (hdspm->control_register & HDSPM_QuadSpeed)
+				rate *= 4;
+			else if (hdspm->control_register &
+					HDSPM_DoubleSpeed)
+				rate *= 2;
+		}
 	}
 	break;
 }
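Because the hardware reports only the single-speed base rate outside the one MADI exception, the driver reconstructs double- and quad-speed rates from the user-selected speed bits. The arithmetic pulled out into a sketch, with hypothetical bit values:

#include <stdio.h>

#define QUAD_SPEED	0x1	/* hypothetical bit values for the sketch */
#define DOUBLE_SPEED	0x2

/* Map a detected single-speed base rate (32..48 kHz) plus the
 * configured speed-mode bits to the effective sample rate. */
static int effective_rate(int base, unsigned int ctrl)
{
	if (base > 48000)
		return base;		/* already DS/QS, trust it */
	if (ctrl & QUAD_SPEED)
		return base * 4;
	if (ctrl & DOUBLE_SPEED)
		return base * 2;
	return base;
}

int main(void)
{
	printf("%d\n", effective_rate(48000, QUAD_SPEED));	/* 192000 */
	return 0;
}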
@@ -3415,6 +3432,91 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol,
 	return change;
 }
 
+#define HDSPM_MADI_SPEEDMODE(xname, xindex) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+	.name = xname, \
+	.index = xindex, \
+	.info = snd_hdspm_info_madi_speedmode, \
+	.get = snd_hdspm_get_madi_speedmode, \
+	.put = snd_hdspm_put_madi_speedmode \
+}
+
+static int hdspm_madi_speedmode(struct hdspm *hdspm)
+{
+	if (hdspm->control_register & HDSPM_QuadSpeed)
+		return 2;
+	if (hdspm->control_register & HDSPM_DoubleSpeed)
+		return 1;
+	return 0;
+}
+
+static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode)
+{
+	hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed);
+	switch (mode) {
+	case 0:
+		break;
+	case 1:
+		hdspm->control_register |= HDSPM_DoubleSpeed;
+		break;
+	case 2:
+		hdspm->control_register |= HDSPM_QuadSpeed;
+		break;
+	}
+	hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register);
+
+	return 0;
+}
+
+static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_info *uinfo)
+{
+	static char *texts[] = { "Single", "Double", "Quad" };
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = 3;
+
+	if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items)
+		uinfo->value.enumerated.item =
+			uinfo->value.enumerated.items - 1;
+	strcpy(uinfo->value.enumerated.name,
+	       texts[uinfo->value.enumerated.item]);
+
+	return 0;
+}
+
+static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol,
+				      struct snd_ctl_elem_value *ucontrol)
+{
+	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+
+	spin_lock_irq(&hdspm->lock);
+	ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm);
+	spin_unlock_irq(&hdspm->lock);
+	return 0;
+}
+
+static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol,
+				      struct snd_ctl_elem_value *ucontrol)
+{
+	struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+	int change;
+	int val;
+
+	if (!snd_hdspm_use_is_exclusive(hdspm))
+		return -EBUSY;
+	val = ucontrol->value.integer.value[0];
+	if (val < 0)
+		val = 0;
+	if (val > 2)
+		val = 2;
+	spin_lock_irq(&hdspm->lock);
+	change = val != hdspm_madi_speedmode(hdspm);
+	hdspm_set_madi_speedmode(hdspm, val);
+	spin_unlock_irq(&hdspm->lock);
+	return change;
+}
 
 #define HDSPM_MIXER(xname, xindex) \
 { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \
@@ -4289,7 +4391,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = {
 	HDSPM_TX_64("TX 64 channels mode", 0),
 	HDSPM_C_TMS("Clear Track Marker", 0),
 	HDSPM_SAFE_MODE("Safe Mode", 0),
-	HDSPM_INPUT_SELECT("Input Select", 0)
+	HDSPM_INPUT_SELECT("Input Select", 0),
+	HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0)
 };
 
 
@@ -4302,7 +4405,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = {
 	HDSPM_SYNC_CHECK("MADI SyncCheck", 0),
 	HDSPM_TX_64("TX 64 channels mode", 0),
 	HDSPM_C_TMS("Clear Track Marker", 0),
-	HDSPM_SAFE_MODE("Safe Mode", 0)
+	HDSPM_SAFE_MODE("Safe Mode", 0),
+	HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0)
 };
 
 static struct snd_kcontrol_new snd_hdspm_controls_aio[] = {
@@ -6381,6 +6485,7 @@ static int __devinit snd_hdspm_create(struct snd_card *card,
 	switch (hdspm->firmware_rev) {
 	case HDSPM_MADI_REV:
 	case HDSPM_MADI_OLD_REV:
+	case HDSPM_MADI_ANCIENT_REV:
 		hdspm->io_type = MADI;
 		hdspm->card_name = "RME MADI";
 		hdspm->midiPorts = 3;
diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c
index 34aa972669e..3de99af8cb8 100644
--- a/sound/soc/txx9/txx9aclc.c
+++ b/sound/soc/txx9/txx9aclc.c
@@ -290,6 +290,7 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm)
 
 static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd)
 {
+	struct snd_card *card = rtd->card->snd_card;
 	struct snd_soc_dai *dai = rtd->cpu_dai;
 	struct snd_pcm *pcm = rtd->pcm;
 	struct platform_device *pdev = to_platform_device(dai->platform->dev);
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 6d8ef4a3a9b..8b2d37b59c9 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset)
 void print_header(void)
 {
 	if (show_pkg)
-		fprintf(stderr, "pkg ");
+		fprintf(stderr, "pk");
 	if (show_core)
-		fprintf(stderr, "core");
+		fprintf(stderr, " cr");
 	if (show_cpu)
 		fprintf(stderr, " CPU");
 	if (do_nhm_cstates)
 		fprintf(stderr, " %%c0 ");
 	if (has_aperf)
 		fprintf(stderr, " GHz");
 	fprintf(stderr, " TSC");
 	if (do_nhm_cstates)
-		fprintf(stderr, " %%c1 ");
+		fprintf(stderr, " %%c1");
 	if (do_nhm_cstates)
-		fprintf(stderr, " %%c3 ");
+		fprintf(stderr, " %%c3");
 	if (do_nhm_cstates)
-		fprintf(stderr, " %%c6 ");
+		fprintf(stderr, " %%c6");
 	if (do_snb_cstates)
-		fprintf(stderr, " %%c7 ");
+		fprintf(stderr, " %%c7");
 	if (do_snb_cstates)
-		fprintf(stderr, " %%pc2 ");
+		fprintf(stderr, " %%pc2");
 	if (do_nhm_cstates)
-		fprintf(stderr, " %%pc3 ");
+		fprintf(stderr, " %%pc3");
 	if (do_nhm_cstates)
-		fprintf(stderr, " %%pc6 ");
+		fprintf(stderr, " %%pc6");
 	if (do_snb_cstates)
-		fprintf(stderr, " %%pc7 ");
+		fprintf(stderr, " %%pc7");
 	if (extra_msr_offset)
 		fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
 
 	putc('\n', stderr);
 }
@@ -194,14 +194,14 @@ void print_cnt(struct counters *p)
 	/* topology columns, print blanks on 1st (average) line */
 	if (p == cnt_average) {
 		if (show_pkg)
 			fprintf(stderr, " ");
 		if (show_core)
 			fprintf(stderr, " ");
 		if (show_cpu)
 			fprintf(stderr, " ");
 	} else {
 		if (show_pkg)
-			fprintf(stderr, "%4d", p->pkg);
+			fprintf(stderr, "%d", p->pkg);
 		if (show_core)
 			fprintf(stderr, "%4d", p->core);
 		if (show_cpu)
@@ -241,22 +241,22 @@ void print_cnt(struct counters *p)
 		if (!skip_c1)
 			fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
 		else
 			fprintf(stderr, " ****");
 	}
 	if (do_nhm_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
+		fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc);
 	if (do_nhm_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
+		fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc);
 	if (do_snb_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
+		fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc);
 	if (do_snb_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
+		fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc);
 	if (do_nhm_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
+		fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc);
 	if (do_nhm_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
+		fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc);
 	if (do_snb_cstates)
-		fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
+		fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc);
 	if (extra_msr_offset)
 		fprintf(stderr, " 0x%016llx", p->extra_msr);
 	putc('\n', stderr);
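
The print_header()/print_cnt() changes above shorten the column labels and switch the data cells from a uniform "%7.2f" to " %6.2f" and " %5.2f", so each value stays flush under its narrower header. A standalone sketch of that fixed-width pairing (not turbostat code; labels and values are made up):

/* Demo: header widths (%7s/%6s) chosen to match the data cell
 * widths (" %6.2f" = 7 chars, " %5.2f" = 6 chars). */
#include <stdio.h>

int main(void)
{
	fprintf(stderr, "%7s%7s%6s\n", "%c3", "%c6", "%pc3");
	fprintf(stderr, " %6.2f %6.2f %5.2f\n", 12.34, 5.60, 99.99);
	return 0;
}
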
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 2618ef2ba31..33c5c7ee148 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -137,7 +137,6 @@ void cmdline(int argc, char **argv)
 void validate_cpuid(void)
 {
 	unsigned int eax, ebx, ecx, edx, max_level;
-	char brand[16];
 	unsigned int fms, family, model, stepping;
 
 	eax = ebx = ecx = edx = 0;
@@ -160,8 +159,8 @@ void validate_cpuid(void)
 	model += ((fms >> 16) & 0xf) << 4;
 
 	if (verbose > 1)
-		printf("CPUID %s %d levels family:model:stepping "
-			"0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
+		printf("CPUID %d levels family:model:stepping "
+			"0x%x:%x:%x (%d:%d:%d)\n", max_level,
 			family, model, stepping, family, model, stepping);
 
 	if (!(edx & (1 << 5))) {
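
The fms arithmetic retained above is the standard CPUID leaf-1 decode: stepping in bits 3:0, model in bits 7:4, family in bits 11:8, with the extended-model bits 19:16 folded in on top. A self-contained sketch using GCC's <cpuid.h> (an assumption for portability; the tool issues CPUID its own way):

/* Standalone sketch of the family/model/stepping decode shown above. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int fms, family, model, stepping;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	fms = eax;
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	if (family == 6 || family == 0xf)
		model += ((fms >> 16) & 0xf) << 4;	/* extended model */
	if (family == 0xf)
		family += (fms >> 20) & 0xff;		/* extended family */

	printf("family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
	       family, model, stepping, family, model, stepping);
	return 0;
}
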