author    Olof Johansson <olof@lixom.net>    2014-11-02 16:36:05 -0500
committer Olof Johansson <olof@lixom.net>    2014-11-02 16:37:07 -0500
commit    4257412db57900e43716d0b7ddd4f4a51e6ed2f4 (patch)
tree      759963245a484422e9ad2639cb223b53f844ff15
parent    cc040ba269ae6972face1dc7376ab3eaab9f64c8 (diff)
parent    4b91f7f3c8b20e073b7bfc098625b37f99789508 (diff)
Merge tag 'fixes-against-v3.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes

Merge "omap fixes against v3.18-rc2" from Tony Lindgren:

Few fixes for omaps to enable NAND BCH so devices won't produce errors
when booted with omap2plus_defconfig, and reduce bloat by making IPV6 a
loadable module.

Also let's add a warning about legacy boot being deprecated for omap3.
We now have things working with device tree, and only omap3 is still
booting in legacy mode. So hopefully this warning will help move the
remaining legacy mode users to boot with device tree.

As the total reduction of code and static data is somewhere around
20000 lines of code once we remove omap3 legacy mode booting, we really
do want to make omap3 boot in device tree mode only over the next few
merge cycles.

* tag 'fixes-against-v3.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap: (407 commits)
  ARM: OMAP2+: Warn about deprecated legacy booting mode
  ARM: omap2plus_defconfig: Fix errors with NAND BCH
  ARM: omap2plus_defconfig: Fix bloat caused by having ipv6 built-in
  + Linux 3.18-rc2

Signed-off-by: Olof Johansson <olof@lixom.net>
-rw-r--r--  Documentation/arm64/memory.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/mailbox/mailbox.txt | 38
-rw-r--r--  Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt | 19
-rw-r--r--  Documentation/devicetree/bindings/pwm/pwm-rockchip.txt | 4
-rw-r--r--  Documentation/devicetree/bindings/thermal/imx-thermal.txt | 5
-rw-r--r--  Documentation/devicetree/bindings/watchdog/cadence-wdt.txt | 24
-rw-r--r--  Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt | 3
-rw-r--r--  Documentation/devicetree/bindings/watchdog/meson6-wdt.txt | 13
-rw-r--r--  Documentation/devicetree/bindings/watchdog/qcom-wdt.txt | 24
-rw-r--r--  Documentation/devicetree/bindings/watchdog/samsung-wdt.txt | 1
-rw-r--r--  Documentation/filesystems/Locking | 2
-rw-r--r--  Documentation/filesystems/overlayfs.txt | 198
-rw-r--r--  Documentation/filesystems/vfs.txt | 7
-rw-r--r--  Documentation/kernel-parameters.txt | 14
-rw-r--r--  Documentation/mailbox.txt | 122
-rw-r--r--  Documentation/power/pm_qos_interface.txt | 4
-rw-r--r--  Documentation/scsi/osd.txt | 3
-rw-r--r--  Documentation/target/tcmu-design.txt | 378
-rw-r--r--  MAINTAINERS | 17
-rw-r--r--  Makefile | 2
-rw-r--r--  arch/arc/Kconfig | 6
-rw-r--r--  arch/arc/Makefile | 17
-rw-r--r--  arch/arc/boot/dts/angel4.dts | 5
-rw-r--r--  arch/arc/boot/dts/nsimosci.dts | 7
-rw-r--r--  arch/arc/configs/fpga_defconfig | 1
-rw-r--r--  arch/arc/configs/fpga_noramfs_defconfig | 1
-rw-r--r--  arch/arc/configs/nsimosci_defconfig | 1
-rw-r--r--  arch/arc/include/asm/arcregs.h | 89
-rw-r--r--  arch/arc/include/asm/atomic.h | 4
-rw-r--r--  arch/arc/include/asm/bitops.h | 4
-rw-r--r--  arch/arc/include/asm/bug.h | 7
-rw-r--r--  arch/arc/include/asm/cache.h | 2
-rw-r--r--  arch/arc/include/asm/current.h | 4
-rw-r--r--  arch/arc/include/asm/irqflags.h | 4
-rw-r--r--  arch/arc/include/asm/kgdb.h | 32
-rw-r--r--  arch/arc/include/asm/processor.h | 13
-rw-r--r--  arch/arc/include/asm/setup.h | 1
-rw-r--r--  arch/arc/include/asm/smp.h | 10
-rw-r--r--  arch/arc/include/asm/string.h | 3
-rw-r--r--  arch/arc/include/asm/syscalls.h | 4
-rw-r--r--  arch/arc/include/asm/thread_info.h | 4
-rw-r--r--  arch/arc/include/asm/unaligned.h | 2
-rw-r--r--  arch/arc/kernel/Makefile | 2
-rw-r--r--  arch/arc/kernel/disasm.c | 4
-rw-r--r--  arch/arc/kernel/head.S | 10
-rw-r--r--  arch/arc/kernel/kgdb.c | 5
-rw-r--r--  arch/arc/kernel/perf_event.c | 22
-rw-r--r--  arch/arc/kernel/setup.c | 272
-rw-r--r--  arch/arc/kernel/smp.c | 2
-rw-r--r--  arch/arc/mm/cache_arc700.c | 14
-rw-r--r--  arch/arc/mm/tlb.c | 8
-rw-r--r--  arch/arc/plat-arcfpga/Kconfig | 13
-rw-r--r--  arch/arc/plat-arcfpga/include/plat/irq.h | 27
-rw-r--r--  arch/arc/plat-arcfpga/include/plat/memmap.h | 29
-rw-r--r--  arch/arc/plat-arcfpga/platform.c | 61
-rw-r--r--  arch/arc/plat-arcfpga/smp.c | 3
-rw-r--r--  arch/arc/plat-tb10x/Kconfig | 1
-rw-r--r--  arch/arc/plat-tb10x/tb10x.c | 13
-rw-r--r--  arch/arm/configs/multi_v7_defconfig | 1
-rw-r--r--  arch/arm/configs/omap2plus_defconfig | 4
-rw-r--r--  arch/arm/mach-highbank/highbank.c | 2
-rw-r--r--  arch/arm/mach-omap2/omap_device.c | 4
-rw-r--r--  arch/arm64/Kconfig | 3
-rw-r--r--  arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi | 35
-rw-r--r--  arch/arm64/configs/defconfig | 2
-rw-r--r--  arch/arm64/include/asm/compat.h | 4
-rw-r--r--  arch/arm64/include/asm/elf.h | 4
-rw-r--r--  arch/arm64/include/asm/irq_work.h | 11
-rw-r--r--  arch/arm64/kernel/efi.c | 44
-rw-r--r--  arch/arm64/kernel/process.c | 5
-rw-r--r--  arch/arm64/mm/ioremap.c | 4
-rw-r--r--  arch/arm64/mm/mmu.c | 12
-rw-r--r--  arch/arm64/mm/pgd.c | 18
-rw-r--r--  arch/arm64/net/bpf_jit.h | 8
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 84
-rw-r--r--  arch/ia64/kernel/efi.c | 6
-rw-r--r--  arch/mips/Kconfig | 1
-rw-r--r--  arch/mips/ath79/mach-db120.c | 2
-rw-r--r--  arch/mips/cavium-octeon/setup.c | 9
-rw-r--r--  arch/mips/include/asm/cop2.h | 8
-rw-r--r--  arch/mips/include/asm/ftrace.h | 4
-rw-r--r--  arch/mips/include/asm/idle.h | 7
-rw-r--r--  arch/mips/include/uapi/asm/ptrace.h | 2
-rw-r--r--  arch/mips/kernel/idle.c | 3
-rw-r--r--  arch/mips/lasat/Kconfig | 2
-rw-r--r--  arch/mips/loongson/lemote-2f/clock.c | 5
-rw-r--r--  arch/mips/math-emu/cp1emu.c | 4
-rw-r--r--  arch/mips/mm/tlbex.c | 6
-rw-r--r--  arch/mips/mti-malta/Makefile | 3
-rw-r--r--  arch/mips/mti-sead3/Makefile | 1
-rw-r--r--  arch/mips/mti-sead3/sead3-i2c.c | 8
-rw-r--r--  arch/mips/mti-sead3/sead3-pic32-bus.c | 102
-rw-r--r--  arch/mips/mti-sead3/sead3-pic32-i2c-drv.c | 423
-rw-r--r--  arch/mips/pci/pci-lantiq.c | 7
-rw-r--r--  arch/mips/pmcs-msp71xx/msp_irq.c | 1
-rw-r--r--  arch/mips/pmcs-msp71xx/msp_irq_cic.c | 4
-rw-r--r--  arch/mips/sibyte/Makefile | 1
-rw-r--r--  arch/powerpc/configs/pseries_le_defconfig | 7
-rw-r--r--  arch/powerpc/include/asm/eeh.h | 3
-rw-r--r--  arch/powerpc/include/asm/perf_event.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 3
-rw-r--r--  arch/powerpc/include/asm/syscall.h | 2
-rw-r--r--  arch/powerpc/kernel/dma.c | 8
-rw-r--r--  arch/powerpc/kernel/eeh.c | 19
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 12
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 10
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 5
-rw-r--r--  arch/powerpc/kernel/irq.c | 2
-rw-r--r--  arch/powerpc/kernel/misc.S | 4
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c | 2
-rw-r--r--  arch/powerpc/kernel/process.c | 2
-rw-r--r--  arch/powerpc/kernel/rtas_pci.c | 30
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 32
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 2
-rw-r--r--  arch/powerpc/mm/numa.c | 41
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-ioda.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c | 57
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 21
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c | 22
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/pseries.h | 3
-rw-r--r--  arch/powerpc/sysdev/msi_bitmap.c | 66
-rw-r--r--  arch/s390/include/uapi/asm/unistd.h | 3
-rw-r--r--  arch/s390/kernel/compat_wrapper.c | 1
-rw-r--r--  arch/s390/kernel/syscalls.S | 1
-rw-r--r--  arch/s390/kernel/uprobes.c | 2
-rw-r--r--  arch/s390/lib/probes.c | 2
-rw-r--r--  arch/s390/mm/pgtable.c | 6
-rw-r--r--  arch/sparc/include/asm/oplib_64.h | 3
-rw-r--r--  arch/sparc/include/asm/setup.h | 2
-rw-r--r--  arch/sparc/kernel/entry.h | 3
-rw-r--r--  arch/sparc/kernel/head_64.S | 40
-rw-r--r--  arch/sparc/kernel/hvtramp.S | 1
-rw-r--r--  arch/sparc/kernel/setup_64.c | 28
-rw-r--r--  arch/sparc/kernel/trampoline_64.S | 12
-rw-r--r--  arch/sparc/mm/gup.c | 30
-rw-r--r--  arch/sparc/prom/cif.S | 5
-rw-r--r--  arch/sparc/prom/init_64.c | 6
-rw-r--r--  arch/sparc/prom/p1275.c | 2
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 32
-rw-r--r--  arch/x86/include/asm/efi.h | 31
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 16
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 2
-rw-r--r--  arch/x86/kvm/emulate.c | 250
-rw-r--r--  arch/x86/kvm/i8254.c | 2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 2
-rw-r--r--  arch/x86/kvm/svm.c | 8
-rw-r--r--  arch/x86/kvm/vmx.c | 24
-rw-r--r--  arch/x86/kvm/x86.c | 38
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c | 36
-rw-r--r--  arch/x86/platform/efi/efi.c | 52
-rw-r--r--  arch/x86/platform/efi/efi_32.c | 12
-rw-r--r--  arch/x86/platform/efi/efi_64.c | 6
-rw-r--r--  arch/x86/platform/efi/efi_stub_32.S | 4
-rw-r--r--  arch/x86/platform/intel-mid/intel_mid_weak_decls.h | 7
-rw-r--r--  arch/x86/xen/enlighten.c | 3
-rw-r--r--  arch/x86/xen/mmu.c | 5
-rw-r--r--  arch/x86/xen/p2m.c | 83
-rw-r--r--  arch/x86/xen/setup.c | 1
-rw-r--r--  arch/x86/xen/time.c | 2
-rw-r--r--  crypto/cts.c | 3
-rw-r--r--  crypto/sha1_generic.c | 2
-rw-r--r--  crypto/sha256_generic.c | 5
-rw-r--r--  crypto/sha512_generic.c | 2
-rw-r--r--  crypto/tgr192.c | 4
-rw-r--r--  crypto/vmac.c | 2
-rw-r--r--  crypto/wp512.c | 8
-rw-r--r--  drivers/acpi/Kconfig | 2
-rw-r--r--  drivers/acpi/Makefile | 1
-rw-r--r--  drivers/acpi/acpi_platform.c | 3
-rw-r--r--  drivers/acpi/acpica/achware.h | 2
-rw-r--r--  drivers/acpi/acpica/aclocal.h | 4
-rw-r--r--  drivers/acpi/acpica/actables.h | 2
-rw-r--r--  drivers/acpi/acpica/amlresrc.h | 34
-rw-r--r--  drivers/acpi/acpica/evgpe.c | 23
-rw-r--r--  drivers/acpi/acpica/evgpeinit.c | 1
-rw-r--r--  drivers/acpi/acpica/evxface.c | 27
-rw-r--r--  drivers/acpi/acpica/evxfevnt.c | 40
-rw-r--r--  drivers/acpi/acpica/evxfgpe.c | 12
-rw-r--r--  drivers/acpi/acpica/hwgpe.c | 9
-rw-r--r--  drivers/acpi/acpica/tbxfroot.c | 33
-rw-r--r--  drivers/acpi/device_pm.c | 3
-rw-r--r--  drivers/acpi/ec.c | 107
-rw-r--r--  drivers/acpi/fan.c | 338
-rw-r--r--  drivers/acpi/int340x_thermal.c | 51
-rw-r--r--  drivers/acpi/internal.h | 10
-rw-r--r--  drivers/acpi/scan.c | 3
-rw-r--r--  drivers/acpi/sysfs.c | 4
-rw-r--r--  drivers/acpi/thermal.c | 18
-rw-r--r--  drivers/acpi/utils.c | 28
-rw-r--r--  drivers/char/random.c | 8
-rw-r--r--  drivers/cpufreq/cpufreq-dt.c | 21
-rw-r--r--  drivers/cpufreq/cpufreq.c | 38
-rw-r--r--  drivers/cpufreq/highbank-cpufreq.c | 2
-rw-r--r--  drivers/cpufreq/intel_pstate.c | 110
-rw-r--r--  drivers/cpuidle/Kconfig.mips | 2
-rw-r--r--  drivers/cpuidle/cpuidle-powernv.c | 27
-rw-r--r--  drivers/firmware/efi/efi.c | 79
-rw-r--r--  drivers/firmware/efi/libstub/arm-stub.c | 4
-rw-r--r--  drivers/firmware/efi/libstub/efi-stub-helper.c | 62
-rw-r--r--  drivers/firmware/efi/runtime-wrappers.c | 164
-rw-r--r--  drivers/firmware/efi/vars.c | 61
-rw-r--r--  drivers/gpu/drm/cirrus/cirrus_drv.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 19
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 36
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h | 3
-rw-r--r--  drivers/gpu/drm/i915/intel_panel.c | 5
-rw-r--r--  drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c | 10
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_chan.c | 12
-rw-r--r--  drivers/gpu/drm/qxl/qxl_display.c | 16
-rw-r--r--  drivers/gpu/drm/radeon/btc_dpm.c | 18
-rw-r--r--  drivers/gpu/drm/radeon/btc_dpm.h | 2
-rw-r--r--  drivers/gpu/drm/radeon/ci_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/cik_sdma.c | 21
-rw-r--r--  drivers/gpu/drm/radeon/cypress_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/dce3_1_afmt.c | 6
-rw-r--r--  drivers/gpu/drm/radeon/dce6_afmt.c | 8
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_hdmi.c | 8
-rw-r--r--  drivers/gpu/drm/radeon/ni_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/r600_dma.c | 21
-rw-r--r--  drivers/gpu/drm/radeon/r600_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c | 2
-rw-r--r--  drivers/gpu/drm/radeon/rs780_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/rv6xx_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/rv770_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/si_dpm.c | 25
-rw-r--r--  drivers/gpu/drm/radeon/sumo_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/radeon/trinity_dpm.c | 1
-rw-r--r--  drivers/gpu/drm/ttm/ttm_bo.c | 28
-rw-r--r--  drivers/hwmon/menf21bmc_hwmon.c | 1
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 6
-rw-r--r--  drivers/leds/led-class.c | 23
-rw-r--r--  drivers/leds/led-core.c | 19
-rw-r--r--  drivers/leds/leds-gpio-register.c | 5
-rw-r--r--  drivers/leds/leds-gpio.c | 14
-rw-r--r--  drivers/leds/leds-lp3944.c | 3
-rw-r--r--  drivers/leds/trigger/ledtrig-gpio.c | 2
-rw-r--r--  drivers/mailbox/Makefile | 4
-rw-r--r--  drivers/mailbox/mailbox.c | 465
-rw-r--r--  drivers/mailbox/pl320-ipc.c | 2
-rw-r--r--  drivers/pci/pcie/pme.c | 6
-rw-r--r--  drivers/pwm/Kconfig | 22
-rw-r--r--  drivers/pwm/Makefile | 2
-rw-r--r--  drivers/pwm/core.c | 31
-rw-r--r--  drivers/pwm/pwm-atmel.c | 24
-rw-r--r--  drivers/pwm/pwm-fsl-ftm.c | 90
-rw-r--r--  drivers/pwm/pwm-imx.c | 71
-rw-r--r--  drivers/pwm/pwm-lpss-pci.c | 64
-rw-r--r--  drivers/pwm/pwm-lpss-platform.c | 68
-rw-r--r--  drivers/pwm/pwm-lpss.c | 137
-rw-r--r--  drivers/pwm/pwm-lpss.h | 32
-rw-r--r--  drivers/pwm/pwm-rockchip.c | 57
-rw-r--r--  drivers/rtc/Kconfig | 2
-rw-r--r--  drivers/rtc/rtc-efi.c | 1
-rw-r--r--  drivers/s390/char/Kconfig | 2
-rw-r--r--  drivers/scsi/osd/Kbuild | 2
-rw-r--r--  drivers/scsi/osd/Kconfig | 2
-rw-r--r--  drivers/scsi/osd/osd_debug.h | 2
-rw-r--r--  drivers/scsi/osd/osd_initiator.c | 4
-rw-r--r--  drivers/scsi/osd/osd_uld.c | 4
-rw-r--r--  drivers/scsi/qla2xxx/qla_target.c | 35
-rw-r--r--  drivers/scsi/qla2xxx/qla_target.h | 12
-rw-r--r--  drivers/scsi/qla2xxx/tcm_qla2xxx.c | 11
-rw-r--r--  drivers/target/Kconfig | 7
-rw-r--r--  drivers/target/Makefile | 1
-rw-r--r--  drivers/target/iscsi/iscsi_target.c | 6
-rw-r--r--  drivers/target/iscsi/iscsi_target_configfs.c | 10
-rw-r--r--  drivers/target/iscsi/iscsi_target_erl0.c | 6
-rw-r--r--  drivers/target/iscsi/iscsi_target_login.c | 8
-rw-r--r--  drivers/target/iscsi/iscsi_target_util.c | 5
-rw-r--r--  drivers/target/loopback/tcm_loop.c | 29
-rw-r--r--  drivers/target/target_core_alua.c | 33
-rw-r--r--  drivers/target/target_core_configfs.c | 26
-rw-r--r--  drivers/target/target_core_device.c | 48
-rw-r--r--  drivers/target/target_core_fabric_configfs.c | 13
-rw-r--r--  drivers/target/target_core_fabric_lib.c | 6
-rw-r--r--  drivers/target/target_core_file.c | 13
-rw-r--r--  drivers/target/target_core_internal.h | 6
-rw-r--r--  drivers/target/target_core_pr.c | 107
-rw-r--r--  drivers/target/target_core_pr.h | 2
-rw-r--r--  drivers/target/target_core_pscsi.c | 16
-rw-r--r--  drivers/target/target_core_sbc.c | 2
-rw-r--r--  drivers/target/target_core_tmr.c | 24
-rw-r--r--  drivers/target/target_core_tpg.c | 53
-rw-r--r--  drivers/target/target_core_transport.c | 27
-rw-r--r--  drivers/target/target_core_ua.c | 15
-rw-r--r--  drivers/target/target_core_ua.h | 1
-rw-r--r--  drivers/target/target_core_user.c | 1167
-rw-r--r--  drivers/target/tcm_fc/tfc_sess.c | 2
-rw-r--r--  drivers/thermal/Kconfig | 49
-rw-r--r--  drivers/thermal/Makefile | 3
-rw-r--r--  drivers/thermal/fair_share.c | 12
-rw-r--r--  drivers/thermal/gov_bang_bang.c | 131
-rw-r--r--  drivers/thermal/imx_thermal.c | 91
-rw-r--r--  drivers/thermal/int3403_thermal.c | 296
-rw-r--r--  drivers/thermal/int340x_thermal/Makefile | 4
-rw-r--r--  drivers/thermal/int340x_thermal/acpi_thermal_rel.c | 400
-rw-r--r--  drivers/thermal/int340x_thermal/acpi_thermal_rel.h | 84
-rw-r--r--  drivers/thermal/int340x_thermal/int3400_thermal.c | 271
-rw-r--r--  drivers/thermal/int340x_thermal/int3402_thermal.c | 242
-rw-r--r--  drivers/thermal/int340x_thermal/int3403_thermal.c | 477
-rw-r--r--  drivers/thermal/of-thermal.c | 12
-rw-r--r--  drivers/thermal/step_wise.c | 7
-rw-r--r--  drivers/thermal/thermal_core.c | 12
-rw-r--r--  drivers/thermal/thermal_core.h | 8
-rw-r--r--  drivers/uio/uio.c | 12
-rw-r--r--  drivers/watchdog/Kconfig | 54
-rw-r--r--  drivers/watchdog/Makefile | 5
-rw-r--r--  drivers/watchdog/booke_wdt.c | 28
-rw-r--r--  drivers/watchdog/cadence_wdt.c | 516
-rw-r--r--  drivers/watchdog/da9063_wdt.c | 191
-rw-r--r--  drivers/watchdog/dw_wdt.c | 36
-rw-r--r--  drivers/watchdog/imx2_wdt.c | 43
-rw-r--r--  drivers/watchdog/meson_wdt.c | 236
-rw-r--r--  drivers/watchdog/of_xilinx_wdt.c | 1
-rw-r--r--  drivers/watchdog/qcom-wdt.c | 224
-rw-r--r--  drivers/watchdog/rn5t618_wdt.c | 198
-rw-r--r--  drivers/watchdog/s3c2410_wdt.c | 47
-rw-r--r--  drivers/watchdog/stmp3xxx_rtc_wdt.c | 24
-rw-r--r--  drivers/watchdog/sunxi_wdt.c | 111
-rw-r--r--  drivers/watchdog/ts72xx_wdt.c | 6
-rw-r--r--  drivers/xen/balloon.c | 3
-rw-r--r--  drivers/xen/pci.c | 27
-rw-r--r--  fs/Kconfig | 1
-rw-r--r--  fs/Makefile | 1
-rw-r--r--  fs/btrfs/ioctl.c | 20
-rw-r--r--  fs/buffer.c | 48
-rw-r--r--  fs/dcache.c | 2
-rw-r--r--  fs/ecryptfs/main.c | 7
-rw-r--r--  fs/exofs/Kbuild | 2
-rw-r--r--  fs/exofs/common.h | 2
-rw-r--r--  fs/exofs/dir.c | 2
-rw-r--r--  fs/exofs/exofs.h | 2
-rw-r--r--  fs/exofs/file.c | 2
-rw-r--r--  fs/exofs/inode.c | 2
-rw-r--r--  fs/exofs/namei.c | 2
-rw-r--r--  fs/exofs/ore.c | 4
-rw-r--r--  fs/exofs/ore_raid.c | 2
-rw-r--r--  fs/exofs/ore_raid.h | 2
-rw-r--r--  fs/exofs/super.c | 2
-rw-r--r--  fs/exofs/symlink.c | 2
-rw-r--r--  fs/exofs/sys.c | 2
-rw-r--r--  fs/ext4/balloc.c | 15
-rw-r--r--  fs/ext4/bitmap.c | 12
-rw-r--r--  fs/ext4/dir.c | 8
-rw-r--r--  fs/ext4/ext4.h | 50
-rw-r--r--  fs/ext4/ext4_extents.h | 1
-rw-r--r--  fs/ext4/ext4_jbd2.c | 4
-rw-r--r--  fs/ext4/ext4_jbd2.h | 6
-rw-r--r--  fs/ext4/extents.c | 619
-rw-r--r--  fs/ext4/extents_status.c | 200
-rw-r--r--  fs/ext4/extents_status.h | 13
-rw-r--r--  fs/ext4/ialloc.c | 3
-rw-r--r--  fs/ext4/indirect.c | 86
-rw-r--r--  fs/ext4/inline.c | 7
-rw-r--r--  fs/ext4/inode.c | 133
-rw-r--r--  fs/ext4/ioctl.c | 13
-rw-r--r--  fs/ext4/mballoc.c | 15
-rw-r--r--  fs/ext4/migrate.c | 11
-rw-r--r--  fs/ext4/mmp.c | 6
-rw-r--r--  fs/ext4/move_extent.c | 1068
-rw-r--r--  fs/ext4/namei.c | 335
-rw-r--r--  fs/ext4/resize.c | 3
-rw-r--r--  fs/ext4/super.c | 245
-rw-r--r--  fs/ext4/xattr.c | 44
-rw-r--r--  fs/internal.h | 7
-rw-r--r--  fs/jbd/journal.c | 2
-rw-r--r--  fs/jbd2/checkpoint.c | 334
-rw-r--r--  fs/jbd2/journal.c | 18
-rw-r--r--  fs/jbd2/recovery.c | 1
-rw-r--r--  fs/namei.c | 41
-rw-r--r--  fs/namespace.c | 27
-rw-r--r--  fs/nfs/objlayout/objio_osd.c | 2
-rw-r--r--  fs/nfs/objlayout/objlayout.c | 2
-rw-r--r--  fs/nfs/objlayout/objlayout.h | 2
-rw-r--r--  fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 2
-rw-r--r--  fs/open.c | 23
-rw-r--r--  fs/overlayfs/Kconfig | 10
-rw-r--r--  fs/overlayfs/Makefile | 7
-rw-r--r--  fs/overlayfs/copy_up.c | 414
-rw-r--r--  fs/overlayfs/dir.c | 921
-rw-r--r--  fs/overlayfs/inode.c | 425
-rw-r--r--  fs/overlayfs/overlayfs.h | 191
-rw-r--r--  fs/overlayfs/readdir.c | 590
-rw-r--r--  fs/overlayfs/super.c | 796
-rw-r--r--  fs/splice.c | 1
-rw-r--r--  include/acpi/acnames.h | 1
-rw-r--r--  include/acpi/acpi_bus.h | 1
-rw-r--r--  include/acpi/acpixf.h | 2
-rw-r--r--  include/acpi/actypes.h | 4
-rw-r--r--  include/linux/acpi.h | 1
-rw-r--r--  include/linux/audit.h | 2
-rw-r--r--  include/linux/buffer_head.h | 47
-rw-r--r--  include/linux/clocksource.h | 2
-rw-r--r--  include/linux/cpufreq-dt.h | 22
-rw-r--r--  include/linux/cpufreq.h | 2
-rw-r--r--  include/linux/crash_dump.h | 15
-rw-r--r--  include/linux/efi.h | 17
-rw-r--r--  include/linux/fs.h | 39
-rw-r--r--  include/linux/jbd2.h | 2
-rw-r--r--  include/linux/kernel.h | 1
-rw-r--r--  include/linux/kgdb.h | 2
-rw-r--r--  include/linux/kvm_host.h | 1
-rw-r--r--  include/linux/leds.h | 16
-rw-r--r--  include/linux/mailbox_client.h | 46
-rw-r--r--  include/linux/mailbox_controller.h | 133
-rw-r--r--  include/linux/memory.h | 2
-rw-r--r--  include/linux/mm.h | 1
-rw-r--r--  include/linux/mount.h | 3
-rw-r--r--  include/linux/oom.h | 3
-rw-r--r--  include/linux/pl320-ipc.h (renamed from include/linux/mailbox.h) | 0
-rw-r--r--  include/linux/pm_qos.h | 5
-rw-r--r--  include/linux/pnfs_osd_xdr.h | 2
-rw-r--r--  include/linux/string.h | 5
-rw-r--r--  include/linux/thermal.h | 4
-rw-r--r--  include/linux/uio_driver.h | 12
-rw-r--r--  include/linux/uprobes.h | 14
-rw-r--r--  include/linux/watchdog.h | 9
-rw-r--r--  include/scsi/osd_initiator.h | 2
-rw-r--r--  include/scsi/osd_ore.h | 2
-rw-r--r--  include/scsi/osd_protocol.h | 4
-rw-r--r--  include/scsi/osd_sec.h | 2
-rw-r--r--  include/scsi/osd_sense.h | 2
-rw-r--r--  include/scsi/osd_types.h | 2
-rw-r--r--  include/target/target_core_base.h | 17
-rw-r--r--  include/trace/events/ext4.h | 59
-rw-r--r--  include/trace/events/thermal.h | 83
-rw-r--r--  include/uapi/linux/Kbuild | 1
-rw-r--r--  include/uapi/linux/fs.h | 1
-rw-r--r--  include/uapi/linux/target_core_user.h | 142
-rw-r--r--  kernel/freezer.c | 9
-rw-r--r--  kernel/power/process.c | 57
-rw-r--r--  kernel/power/qos.c | 27
-rw-r--r--  lib/cmdline.c | 29
-rw-r--r--  lib/string.c | 16
-rw-r--r--  mm/oom_kill.c | 17
-rw-r--r--  mm/page_alloc.c | 8
-rw-r--r--  mm/shmem.c | 36
-rw-r--r--  mm/truncate.c | 57
-rw-r--r--  sound/core/pcm_native.c | 14
-rw-r--r--  sound/pci/hda/hda_local.h | 4
-rw-r--r--  sound/pci/hda/patch_hdmi.c | 15
-rw-r--r--  sound/pci/hda/patch_realtek.c | 7
-rw-r--r--  sound/usb/quirks-table.h | 30
-rw-r--r--  tools/power/acpi/os_specific/service_layers/osunixxf.c | 8
-rw-r--r--  tools/power/acpi/tools/acpidump/apdump.c | 2
-rw-r--r--  virt/kvm/iommu.c | 8
-rw-r--r--  virt/kvm/kvm_main.c | 7
-rw-r--r--  virt/kvm/vfio.c | 5
-rw-r--r--  virt/kvm/vfio.h | 4
453 files changed, 14972 insertions, 4850 deletions
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 344e85cc7323..d7273a5f6456 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -17,7 +17,7 @@ User addresses have bits 63:48 set to 0 while the kernel addresses have
 the same bits set to 1. TTBRx selection is given by bit 63 of the
 virtual address. The swapper_pg_dir contains only kernel (global)
 mappings while the user pgd contains only user (non-global) mappings.
-The swapper_pgd_dir address is written to TTBR1 and never written to
+The swapper_pg_dir address is written to TTBR1 and never written to
 TTBR0.
 
 
diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 000000000000..1a2cd3d266db
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+	specifier.
+
+Example:
+	mailbox: mailbox {
+		...
+		#mbox-cells = <1>;
+	};
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+	required by the client. The use of this property
+	is discouraged in favor of using index in list of
+	'mboxes' while requesting a mailbox. Instead the
+	platforms may define channel indices, in DT headers,
+	to something legible.
+
+Example:
+	pwr_cntrl: power {
+		...
+		mbox-names = "pwr-ctrl", "rpc";
+		mboxes = <&mailbox 0
+			&mailbox 1>;
+	};
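
As a rough client-side illustration (not part of the binding itself), a driver
for the pwr_cntrl node above could request its channels by index, matching the
order of the 'mboxes' property; mbox_request_channel() is the real helper from
include/linux/mailbox_client.h, the surrounding names are hypothetical:

	#include <linux/err.h>
	#include <linux/mailbox_client.h>

	/* hypothetical client state; a real driver would typically keep
	 * one mbox_client per channel */
	static struct mbox_client pwr_cl = {
		.tx_block = true,
	};

	static int pwr_cntrl_request_channels(void)
	{
		struct mbox_chan *pwr, *rpc;

		/* index 0 = "pwr-ctrl", index 1 = "rpc", as listed above */
		pwr = mbox_request_channel(&pwr_cl, 0);
		rpc = mbox_request_channel(&pwr_cl, 1);
		if (IS_ERR(pwr) || IS_ERR(rpc))
			return -ENODEV;
		return 0;
	}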
diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
index 0bda229a6171..3899d6a557c1 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
@@ -1,5 +1,20 @@
 Freescale FlexTimer Module (FTM) PWM controller
 
+The same FTM PWM device can have a different endianness on different SoCs. The
+device tree provides a property to describe this so that an operating system
+device driver can handle all variants of the device. Refer to the table below
+for the endianness of the FTM PWM block as integrated into the existing SoCs:
+
+	SoC	| FTM-PWM endianness
+	--------+-------------------
+	Vybrid	| LE
+	LS1	| BE
+	LS2	| LE
+
+Please see ../regmap/regmap.txt for more detail about how to specify endian
+modes in device tree.
+
+
 Required properties:
 - compatible: Should be "fsl,vf610-ftm-pwm".
 - reg: Physical base address and length of the controller's registers
@@ -16,7 +31,8 @@ Required properties:
 - pinctrl-names: Must contain a "default" entry.
 - pinctrl-NNN: One property must exist for each entry in pinctrl-names.
   See pinctrl/pinctrl-bindings.txt for details of the property values.
-
+- big-endian: Boolean property, required if the FTM PWM registers use a big-
+  endian rather than little-endian layout.
 
 Example:
 
@@ -32,4 +48,5 @@ pwm0: pwm@40038000 {
 		<&clks VF610_CLK_FTM0_EXT_FIX_EN>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_pwm0_1>;
+	big-endian;
 };
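
Illustrative only: as the referenced regmap document describes, a driver can
translate this boolean into regmap's value-endianness configuration roughly as
follows (of_property_read_bool() and the regmap_config field are from the
respective kernel headers; the config values are placeholders):

	static struct regmap_config ftm_pwm_regmap_config = {
		.reg_bits = 32,
		.val_bits = 32,
		.reg_stride = 4,
	};

	static void ftm_pwm_apply_endianness(struct device_node *np)
	{
		/* little-endian is the default; the property flips it */
		if (of_property_read_bool(np, "big-endian"))
			ftm_pwm_regmap_config.val_format_endian =
				REGMAP_ENDIAN_BIG;
	}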
diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
index d47d15a6a298..b8be3d09ee26 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
@@ -7,8 +7,8 @@ Required properties:
    "rockchip,vop-pwm": found integrated in VOP on RK3288 SoC
  - reg: physical base address and length of the controller's registers
  - clocks: phandle and clock specifier of the PWM reference clock
- - #pwm-cells: should be 2. See pwm.txt in this directory for a
-   description of the cell format.
+ - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory
+   for a description of the cell format.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
index 1f0f67234a91..3c67bd50aa10 100644
--- a/Documentation/devicetree/bindings/thermal/imx-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
@@ -1,7 +1,10 @@
 * Temperature Monitor (TEMPMON) on Freescale i.MX SoCs
 
 Required properties:
-- compatible : "fsl,imx6q-thermal"
+- compatible : "fsl,imx6q-tempmon" for i.MX6Q, "fsl,imx6sx-tempmon" for i.MX6SX.
+  i.MX6SX has two more IRQs than i.MX6Q, one is IRQ_LOW and the other is IRQ_PANIC:
+  when the temperature drops below the low threshold, IRQ_LOW is triggered; when it
+  rises above the panic threshold, the system auto-reboots via the SRC module.
 - fsl,tempmon : phandle pointer to system controller that contains TEMPMON
   control registers, e.g. ANATOP on imx6q.
 - fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON
diff --git a/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt
new file mode 100644
index 000000000000..c3a36ee45552
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt
@@ -0,0 +1,24 @@
+Zynq Watchdog Device Tree Bindings
+-------------------------------------------
+
+Required properties:
+- compatible : Should be "cdns,wdt-r1p2".
+- clocks : This is pclk (APB clock).
+- interrupts : This is wd_irq - watchdog timeout interrupt.
+- interrupt-parent : Must be core interrupt controller.
+
+Optional properties:
+- reset-on-timeout : If this property exists, then a reset is done
+	when the watchdog times out.
+- timeout-sec : Watchdog timeout value (in seconds).
+
+Example:
+	watchdog@f8005000 {
+		compatible = "cdns,wdt-r1p2";
+		clocks = <&clkc 45>;
+		interrupt-parent = <&intc>;
+		interrupts = <0 9 1>;
+		reg = <0xf8005000 0x1000>;
+		reset-on-timeout;
+		timeout-sec = <10>;
+	};
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
index e52ba2da868c..8dab6fd024aa 100644
--- a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
@@ -7,7 +7,8 @@ Required properties:
 
 Optional property:
 - big-endian: If present the watchdog device's registers are implemented
-  in big endian mode, otherwise in little mode.
+  in big endian mode, otherwise in native mode (same as the CPU); for more
+  detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt
new file mode 100644
index 000000000000..9200fc2d508c
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt
@@ -0,0 +1,13 @@
+Meson SoCs Watchdog timer
+
+Required properties:
+
+- compatible : should be "amlogic,meson6-wdt"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+wdt: watchdog@c1109900 {
+	compatible = "amlogic,meson6-wdt";
+	reg = <0xc1109900 0x8>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt
new file mode 100644
index 000000000000..4726924d034e
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt
@@ -0,0 +1,24 @@
+Qualcomm Krait Processor Sub-system (KPSS) Watchdog
+---------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+			"qcom,kpss-wdt-msm8960"
+			"qcom,kpss-wdt-apq8064"
+			"qcom,kpss-wdt-ipq8064"
+
+- reg : shall contain base register location and length
+- clocks : shall contain the input clock
+
+Optional properties :
+- timeout-sec : shall contain the default watchdog timeout in seconds,
+                if unset, the default timeout is 30 seconds
+
+Example:
+	watchdog@208a038 {
+		compatible = "qcom,kpss-wdt-ipq8064";
+		reg = <0x0208a038 0x40>;
+		clocks = <&sleep_clk>;
+		timeout-sec = <10>;
+	};
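
For driver authors: the watchdog core can parse timeout-sec itself. A sketch
of how a driver might honor the property with the documented 30-second
fallback follows; watchdog_init_timeout() is the real helper from
include/linux/watchdog.h, everything else is illustrative:

	static struct watchdog_device qcom_wdd; /* ops/limits set elsewhere */

	static int qcom_wdt_pick_timeout(struct device *dev)
	{
		/*
		 * With a timeout parameter of 0, watchdog_init_timeout()
		 * falls back to the node's timeout-sec property and, failing
		 * that, keeps the preset default below.
		 */
		qcom_wdd.timeout = 30;
		return watchdog_init_timeout(&qcom_wdd, 0, dev);
	}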
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
index cfff37511aac..8f3d96af81d7 100644
--- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
@@ -9,6 +9,7 @@ Required properties:
 	(a) "samsung,s3c2410-wdt" for Exynos4 and previous SoCs
 	(b) "samsung,exynos5250-wdt" for Exynos5250
 	(c) "samsung,exynos5420-wdt" for Exynos5420
+	(d) "samsung,exynos7-wdt" for Exynos7
 
 - reg : base physical address of the controller and length of memory mapped
   region.
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 94d93b1f8b53..b30753cbf431 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -67,6 +67,7 @@ prototypes:
 		struct file *, unsigned open_flag,
 		umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
 
 locking rules:
 	all may block
@@ -96,6 +97,7 @@ fiemap: no
 update_time:	no
 atomic_open:	yes
 tmpfile:	no
+dentry_open:	no
 
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
new file mode 100644
index 000000000000..530850a72735
--- /dev/null
+++ b/Documentation/filesystems/overlayfs.txt
@@ -0,0 +1,198 @@
+Written by: Neil Brown <neilb@suse.de>
+
+Overlay Filesystem
+==================
+
+This document describes a prototype for a new approach to providing
+overlay-filesystem functionality in Linux (sometimes referred to as
+union-filesystems). An overlay-filesystem tries to present a
+filesystem which is the result of overlaying one filesystem on top
+of the other.
+
+The result will inevitably fail to look exactly like a normal
+filesystem for various technical reasons. The expectation is that
+many use cases will be able to ignore these differences.
+
+This approach is 'hybrid' because the objects that appear in the
+filesystem do not all appear to belong to that filesystem. In many
+cases an object accessed in the union will be indistinguishable
+from accessing the corresponding object from the original filesystem.
+This is most obvious from the 'st_dev' field returned by stat(2).
+
+While directories will report an st_dev from the overlay-filesystem,
+all non-directory objects will report an st_dev from the lower or
+upper filesystem that is providing the object. Similarly st_ino will
+only be unique when combined with st_dev, and both of these can change
+over the lifetime of a non-directory object. Many applications and
+tools ignore these values and will not be affected.
+
+Upper and Lower
+---------------
+
+An overlay filesystem combines two filesystems - an 'upper' filesystem
+and a 'lower' filesystem. When a name exists in both filesystems, the
+object in the 'upper' filesystem is visible while the object in the
+'lower' filesystem is either hidden or, in the case of directories,
+merged with the 'upper' object.
+
+It would be more correct to refer to an upper and lower 'directory
+tree' rather than 'filesystem' as it is quite possible for both
+directory trees to be in the same filesystem and there is no
+requirement that the root of a filesystem be given for either upper or
+lower.
+
+The lower filesystem can be any filesystem supported by Linux and does
+not need to be writable. The lower filesystem can even be another
+overlayfs. The upper filesystem will normally be writable and if it
+is it must support the creation of trusted.* extended attributes, and
+must provide valid d_type in readdir responses, so NFS is not suitable.
+
+A read-only overlay of two read-only filesystems may use any
+filesystem type.
+
+Directories
+-----------
+
+Overlaying mainly involves directories. If a given name appears in both
+upper and lower filesystems and refers to a non-directory in either,
+then the lower object is hidden - the name refers only to the upper
+object.
+
+Where both upper and lower objects are directories, a merged directory
+is formed.
+
+At mount time, the two directories given as mount options "lowerdir" and
+"upperdir" are combined into a merged directory:
+
+  mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper,\
+workdir=/work /merged
+
+The "workdir" needs to be an empty directory on the same filesystem
+as upperdir.
+
+Then whenever a lookup is requested in such a merged directory, the
+lookup is performed in each actual directory and the combined result
+is cached in the dentry belonging to the overlay filesystem. If both
+actual lookups find directories, both are stored and a merged
+directory is created, otherwise only one is stored: the upper if it
+exists, else the lower.
+
+Only the lists of names from directories are merged. Other content
+such as metadata and extended attributes are reported for the upper
+directory only. These attributes of the lower directory are hidden.
+
+whiteouts and opaque directories
+--------------------------------
+
+In order to support rm and rmdir without changing the lower
+filesystem, an overlay filesystem needs to record in the upper filesystem
+that files have been removed. This is done using whiteouts and opaque
+directories (non-directories are always opaque).
+
+A whiteout is created as a character device with 0/0 device number.
+When a whiteout is found in the upper level of a merged directory, any
+matching name in the lower level is ignored, and the whiteout itself
+is also hidden.
+
+A directory is made opaque by setting the xattr "trusted.overlay.opaque"
+to "y". Where the upper filesystem contains an opaque directory, any
+directory in the lower filesystem with the same name is ignored.
+
+readdir
+-------
+
+When a 'readdir' request is made on a merged directory, the upper and
+lower directories are each read and the name lists merged in the
+obvious way (upper is read first, then lower - entries that already
+exist are not re-added). This merged name list is cached in the
+'struct file' and so remains as long as the file is kept open. If the
+directory is opened and read by two processes at the same time, they
+will each have separate caches. A seekdir to the start of the
+directory (offset 0) followed by a readdir will cause the cache to be
+discarded and rebuilt.
+
+This means that changes to the merged directory do not appear while a
+directory is being read. This is unlikely to be noticed by many
+programs.
+
+Seek offsets are assigned sequentially when the directories are read.
+Thus if you
+ - read part of a directory
+ - remember an offset, and close the directory
+ - re-open the directory some time later
+ - seek to the remembered offset
+
+there may be little correlation between the old and new locations in
+the list of filenames, particularly if anything has changed in the
+directory.
+
+Readdir on directories that are not merged is simply handled by the
+underlying directory (upper or lower).
+
+
+Non-directories
+---------------
+
+Objects that are not directories (files, symlinks, device-special
+files etc.) are presented either from the upper or lower filesystem as
+appropriate. When a file in the lower filesystem is accessed in a way
+that requires write-access, such as opening for write access, changing
+some metadata etc., the file is first copied from the lower filesystem
+to the upper filesystem (copy_up). Note that creating a hard-link
+also requires copy_up, though of course creation of a symlink does
+not.
+
+The copy_up may turn out to be unnecessary, for example if the file is
+opened for read-write but the data is not modified.
+
+The copy_up process first makes sure that the containing directory
+exists in the upper filesystem - creating it and any parents as
+necessary. It then creates the object with the same metadata (owner,
+mode, mtime, symlink-target etc.) and then if the object is a file, the
+data is copied from the lower to the upper filesystem. Finally any
+extended attributes are copied up.
+
+Once the copy_up is complete, the overlay filesystem simply
+provides direct access to the newly created file in the upper
+filesystem - future operations on the file are barely noticed by the
+overlay filesystem (though an operation on the name of the file such as
+rename or unlink will of course be noticed and handled).
+
+
+Non-standard behavior
+---------------------
+
+The copy_up operation essentially creates a new, identical file and
+moves it over to the old name. The new file may be on a different
+filesystem, so both st_dev and st_ino of the file may change.
+
+Any open files referring to this inode will access the old data and
+metadata. Similarly any file locks obtained before copy_up will not
+apply to the copied up file.
+
+On a file opened with O_RDONLY, fchmod(2), fchown(2), futimesat(2) and
+fsetxattr(2) will fail with EROFS.
+
+If a file with multiple hard links is copied up, then this will
+"break" the link. Changes will not be propagated to other names
+referring to the same inode.
+
+Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
+object in overlayfs will not contain valid absolute paths, only
+relative paths leading up to the filesystem's root. This will be
+fixed in the future.
+
+Some operations are not atomic, for example a crash during copy_up or
+rename will leave the filesystem in an inconsistent state. This will
+be addressed in the future.
+
+Changes to underlying filesystems
+---------------------------------
+
+Offline changes, when the overlay is not mounted, are allowed to either
+the upper or the lower trees.
+
+Changes to the underlying filesystems while part of a mounted overlay
+filesystem are not allowed. If the underlying filesystem is changed,
+the behavior of the overlay is undefined, though it will not result in
+a crash or deadlock.
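
The mount in the "Directories" section can also be issued programmatically; a
minimal userspace sketch using mount(2) with exactly the options documented
above (the directory paths are examples):

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* /lower, /upper, /work and /merged must already exist */
		if (mount("overlayfs", "/merged", "overlayfs", 0,
			  "lowerdir=/lower,upperdir=/upper,workdir=/work")) {
			perror("mount");
			return 1;
		}
		return 0;
	}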
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index fceff7c00a3c..20bf204426ca 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,7 @@ struct inode_operations {
 	int (*atomic_open)(struct inode *, struct dentry *, struct file *,
 			unsigned open_flag, umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -696,6 +697,12 @@ struct address_space_operations {
 	but instead uses bmap to find out where the blocks in the file
 	are and uses those addresses directly.
 
+  dentry_open: *WARNING: probably going away soon, do not use!* This is an
+	alternative to f_op->open(), the difference is that this method may open
+	a file not necessarily originating from the same filesystem as the one
+	i_op->open() was called on. It may be useful for stacking filesystems
+	which want to allow native I/O directly on underlying files.
+
 
   invalidatepage: If a page has PagePrivate set, then invalidatepage
 	will be called when part or all of the page is to be removed
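
To make the dentry_open description concrete: a stacking filesystem would
essentially redirect the open to the underlying file. A sketch, assuming the
vfs_open() helper that fs/open.c gains in this series; stackfs_real_path() is
a hypothetical helper, and copy-up/error handling are elided:

	static int stackfs_dentry_open(struct dentry *dentry, struct file *file,
				       const struct cred *cred)
	{
		struct path realpath;

		/* hypothetical: resolve the underlying <mnt, dentry> pair */
		stackfs_real_path(dentry, &realpath);

		/* open the underlying file so I/O bypasses the stacked layer */
		return vfs_open(&realpath, file, cred);
	}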
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7dbe5ec9d9cd..74339c57b914 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1015,10 +1015,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Format: {"off" | "on" | "skip[mbr]"}
 
 	efi=		[EFI]
-			Format: { "old_map" }
+			Format: { "old_map", "nochunk", "noruntime" }
 			old_map [X86-64]: switch to the old ioremap-based EFI
 			runtime services mapping. 32-bit still uses this one by
 			default.
+			nochunk: disable reading files in "chunks" in the EFI
+			boot stub, as chunking can cause problems with some
+			firmware implementations.
+			noruntime : disable EFI runtime services support
 
 	efi_no_storage_paranoia [EFI; X86]
 			Using this parameter you can use more than 50% of
@@ -2232,7 +2236,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nodsp		[SH] Disable hardware DSP at boot time.
 
-	noefi		[X86] Disable EFI runtime services support.
+	noefi		Disable EFI runtime services support.
 
 	noexec		[IA-64]
 
@@ -3465,6 +3469,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			e.g. base its process migration decisions on it.
 			Default is on.
 
+	topology_updates= [KNL, PPC, NUMA]
+			Format: {off}
+			Specify if the kernel should ignore (off)
+			topology updates sent by the hypervisor to this
+			LPAR.
+
 	tp720=		[HW,PS2]
 
 	tpm_suspend_pcr=[HW,TPM]
diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 000000000000..60f43ff629aa
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+		The Common Mailbox Framework
+		Jassi Brar <jaswinder.singh@linaro.org>
+
+	This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement a non-standard protocol. So even if two
+platforms employ, say, the PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w from also being Linux based and using
+the same API there. However none of that helps us locally because we
+only ever deal at the client's protocol level.
+	Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+	Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+	Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops; except for peek_data() all are mandatory.
+The controller driver might know that a message has been consumed
+by the remote by getting an IRQ or polling some hardware flag,
+or it can never know (the client knows by way of the protocol).
+The method, in order of preference, is IRQ -> Poll -> None, which
+the controller driver should set via 'txdone_irq' or 'txdone_poll'
+or neither.
+
+
+	Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+	The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+	struct mbox_client cl;
+	struct mbox_chan *mbox;
+	struct completion c;
+	bool async;
+	/* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	if (dc->async) {
+		if (is_an_ack(mssg)) {
+			/* An ACK to our last sample sent */
+			return; /* Or do something else here */
+		} else { /* A new message from remote */
+			queue_req(mssg);
+		}
+	} else {
+		/* Remote f/w sends only ACK packets on this channel */
+		return;
+	}
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+	struct demo_client *dc = container_of(cl,
+						struct demo_client, cl);
+	complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+	struct demo_client *dc_sync, *dc_async;
+	/* The controller already knows async_pkt and sync_pkt */
+	struct async_pkt ap;
+	struct sync_pkt sp;
+
+	dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+	dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+	/* Populate non-blocking mode client */
+	dc_async->cl.dev = &pdev->dev;
+	dc_async->cl.rx_callback = message_from_remote;
+	dc_async->cl.tx_done = sample_sent;
+	dc_async->cl.tx_block = false;
+	dc_async->cl.tx_tout = 0; /* doesn't matter here */
+	dc_async->cl.knows_txdone = false; /* depending upon protocol */
+	dc_async->async = true;
+	init_completion(&dc_async->c);
+
+	/* Populate blocking mode client */
+	dc_sync->cl.dev = &pdev->dev;
+	dc_sync->cl.rx_callback = message_from_remote;
+	dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+	dc_sync->cl.tx_block = true;
+	dc_sync->cl.tx_tout = 500; /* by half a second */
+	dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+	dc_sync->async = false;
+
+	/* ASync mailbox is listed second in 'mboxes' property */
+	dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+	/* Populate data packet */
+	/* ap.xxx = 123; etc */
+	/* Send async message to remote */
+	mbox_send_message(dc_async->mbox, &ap);
+
+	/* Sync mailbox is listed first in 'mboxes' property */
+	dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+	/* Populate data packet */
+	/* sp.abc = 123; etc */
+	/* Send message to remote in blocking mode */
+	mbox_send_message(dc_sync->mbox, &sp);
+	/* At this point 'sp' has been sent */
+
+	/* Now wait for async chan to be done */
+	wait_for_completion(&dc_async->c);
+}
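
The document above shows only the client side. For symmetry, a skeletal
controller driver per Part 1 might look as follows; the mbox_* types and
registration calls are from mailbox_controller.h, while the register layout
and the mydev_* names are invented for illustration:

struct mydev_regs; /* hypothetical memory-mapped register block */

static int mydev_send_data(struct mbox_chan *chan, void *data)
{
	struct mydev_regs __iomem *regs = chan->con_priv;

	writel(*(u32 *)data, regs);	/* kick the message to the remote */
	return 0;
}

static int mydev_startup(struct mbox_chan *chan)
{
	return 0;	/* enable clocks/IRQs here */
}

static void mydev_shutdown(struct mbox_chan *chan)
{
}

static const struct mbox_chan_ops mydev_chan_ops = {
	.send_data = mydev_send_data,	/* only peek_data() is optional */
	.startup = mydev_startup,
	.shutdown = mydev_shutdown,
};

static struct mbox_chan mydev_chans[2];

static struct mbox_controller mydev_mbox = {
	.ops = &mydev_chan_ops,
	.chans = mydev_chans,
	.num_chans = ARRAY_SIZE(mydev_chans),
	.txdone_irq = true,	/* preferred method: IRQ -> Poll -> None */
};

/*
 * In probe: set mydev_mbox.dev and call mbox_controller_register(&mydev_mbox).
 * From the TX-done interrupt handler call mbox_chan_txdone(chan, 0), and on
 * reception call mbox_chan_received_data(chan, message).
 */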
diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index a5da5c7e7128..129f7c0e1483 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -5,7 +5,8 @@ performance expectations by drivers, subsystems and user space applications on
 one of the parameters.
 
 Two different PM QoS frameworks are available:
-1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput.
+1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
+memory_bandwidth.
 2. the per-device PM QoS framework provides the API to manage the per-device latency
 constraints and PM QoS flags.
 
@@ -13,6 +14,7 @@ Each parameters have defined units:
  * latency: usec
  * timeout: usec
  * throughput: kbs (kilo bit / sec)
+ * memory bandwidth: mbs (mega bit / sec)
 
 
 1. PM QoS framework
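
As a usage sketch for the new class, a driver streaming data could pin a
bandwidth floor while active. pm_qos_add_request()/pm_qos_remove_request()
are the standard class API from include/linux/pm_qos.h; the class constant is
assumed to be PM_QOS_MEMORY_BANDWIDTH, and the 1200 mbs value is arbitrary:

	static struct pm_qos_request mem_bw_req;

	static void start_streaming(void)
	{
		/* units for this class are mega bit / sec, per the list above */
		pm_qos_add_request(&mem_bw_req, PM_QOS_MEMORY_BANDWIDTH, 1200);
	}

	static void stop_streaming(void)
	{
		pm_qos_remove_request(&mem_bw_req);
	}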
diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt
index da162f7fd5f5..5a9879bad073 100644
--- a/Documentation/scsi/osd.txt
+++ b/Documentation/scsi/osd.txt
@@ -184,8 +184,7 @@ Any problems, questions, bug reports, lonely OSD nights, please email:
 More up-to-date information can be found on:
 http://open-osd.org
 
-Boaz Harrosh <bharrosh@panasas.com>
-Benny Halevy <bhalevy@panasas.com>
+Boaz Harrosh <ooo@electrozaur.com>
 
 References
 ==========
diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt
new file mode 100644
index 000000000000..5518465290bf
--- /dev/null
+++ b/Documentation/target/tcmu-design.txt
@@ -0,0 +1,378 @@
1Contents:
2
31) TCM Userspace Design
4 a) Background
5 b) Benefits
6 c) Design constraints
7 d) Implementation overview
8 i. Mailbox
9 ii. Command ring
10 iii. Data Area
11 e) Device discovery
12 f) Device events
13 g) Other contingencies
142) Writing a user pass-through handler
15 a) Discovering and configuring TCMU uio devices
16 b) Waiting for events on the device(s)
17 c) Managing the command ring
183) Command filtering and pass_level
194) A final note
20
21
22TCM Userspace Design
23--------------------
24
25TCM is another name for LIO, an in-kernel iSCSI target (server).
26Existing TCM targets run in the kernel. TCMU (TCM in Userspace)
27allows userspace programs to be written which act as iSCSI targets.
28This document describes the design.
29
30The existing kernel provides modules for different SCSI transport
31protocols. TCM also modularizes the data storage. There are existing
32modules for file, block device, RAM or using another SCSI device as
33storage. These are called "backstores" or "storage engines". These
34built-in modules are implemented entirely as kernel code.
35
36Background:
37
38In addition to modularizing the transport protocol used for carrying
39SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes
40the actual data storage as well. These are referred to as "backstores"
41or "storage engines". The target comes with backstores that allow a
42file, a block device, RAM, or another SCSI device to be used for the
43local storage needed for the exported SCSI LUN. Like the rest of LIO,
44these are implemented entirely as kernel code.
45
46These backstores cover the most common use cases, but not all. One new
47use case that other non-kernel target solutions, such as tgt, are able
48to support is using Gluster's GLFS or Ceph's RBD as a backstore. The
49target then serves as a translator, allowing initiators to store data
50in these non-traditional networked storage systems, while still only
51using standard protocols themselves.
52
53If the target is a userspace process, supporting these is easy. tgt,
54for example, needs only a small adapter module for each, because the
55modules just use the available userspace libraries for RBD and GLFS.
56
57Adding support for these backstores in LIO is considerably more
58difficult, because LIO is entirely kernel code. Instead of undertaking
59the significant work to port the GLFS or RBD APIs and protocols to the
60kernel, another approach is to create a userspace pass-through
61backstore for LIO, "TCMU".
62
63
64Benefits:
65
66In addition to allowing relatively easy support for RBD and GLFS, TCMU
67will also allow easier development of new backstores. TCMU combines
68with the LIO loopback fabric to become something similar to FUSE
69(Filesystem in Userspace), but at the SCSI layer instead of the
70filesystem layer. A SUSE, if you will.
71
72The disadvantage is there are more distinct components to configure, and
73potentially to malfunction. This is unavoidable, but hopefully not
74fatal if we're careful to keep things as simple as possible.
75
76Design constraints:
77
78- Good performance: high throughput, low latency
79- Cleanly handle if userspace:
80 1) never attaches
81 2) hangs
82 3) dies
83 4) misbehaves
84- Allow future flexibility in user & kernel implementations
85- Be reasonably memory-efficient
86- Simple to configure & run
87- Simple to write a userspace backend
88
89
90Implementation overview:
91
92The core of the TCMU interface is a memory region that is shared
93between kernel and userspace. Within this region is: a control area
94(mailbox); a lockless producer/consumer circular buffer for commands
95to be passed up, and status returned; and an in/out data buffer area.
96
97TCMU uses the pre-existing UIO subsystem. UIO allows device driver
98development in userspace, and this is conceptually very close to the
99TCMU use case, except instead of a physical device, TCMU implements a
100memory-mapped layout designed for SCSI commands. Using UIO also
101benefits TCMU by handling device introspection (e.g. a way for
102userspace to determine how large the shared region is) and signaling
103mechanisms in both directions.
104
105There are no embedded pointers in the memory region. Everything is
106expressed as an offset from the region's starting address. This allows
107the ring to still work if the user process dies and is restarted with
108the region mapped at a different virtual address.
109
110See target_core_user.h for the struct definitions.
111
112The Mailbox:
113
114The mailbox is always at the start of the shared memory region, and
115contains a version, details about the starting offset and size of the
116command ring, and head and tail pointers to be used by the kernel and
117userspace (respectively) to put commands on the ring, and indicate
118when the commands are completed.
119
120version - 1 (userspace should abort if otherwise)
121flags - none yet defined.
122cmdr_off - The offset of the start of the command ring from the start
123of the memory region, to account for the mailbox size.
124cmdr_size - The size of the command ring. This does *not* need to be a
125power of two.
126cmd_head - Modified by the kernel to indicate when a command has been
127placed on the ring.
128cmd_tail - Modified by userspace to indicate when it has completed
129processing of a command.

The Command Ring:

Commands are placed on the ring by the kernel incrementing
mailbox.cmd_head by the size of the command, modulo cmdr_size, and
then signaling userspace via uio_event_notify(). Once the command is
completed, userspace updates mailbox.cmd_tail in the same way and
signals the kernel via a 4-byte write(). When cmd_head equals
cmd_tail, the ring is empty -- no commands are currently waiting to be
processed by userspace.

TCMU commands start with a common header containing "len_op", a 32-bit
value that stores the length, as well as the opcode in the lowest
unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and
TCMU_OP_CMD. When userspace encounters a command with the PAD opcode,
it should skip ahead by the number of bytes in "length". (The kernel
inserts PAD entries to ensure each CMD entry fits contiguously into
the circular buffer.)
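
As a sketch, a handler could split len_op apart as below, assuming the
opcode occupies the low three bits (mirroring the TCMU_OP_MASK-based
helpers in target_core_user.h, which should be preferred):

#define OP_MASK_SKETCH 0x7 /* assumed opcode width */

static inline uint32_t len_op_get_op(uint32_t len_op)
{
	return len_op & OP_MASK_SKETCH;
}

static inline uint32_t len_op_get_len(uint32_t len_op)
{
	return len_op & ~OP_MASK_SKETCH;
}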

When userspace handles a CMD, it finds the SCSI CDB (Command Data
Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the
start of the overall shared memory region, not from the entry. The
data in/out buffers are accessible via the req.iov[] array. Note that
each iov.iov_base is also an offset from the start of the region.

TCMU currently does not support BIDI operations.

When completing a command, userspace sets rsp.scsi_status, and
rsp.sense_buffer if necessary. Userspace then increments
mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
kernel via the UIO method, a 4-byte write to the file descriptor.

The Data Area:

This is shared-memory space after the command ring. The organization
of this area is not defined in the TCMU interface, and userspace
should access only the parts referenced by pending iovs.


Device Discovery:

Other devices may be using UIO besides TCMU. Unrelated user processes
may also be handling different sets of TCMU devices. TCMU userspace
processes must find their devices by scanning sysfs
class/uio/uio*/name. For TCMU devices, these names will be of the
format:

tcm-user/<hba_num>/<device_name>/<subtype>/<path>

where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num>
and <device_name> allow userspace to find the device's path in the
kernel target's configfs tree. Assuming the usual mount point, it is
found at:

/sys/kernel/config/target/core/user_<hba_num>/<device_name>

This location contains attributes, such as "hw_block_size", that
userspace needs to know for correct operation.

<subtype> will be a userspace-process-unique string to identify the
TCMU device as expecting to be backed by a certain handler, and <path>
will be an additional handler-specific string for the user process to
configure the device, if needed. The name cannot contain ':', due to
LIO limitations.
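
As an illustration (the helper name here is made up for this sketch),
splitting a uio name and deriving the configfs path might look like:

#include <stdio.h>
#include <string.h>

static int tcmu_cfgfs_path(const char *name, char *out, size_t outlen)
{
	char copy[256], *save;
	const char *prefix, *hba, *dev;

	strncpy(copy, name, sizeof(copy) - 1);
	copy[sizeof(copy) - 1] = '\0';

	prefix = strtok_r(copy, "/", &save);
	hba = strtok_r(NULL, "/", &save);
	dev = strtok_r(NULL, "/", &save);
	/* <subtype> and <path> follow; a real handler checks subtype too */

	if (!prefix || strcmp(prefix, "tcm-user") || !hba || !dev)
		return -1;

	snprintf(out, outlen,
		 "/sys/kernel/config/target/core/user_%s/%s", hba, dev);
	return 0;
}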

For all devices so discovered, the user handler opens /dev/uioX and
calls mmap():

mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)

where size must be equal to the value read from
/sys/class/uio/uioX/maps/map0/size.


Device Events:

If a new device is added or removed, a notification will be broadcast
over netlink, using a generic netlink family name of "TCM-USER" and a
multicast group named "config". This will include the UIO name as
described in the previous section, as well as the UIO minor
number. This should allow userspace to identify both the UIO device and
the LIO device, so that after determining the device is supported
(based on subtype) it can take the appropriate action.
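
A sketch of subscribing to these notifications, assuming libnl-genl is
used (an assumption -- any generic netlink mechanism will do; error
handling trimmed):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

static struct nl_sock *tcmu_event_sock(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	int grp;

	if (!sk || genl_connect(sk))
		return NULL;

	grp = genl_ctrl_resolve_grp(sk, "TCM-USER", "config");
	if (grp < 0)
		return NULL;

	/* notifications are unsolicited, so don't enforce sequencing */
	nl_socket_disable_seq_check(sk);
	nl_socket_add_membership(sk, grp);
	return sk; /* poll and nl_recvmsgs_default() on this socket */
}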


Other contingencies:

Userspace handler process never attaches:

- TCMU will post commands, and then abort them after a timeout period
  (30 seconds).

Userspace handler process is killed:

- It is still possible to restart and re-connect to TCMU
  devices. The command ring is preserved. However, after the timeout
  period, the kernel will abort pending tasks.

Userspace handler process hangs:

- The kernel will abort pending tasks after a timeout period.

Userspace handler process is malicious:

- The process can trivially break the handling of devices it controls,
  but should not be able to access kernel memory outside its shared
  memory areas.


Writing a user pass-through handler (with example code)
-------------------------------------------------------

A user process handling a TCMU device must support the following:

a) Discovering and configuring TCMU uio devices
b) Waiting for events on the device(s)
c) Managing the command ring: parsing operations and commands,
   performing work as needed, setting response fields (scsi_status and
   possibly sense_buffer), updating cmd_tail, and notifying the kernel
   that work has been finished

First, consider instead writing a plugin for tcmu-runner. tcmu-runner
implements all of this, and provides a higher-level API for plugin
authors.

TCMU is designed so that multiple unrelated processes can manage TCMU
devices separately. All handlers should make sure to open only their
own devices, based upon a known subtype string.

a) Discovering and configuring TCMU UIO devices:

(error checking omitted for brevity)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int fd, dev_fd, ret;
char buf[256];
unsigned long long map_len;
void *map;

fd = open("/sys/class/uio/uio0/name", O_RDONLY);
ret = read(fd, buf, sizeof(buf));
close(fd);
buf[ret-1] = '\0'; /* null-terminate and chop off the \n */

/* we only want uio devices whose name is a format we expect */
if (strncmp(buf, "tcm-user", 8))
	exit(-1);

/* Further checking for subtype also needed here */

fd = open("/sys/class/uio/uio0/maps/map0/size", O_RDONLY);
ret = read(fd, buf, sizeof(buf));
close(fd);
buf[ret-1] = '\0'; /* null-terminate and chop off the \n */

map_len = strtoull(buf, NULL, 0);

dev_fd = open("/dev/uio0", O_RDWR);
map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0);

b) Waiting for events on the device(s)

while (1) {
	char buf[4];

	/* Blocks until the kernel signals; the 4 bytes read are UIO's
	   event count, which we don't need here. */
	int ret = read(dev_fd, buf, 4);

	handle_device_events(dev_fd, map);
}
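
With several devices, the same pattern generalizes to poll(2). A
sketch, where dev_fds[], maps[] and nfds are assumed to have been
filled in by the discovery step above:

#include <poll.h>

struct pollfd pfds[16];
int i;

for (i = 0; i < nfds; i++) {
	pfds[i].fd = dev_fds[i];
	pfds[i].events = POLLIN;
}

while (poll(pfds, nfds, -1) > 0) {
	for (i = 0; i < nfds; i++) {
		if (pfds[i].revents & POLLIN) {
			char buf[4];

			read(pfds[i].fd, buf, 4); /* consume event count */
			handle_device_events(pfds[i].fd, maps[i]);
		}
	}
}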


c) Managing the command ring

#include <linux/target_core_user.h>

int handle_device_events(int fd, void *map)
{
	struct tcmu_mailbox *mb = map;
	struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
	int did_some_work = 0;

	/* Process events from cmd ring until we catch up with cmd_head */
	while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {

		if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) {
			uint8_t *cdb = (void *)mb + ent->req.cdb_off;
			bool success = true;

			/* Handle command here. */
			printf("SCSI opcode: 0x%x\n", cdb[0]);

			/* Set response fields */
			if (success) {
				ent->rsp.scsi_status = SCSI_NO_SENSE;
			} else {
				/* Also fill in rsp->sense_buffer here */
				ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
			}
		} else {
			/* Do nothing for PAD entries */
		}

		/* update cmd_tail */
		mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size;
		ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
		did_some_work = 1;
	}

	/* Notify the kernel that work has been finished */
	if (did_some_work) {
		uint32_t buf = 0;

		write(fd, &buf, 4);
	}

	return 0;
}
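
Since each iov_base holds an offset, moving data means converting it
to a pointer first. A sketch of filling a command's data-in buffers
from a backing file (backing_fd and the byte offset "off" are
assumptions for illustration):

#include <unistd.h>

static void fill_data_in(void *map, struct tcmu_cmd_entry *ent,
			 int backing_fd, off_t off)
{
	unsigned int i;

	for (i = 0; i < ent->req.iov_cnt; i++) {
		void *dst = map + (size_t)ent->req.iov[i].iov_base;
		size_t len = ent->req.iov[i].iov_len;

		pread(backing_fd, dst, len, off);
		off += len;
	}
}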


Command filtering and pass_level
--------------------------------

TCMU supports a "pass_level" option with valid values of 0 or 1. When
the value is 0 (the default), nearly all SCSI commands received for
the device are passed through to the handler. This allows maximum
flexibility but increases the amount of code required by the handler
to support all mandatory SCSI commands. If pass_level is set to 1,
then only I/O-related commands are presented, and the rest are handled
by LIO's in-kernel command emulation. The commands presented at level
1 include all versions of:

READ
WRITE
WRITE_VERIFY
XDWRITEREAD
WRITE_SAME
COMPARE_AND_WRITE
SYNCHRONIZE_CACHE
UNMAP

A final note
------------

Please be careful to return status codes as defined by the SCSI
specifications. These are different from some of the values defined in
the scsi/scsi.h include file. For example, CHECK CONDITION's status
code is 2, not 1.
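
For instance, the wire values defined by SAM (matching the kernel's
SAM_STAT_* constants, which are safe to use) include:

#define SAM_STAT_GOOD            0x00
#define SAM_STAT_CHECK_CONDITION 0x02 /* not 1, as legacy scsi.h suggests */
#define SAM_STAT_BUSY            0x08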
diff --git a/MAINTAINERS b/MAINTAINERS
index f54ff97d621b..0d4e8f0b238f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5842,6 +5842,14 @@ S: Maintained
5842F: drivers/net/macvlan.c 5842F: drivers/net/macvlan.c
5843F: include/linux/if_macvlan.h 5843F: include/linux/if_macvlan.h
5844 5844
5845MAILBOX API
5846M: Jassi Brar <jassisinghbrar@gmail.com>
5847L: linux-kernel@vger.kernel.org
5848S: Maintained
5849F: drivers/mailbox/
5850F: include/linux/mailbox_client.h
5851F: include/linux/mailbox_controller.h
5852
5845MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 5853MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
5846M: Michael Kerrisk <mtk.manpages@gmail.com> 5854M: Michael Kerrisk <mtk.manpages@gmail.com>
5847W: http://www.kernel.org/doc/man-pages 5855W: http://www.kernel.org/doc/man-pages
@@ -6830,7 +6838,7 @@ S: Orphan
6830F: drivers/net/wireless/orinoco/ 6838F: drivers/net/wireless/orinoco/
6831 6839
6832OSD LIBRARY and FILESYSTEM 6840OSD LIBRARY and FILESYSTEM
6833M: Boaz Harrosh <bharrosh@panasas.com> 6841M: Boaz Harrosh <ooo@electrozaur.com>
6834M: Benny Halevy <bhalevy@primarydata.com> 6842M: Benny Halevy <bhalevy@primarydata.com>
6835L: osd-dev@open-osd.org 6843L: osd-dev@open-osd.org
6836W: http://open-osd.org 6844W: http://open-osd.org
@@ -6840,6 +6848,13 @@ F: drivers/scsi/osd/
6840F: include/scsi/osd_* 6848F: include/scsi/osd_*
6841F: fs/exofs/ 6849F: fs/exofs/
6842 6850
6851OVERLAYFS FILESYSTEM
6852M: Miklos Szeredi <miklos@szeredi.hu>
6853L: linux-fsdevel@vger.kernel.org
6854S: Supported
6855F: fs/overlayfs/*
6856F: Documentation/filesystems/overlayfs.txt
6857
6843P54 WIRELESS DRIVER 6858P54 WIRELESS DRIVER
6844M: Christian Lamparter <chunkeey@googlemail.com> 6859M: Christian Lamparter <chunkeey@googlemail.com>
6845L: linux-wireless@vger.kernel.org 6860L: linux-wireless@vger.kernel.org
diff --git a/Makefile b/Makefile
index 05d67af376c5..52c129725270 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 3 1VERSION = 3
2PATCHLEVEL = 18 2PATCHLEVEL = 18
3SUBLEVEL = 0 3SUBLEVEL = 0
4EXTRAVERSION = -rc1 4EXTRAVERSION = -rc2
5NAME = Shuffling Zombie Juror 5NAME = Shuffling Zombie Juror
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 9596b0ab108d..fe44b2494609 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,6 +9,7 @@
9config ARC 9config ARC
10 def_bool y 10 def_bool y
11 select BUILDTIME_EXTABLE_SORT 11 select BUILDTIME_EXTABLE_SORT
12 select COMMON_CLK
12 select CLONE_BACKWARDS 13 select CLONE_BACKWARDS
13 # ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev 14 # ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev
14 select DEVTMPFS if !INITRAMFS_SOURCE="" 15 select DEVTMPFS if !INITRAMFS_SOURCE=""
@@ -73,9 +74,6 @@ config STACKTRACE_SUPPORT
73config HAVE_LATENCYTOP_SUPPORT 74config HAVE_LATENCYTOP_SUPPORT
74 def_bool y 75 def_bool y
75 76
76config NO_DMA
77 def_bool n
78
79source "init/Kconfig" 77source "init/Kconfig"
80source "kernel/Kconfig.freezer" 78source "kernel/Kconfig.freezer"
81 79
@@ -354,7 +352,7 @@ config ARC_CURR_IN_REG
354 kernel mode. This saves memory access for each such access 352 kernel mode. This saves memory access for each such access
355 353
356 354
357config ARC_MISALIGN_ACCESS 355config ARC_EMUL_UNALIGNED
358 bool "Emulate unaligned memory access (userspace only)" 356 bool "Emulate unaligned memory access (userspace only)"
359 select SYSCTL_ARCH_UNALIGN_NO_WARN 357 select SYSCTL_ARCH_UNALIGN_NO_WARN
360 select SYSCTL_ARCH_UNALIGN_ALLOW 358 select SYSCTL_ARCH_UNALIGN_ALLOW
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 8c0b1aa56f7e..10bc3d4e8a44 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -25,7 +25,6 @@ ifdef CONFIG_ARC_CURR_IN_REG
25LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h 25LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h
26endif 26endif
27 27
28upto_gcc42 := $(call cc-ifversion, -le, 0402, y)
29upto_gcc44 := $(call cc-ifversion, -le, 0404, y) 28upto_gcc44 := $(call cc-ifversion, -le, 0404, y)
30atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) 29atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y)
31atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y) 30atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y)
@@ -60,25 +59,11 @@ ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
60# --build-id w/o "-marclinux". Default arc-elf32-ld is OK 59# --build-id w/o "-marclinux". Default arc-elf32-ld is OK
61ldflags-$(upto_gcc44) += -marclinux 60ldflags-$(upto_gcc44) += -marclinux
62 61
63ARC_LIBGCC := -mA7
64cflags-$(CONFIG_ARC_HAS_HW_MPY) += -multcost=16
65
66ifndef CONFIG_ARC_HAS_HW_MPY 62ifndef CONFIG_ARC_HAS_HW_MPY
67 cflags-y += -mno-mpy 63 cflags-y += -mno-mpy
68
69# newlib for ARC700 assumes MPY to be always present, which is generally true
70# However, if someone really doesn't want MPY, we need to use the 600 ver
71# which coupled with -mno-mpy will use mpy emulation
72# With gcc 4.4.7, -mno-mpy is enough to make any other related adjustments,
73# e.g. increased cost of MPY. With gcc 4.2.1 this had to be explicitly hinted
74
75 ifeq ($(upto_gcc42),y)
76 ARC_LIBGCC := -marc600
77 cflags-y += -multcost=30
78 endif
79endif 64endif
80 65
81LIBGCC := $(shell $(CC) $(ARC_LIBGCC) $(cflags-y) --print-libgcc-file-name) 66LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
82 67
83# Modules with short calls might break for calls into builtin-kernel 68# Modules with short calls might break for calls into builtin-kernel
84KBUILD_CFLAGS_MODULE += -mlong-calls 69KBUILD_CFLAGS_MODULE += -mlong-calls
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts
index 6b57475967a6..757e0c62c4f9 100644
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/angel4.dts
@@ -24,11 +24,6 @@
24 serial0 = &arcuart0; 24 serial0 = &arcuart0;
25 }; 25 };
26 26
27 memory {
28 device_type = "memory";
29 reg = <0x00000000 0x10000000>; /* 256M */
30 };
31
32 fpga { 27 fpga {
33 compatible = "simple-bus"; 28 compatible = "simple-bus";
34 #address-cells = <1>; 29 #address-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index 4f31b2eb5cdf..cfaedd9c61c9 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,18 +20,13 @@
20 /* this is for console on PGU */ 20 /* this is for console on PGU */
21 /* bootargs = "console=tty0 consoleblank=0"; */ 21 /* bootargs = "console=tty0 consoleblank=0"; */
22 /* this is for console on serial */ 22 /* this is for console on serial */
23 bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug"; 23 bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
24 }; 24 };
25 25
26 aliases { 26 aliases {
27 serial0 = &uart0; 27 serial0 = &uart0;
28 }; 28 };
29 29
30 memory {
31 device_type = "memory";
32 reg = <0x80000000 0x10000000>; /* 256M */
33 };
34
35 fpga { 30 fpga {
36 compatible = "simple-bus"; 31 compatible = "simple-bus";
37 #address-cells = <1>; 32 #address-cells = <1>;
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig
index e283aa586934..ef4d3bc7b6c0 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/fpga_defconfig
@@ -23,7 +23,6 @@ CONFIG_MODULES=y
23# CONFIG_IOSCHED_DEADLINE is not set 23# CONFIG_IOSCHED_DEADLINE is not set
24# CONFIG_IOSCHED_CFQ is not set 24# CONFIG_IOSCHED_CFQ is not set
25CONFIG_ARC_PLAT_FPGA_LEGACY=y 25CONFIG_ARC_PLAT_FPGA_LEGACY=y
26CONFIG_ARC_BOARD_ML509=y
27# CONFIG_ARC_HAS_RTSC is not set 26# CONFIG_ARC_HAS_RTSC is not set
28CONFIG_ARC_BUILTIN_DTB_NAME="angel4" 27CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
29CONFIG_PREEMPT=y 28CONFIG_PREEMPT=y
diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig
index 5276a52f6a2f..49c93011ab96 100644
--- a/arch/arc/configs/fpga_noramfs_defconfig
+++ b/arch/arc/configs/fpga_noramfs_defconfig
@@ -20,7 +20,6 @@ CONFIG_MODULES=y
20# CONFIG_IOSCHED_DEADLINE is not set 20# CONFIG_IOSCHED_DEADLINE is not set
21# CONFIG_IOSCHED_CFQ is not set 21# CONFIG_IOSCHED_CFQ is not set
22CONFIG_ARC_PLAT_FPGA_LEGACY=y 22CONFIG_ARC_PLAT_FPGA_LEGACY=y
23CONFIG_ARC_BOARD_ML509=y
24# CONFIG_ARC_HAS_RTSC is not set 23# CONFIG_ARC_HAS_RTSC is not set
25CONFIG_ARC_BUILTIN_DTB_NAME="angel4" 24CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
26CONFIG_PREEMPT=y 25CONFIG_PREEMPT=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index c01ba35a4eff..278dacf2a3f9 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -21,7 +21,6 @@ CONFIG_MODULES=y
21# CONFIG_IOSCHED_DEADLINE is not set 21# CONFIG_IOSCHED_DEADLINE is not set
22# CONFIG_IOSCHED_CFQ is not set 22# CONFIG_IOSCHED_CFQ is not set
23CONFIG_ARC_PLAT_FPGA_LEGACY=y 23CONFIG_ARC_PLAT_FPGA_LEGACY=y
24CONFIG_ARC_BOARD_ML509=y
25# CONFIG_ARC_IDE is not set 24# CONFIG_ARC_IDE is not set
26# CONFIG_ARCTANGENT_EMAC is not set 25# CONFIG_ARCTANGENT_EMAC is not set
27# CONFIG_ARC_HAS_RTSC is not set 26# CONFIG_ARC_HAS_RTSC is not set
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 372466b371bf..be33db8a2ee3 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -9,19 +9,16 @@
9#ifndef _ASM_ARC_ARCREGS_H 9#ifndef _ASM_ARC_ARCREGS_H
10#define _ASM_ARC_ARCREGS_H 10#define _ASM_ARC_ARCREGS_H
11 11
12#ifdef __KERNEL__
13
14/* Build Configuration Registers */ 12/* Build Configuration Registers */
15#define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */ 13#define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */
16#define ARC_REG_CRC_BCR 0x62 14#define ARC_REG_CRC_BCR 0x62
17#define ARC_REG_DVFB_BCR 0x64
18#define ARC_REG_EXTARITH_BCR 0x65
19#define ARC_REG_VECBASE_BCR 0x68 15#define ARC_REG_VECBASE_BCR 0x68
20#define ARC_REG_PERIBASE_BCR 0x69 16#define ARC_REG_PERIBASE_BCR 0x69
21#define ARC_REG_FP_BCR 0x6B /* Single-Precision FPU */ 17#define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */
22#define ARC_REG_DPFP_BCR 0x6C /* Dbl Precision FPU */ 18#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */
23#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ 19#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */
24#define ARC_REG_TIMERS_BCR 0x75 20#define ARC_REG_TIMERS_BCR 0x75
21#define ARC_REG_AP_BCR 0x76
25#define ARC_REG_ICCM_BCR 0x78 22#define ARC_REG_ICCM_BCR 0x78
26#define ARC_REG_XY_MEM_BCR 0x79 23#define ARC_REG_XY_MEM_BCR 0x79
27#define ARC_REG_MAC_BCR 0x7a 24#define ARC_REG_MAC_BCR 0x7a
@@ -31,6 +28,9 @@
31#define ARC_REG_MIXMAX_BCR 0x7e 28#define ARC_REG_MIXMAX_BCR 0x7e
32#define ARC_REG_BARREL_BCR 0x7f 29#define ARC_REG_BARREL_BCR 0x7f
33#define ARC_REG_D_UNCACH_BCR 0x6A 30#define ARC_REG_D_UNCACH_BCR 0x6A
31#define ARC_REG_BPU_BCR 0xc0
32#define ARC_REG_ISA_CFG_BCR 0xc1
33#define ARC_REG_SMART_BCR 0xFF
34 34
35/* status32 Bits Positions */ 35/* status32 Bits Positions */
36#define STATUS_AE_BIT 5 /* Exception active */ 36#define STATUS_AE_BIT 5 /* Exception active */
@@ -191,14 +191,6 @@
191#define PAGES_TO_KB(n_pages) ((n_pages) << (PAGE_SHIFT - 10)) 191#define PAGES_TO_KB(n_pages) ((n_pages) << (PAGE_SHIFT - 10))
192#define PAGES_TO_MB(n_pages) (PAGES_TO_KB(n_pages) >> 10) 192#define PAGES_TO_MB(n_pages) (PAGES_TO_KB(n_pages) >> 10)
193 193
194#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
195/* These DPFP regs need to be saved/restored across ctx-sw */
196struct arc_fpu {
197 struct {
198 unsigned int l, h;
199 } aux_dpfp[2];
200};
201#endif
202 194
203/* 195/*
204 *************************************************************** 196 ***************************************************************
@@ -212,27 +204,19 @@ struct bcr_identity {
212#endif 204#endif
213}; 205};
214 206
215#define EXTN_SWAP_VALID 0x1 207struct bcr_isa {
216#define EXTN_NORM_VALID 0x2
217#define EXTN_MINMAX_VALID 0x2
218#define EXTN_BARREL_VALID 0x2
219
220struct bcr_extn {
221#ifdef CONFIG_CPU_BIG_ENDIAN 208#ifdef CONFIG_CPU_BIG_ENDIAN
222 unsigned int pad:20, crc:1, ext_arith:2, mul:2, barrel:2, minmax:2, 209 unsigned int pad1:23, atomic1:1, ver:8;
223 norm:2, swap:1;
224#else 210#else
225 unsigned int swap:1, norm:2, minmax:2, barrel:2, mul:2, ext_arith:2, 211 unsigned int ver:8, atomic1:1, pad1:23;
226 crc:1, pad:20;
227#endif 212#endif
228}; 213};
229 214
230/* DSP Options Ref Manual */ 215struct bcr_mpy {
231struct bcr_extn_mac_mul {
232#ifdef CONFIG_CPU_BIG_ENDIAN 216#ifdef CONFIG_CPU_BIG_ENDIAN
233 unsigned int pad:16, type:8, ver:8; 217 unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
234#else 218#else
235 unsigned int ver:8, type:8, pad:16; 219 unsigned int ver:8, type:2, cycles:2, dsp:4, x1616:8, pad:8;
236#endif 220#endif
237}; 221};
238 222
@@ -251,6 +235,7 @@ struct bcr_perip {
251 unsigned int pad:8, sz:8, pad2:8, start:8; 235 unsigned int pad:8, sz:8, pad2:8, start:8;
252#endif 236#endif
253}; 237};
238
254struct bcr_iccm { 239struct bcr_iccm {
255#ifdef CONFIG_CPU_BIG_ENDIAN 240#ifdef CONFIG_CPU_BIG_ENDIAN
256 unsigned int base:16, pad:5, sz:3, ver:8; 241 unsigned int base:16, pad:5, sz:3, ver:8;
@@ -277,8 +262,8 @@ struct bcr_dccm {
277#endif 262#endif
278}; 263};
279 264
280/* Both SP and DP FPU BCRs have same format */ 265/* ARCompact: Both SP and DP FPU BCRs have same format */
281struct bcr_fp { 266struct bcr_fp_arcompact {
282#ifdef CONFIG_CPU_BIG_ENDIAN 267#ifdef CONFIG_CPU_BIG_ENDIAN
283 unsigned int fast:1, ver:8; 268 unsigned int fast:1, ver:8;
284#else 269#else
@@ -286,6 +271,30 @@ struct bcr_fp {
286#endif 271#endif
287}; 272};
288 273
274struct bcr_timer {
275#ifdef CONFIG_CPU_BIG_ENDIAN
276 unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8;
277#else
278 unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15;
279#endif
280};
281
282struct bcr_bpu_arcompact {
283#ifdef CONFIG_CPU_BIG_ENDIAN
284 unsigned int pad2:19, fam:1, pad:2, ent:2, ver:8;
285#else
286 unsigned int ver:8, ent:2, pad:2, fam:1, pad2:19;
287#endif
288};
289
290struct bcr_generic {
291#ifdef CONFIG_CPU_BIG_ENDIAN
292 unsigned int pad:24, ver:8;
293#else
294 unsigned int ver:8, pad:24;
295#endif
296};
297
289/* 298/*
290 ******************************************************************* 299 *******************************************************************
291 * Generic structures to hold build configuration used at runtime 300 * Generic structures to hold build configuration used at runtime
@@ -299,6 +308,10 @@ struct cpuinfo_arc_cache {
299 unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6; 308 unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6;
300}; 309};
301 310
311struct cpuinfo_arc_bpu {
312 unsigned int ver, full, num_cache, num_pred;
313};
314
302struct cpuinfo_arc_ccm { 315struct cpuinfo_arc_ccm {
303 unsigned int base_addr, sz; 316 unsigned int base_addr, sz;
304}; 317};
@@ -306,21 +319,25 @@ struct cpuinfo_arc_ccm {
306struct cpuinfo_arc { 319struct cpuinfo_arc {
307 struct cpuinfo_arc_cache icache, dcache; 320 struct cpuinfo_arc_cache icache, dcache;
308 struct cpuinfo_arc_mmu mmu; 321 struct cpuinfo_arc_mmu mmu;
322 struct cpuinfo_arc_bpu bpu;
309 struct bcr_identity core; 323 struct bcr_identity core;
310 unsigned int timers; 324 struct bcr_isa isa;
325 struct bcr_timer timers;
311 unsigned int vec_base; 326 unsigned int vec_base;
312 unsigned int uncached_base; 327 unsigned int uncached_base;
313 struct cpuinfo_arc_ccm iccm, dccm; 328 struct cpuinfo_arc_ccm iccm, dccm;
314 struct bcr_extn extn; 329 struct {
330 unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
331 fpu_sp:1, fpu_dp:1, pad2:6,
332 debug:1, ap:1, smart:1, rtt:1, pad3:4,
333 pad4:8;
334 } extn;
335 struct bcr_mpy extn_mpy;
315 struct bcr_extn_xymem extn_xymem; 336 struct bcr_extn_xymem extn_xymem;
316 struct bcr_extn_mac_mul extn_mac_mul;
317 struct bcr_fp fp, dpfp;
318}; 337};
319 338
320extern struct cpuinfo_arc cpuinfo_arc700[]; 339extern struct cpuinfo_arc cpuinfo_arc700[];
321 340
322#endif /* __ASEMBLY__ */ 341#endif /* __ASEMBLY__ */
323 342
324#endif /* __KERNEL__ */
325
326#endif /* _ASM_ARC_ARCREGS_H */ 343#endif /* _ASM_ARC_ARCREGS_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 173f303a868f..067551b6920a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -9,8 +9,6 @@
9#ifndef _ASM_ARC_ATOMIC_H 9#ifndef _ASM_ARC_ATOMIC_H
10#define _ASM_ARC_ATOMIC_H 10#define _ASM_ARC_ATOMIC_H
11 11
12#ifdef __KERNEL__
13
14#ifndef __ASSEMBLY__ 12#ifndef __ASSEMBLY__
15 13
16#include <linux/types.h> 14#include <linux/types.h>
@@ -170,5 +168,3 @@ ATOMIC_OP(and, &=, and)
170#endif 168#endif
171 169
172#endif 170#endif
173
174#endif
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index ebc0cf3164dc..1a5bf07eefe2 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -13,8 +13,6 @@
13#error only <linux/bitops.h> can be included directly 13#error only <linux/bitops.h> can be included directly
14#endif 14#endif
15 15
16#ifdef __KERNEL__
17
18#ifndef __ASSEMBLY__ 16#ifndef __ASSEMBLY__
19 17
20#include <linux/types.h> 18#include <linux/types.h>
@@ -508,6 +506,4 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word)
508 506
509#endif /* !__ASSEMBLY__ */ 507#endif /* !__ASSEMBLY__ */
510 508
511#endif /* __KERNEL__ */
512
513#endif 509#endif
diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h
index 5b18e94c6678..ea022d47896c 100644
--- a/arch/arc/include/asm/bug.h
+++ b/arch/arc/include/asm/bug.h
@@ -21,10 +21,9 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
21 unsigned long address); 21 unsigned long address);
22void die(const char *str, struct pt_regs *regs, unsigned long address); 22void die(const char *str, struct pt_regs *regs, unsigned long address);
23 23
24#define BUG() do { \ 24#define BUG() do { \
25 dump_stack(); \ 25 pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
26 pr_warn("Kernel BUG in %s: %s: %d!\n", \ 26 dump_stack(); \
27 __FILE__, __func__, __LINE__); \
28} while (0) 27} while (0)
29 28
30#define HAVE_ARCH_BUG 29#define HAVE_ARCH_BUG
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index b3c750979aa1..7861255da32d 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -20,7 +20,7 @@
20#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) 20#define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1))
21 21
22/* 22/*
23 * ARC700 doesn't cache any access in top 256M. 23 * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF)
24 * Ideal for wiring memory mapped peripherals as we don't need to do 24 * Ideal for wiring memory mapped peripherals as we don't need to do
25 * explicit uncached accesses (LD.di/ST.di) hence more portable drivers 25 * explicit uncached accesses (LD.di/ST.di) hence more portable drivers
26 */ 26 */
diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h
index 87b918585c4a..c2453ee62801 100644
--- a/arch/arc/include/asm/current.h
+++ b/arch/arc/include/asm/current.h
@@ -12,8 +12,6 @@
12#ifndef _ASM_ARC_CURRENT_H 12#ifndef _ASM_ARC_CURRENT_H
13#define _ASM_ARC_CURRENT_H 13#define _ASM_ARC_CURRENT_H
14 14
15#ifdef __KERNEL__
16
17#ifndef __ASSEMBLY__ 15#ifndef __ASSEMBLY__
18 16
19#ifdef CONFIG_ARC_CURR_IN_REG 17#ifdef CONFIG_ARC_CURR_IN_REG
@@ -27,6 +25,4 @@ register struct task_struct *curr_arc asm("r25");
27 25
28#endif /* ! __ASSEMBLY__ */ 26#endif /* ! __ASSEMBLY__ */
29 27
30#endif /* __KERNEL__ */
31
32#endif /* _ASM_ARC_CURRENT_H */ 28#endif /* _ASM_ARC_CURRENT_H */
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 587df8236e8b..742816f1b210 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -15,8 +15,6 @@
15 * -Conditionally disable interrupts (if they are not enabled, don't disable) 15 * -Conditionally disable interrupts (if they are not enabled, don't disable)
16*/ 16*/
17 17
18#ifdef __KERNEL__
19
20#include <asm/arcregs.h> 18#include <asm/arcregs.h>
21 19
22/* status32 Reg bits related to Interrupt Handling */ 20/* status32 Reg bits related to Interrupt Handling */
@@ -169,6 +167,4 @@ static inline int arch_irqs_disabled(void)
169 167
170#endif /* __ASSEMBLY__ */ 168#endif /* __ASSEMBLY__ */
171 169
172#endif /* KERNEL */
173
174#endif 170#endif
diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h
index b65fca7ffeb5..fea931634136 100644
--- a/arch/arc/include/asm/kgdb.h
+++ b/arch/arc/include/asm/kgdb.h
@@ -19,7 +19,7 @@
19 * register API yet */ 19 * register API yet */
20#undef DBG_MAX_REG_NUM 20#undef DBG_MAX_REG_NUM
21 21
22#define GDB_MAX_REGS 39 22#define GDB_MAX_REGS 87
23 23
24#define BREAK_INSTR_SIZE 2 24#define BREAK_INSTR_SIZE 2
25#define CACHE_FLUSH_IS_SAFE 1 25#define CACHE_FLUSH_IS_SAFE 1
@@ -33,23 +33,27 @@ static inline void arch_kgdb_breakpoint(void)
33 33
34extern void kgdb_trap(struct pt_regs *regs); 34extern void kgdb_trap(struct pt_regs *regs);
35 35
36enum arc700_linux_regnums { 36/* This is the numbering of registers according to the GDB. See GDB's
37 * arc-tdep.h for details.
38 *
39 * Registers are ordered for GDB 7.5. It is incompatible with GDB 6.8. */
40enum arc_linux_regnums {
37 _R0 = 0, 41 _R0 = 0,
38 _R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13, 42 _R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13,
39 _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24, 43 _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24,
40 _R25, _R26, 44 _R25, _R26,
41 _BTA = 27, 45 _FP = 27,
42 _LP_START = 28, 46 __SP = 28,
43 _LP_END = 29, 47 _R30 = 30,
44 _LP_COUNT = 30, 48 _BLINK = 31,
45 _STATUS32 = 31, 49 _LP_COUNT = 60,
46 _BLINK = 32, 50 _STOP_PC = 64,
47 _FP = 33, 51 _RET = 64,
48 __SP = 34, 52 _LP_START = 65,
49 _EFA = 35, 53 _LP_END = 66,
50 _RET = 36, 54 _STATUS32 = 67,
51 _ORIG_R8 = 37, 55 _ECR = 76,
52 _STOP_PC = 38 56 _BTA = 82,
53}; 57};
54 58
55#else 59#else
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 82588f3ba77f..210fe97464c3 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -14,12 +14,19 @@
14#ifndef __ASM_ARC_PROCESSOR_H 14#ifndef __ASM_ARC_PROCESSOR_H
15#define __ASM_ARC_PROCESSOR_H 15#define __ASM_ARC_PROCESSOR_H
16 16
17#ifdef __KERNEL__
18
19#ifndef __ASSEMBLY__ 17#ifndef __ASSEMBLY__
20 18
21#include <asm/ptrace.h> 19#include <asm/ptrace.h>
22 20
21#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
22/* These DPFP regs need to be saved/restored across ctx-sw */
23struct arc_fpu {
24 struct {
25 unsigned int l, h;
26 } aux_dpfp[2];
27};
28#endif
29
23/* Arch specific stuff which needs to be saved per task. 30/* Arch specific stuff which needs to be saved per task.
24 * However these items are not so important so as to earn a place in 31 * However these items are not so important so as to earn a place in
25 * struct thread_info 32 * struct thread_info
@@ -128,6 +135,4 @@ extern unsigned int get_wchan(struct task_struct *p);
128 */ 135 */
129#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) 136#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
130 137
131#endif /* __KERNEL__ */
132
133#endif /* __ASM_ARC_PROCESSOR_H */ 138#endif /* __ASM_ARC_PROCESSOR_H */
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index e10f8cef56a8..6e3ef5ba4f74 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -29,7 +29,6 @@ struct cpuinfo_data {
29}; 29};
30 30
31extern int root_mountflags, end_mem; 31extern int root_mountflags, end_mem;
32extern int running_on_hw;
33 32
34void setup_processor(void); 33void setup_processor(void);
35void __init setup_arch_memory(void); 34void __init setup_arch_memory(void);
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 5d06eee43ea9..3845b9e94f69 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -59,7 +59,15 @@ struct plat_smp_ops {
59/* TBD: stop exporting it for direct population by platform */ 59/* TBD: stop exporting it for direct population by platform */
60extern struct plat_smp_ops plat_smp_ops; 60extern struct plat_smp_ops plat_smp_ops;
61 61
62#endif /* CONFIG_SMP */ 62#else /* CONFIG_SMP */
63
64static inline void smp_init_cpus(void) {}
65static inline const char *arc_platform_smp_cpuinfo(void)
66{
67 return "";
68}
69
70#endif /* !CONFIG_SMP */
63 71
64/* 72/*
65 * ARC700 doesn't support atomic Read-Modify-Write ops. 73 * ARC700 doesn't support atomic Read-Modify-Write ops.
diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h
index 87676c8f1412..95822b550a18 100644
--- a/arch/arc/include/asm/string.h
+++ b/arch/arc/include/asm/string.h
@@ -17,8 +17,6 @@
17 17
18#include <linux/types.h> 18#include <linux/types.h>
19 19
20#ifdef __KERNEL__
21
22#define __HAVE_ARCH_MEMSET 20#define __HAVE_ARCH_MEMSET
23#define __HAVE_ARCH_MEMCPY 21#define __HAVE_ARCH_MEMCPY
24#define __HAVE_ARCH_MEMCMP 22#define __HAVE_ARCH_MEMCMP
@@ -36,5 +34,4 @@ extern char *strcpy(char *dest, const char *src);
36extern int strcmp(const char *cs, const char *ct); 34extern int strcmp(const char *cs, const char *ct);
37extern __kernel_size_t strlen(const char *); 35extern __kernel_size_t strlen(const char *);
38 36
39#endif /* __KERNEL__ */
40#endif /* _ASM_ARC_STRING_H */ 37#endif /* _ASM_ARC_STRING_H */
diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h
index dd785befe7fd..e56f9fcc5581 100644
--- a/arch/arc/include/asm/syscalls.h
+++ b/arch/arc/include/asm/syscalls.h
@@ -9,8 +9,6 @@
9#ifndef _ASM_ARC_SYSCALLS_H 9#ifndef _ASM_ARC_SYSCALLS_H
10#define _ASM_ARC_SYSCALLS_H 1 10#define _ASM_ARC_SYSCALLS_H 1
11 11
12#ifdef __KERNEL__
13
14#include <linux/compiler.h> 12#include <linux/compiler.h>
15#include <linux/linkage.h> 13#include <linux/linkage.h>
16#include <linux/types.h> 14#include <linux/types.h>
@@ -22,6 +20,4 @@ int sys_arc_gettls(void);
22 20
23#include <asm-generic/syscalls.h> 21#include <asm-generic/syscalls.h>
24 22
25#endif /* __KERNEL__ */
26
27#endif 23#endif
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 45be21672011..02bc5ec0fb2e 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -16,8 +16,6 @@
16#ifndef _ASM_THREAD_INFO_H 16#ifndef _ASM_THREAD_INFO_H
17#define _ASM_THREAD_INFO_H 17#define _ASM_THREAD_INFO_H
18 18
19#ifdef __KERNEL__
20
21#include <asm/page.h> 19#include <asm/page.h>
22 20
23#ifdef CONFIG_16KSTACKS 21#ifdef CONFIG_16KSTACKS
@@ -114,6 +112,4 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
114 * syscall, so all that reamins to be tested is _TIF_WORK_MASK 112 * syscall, so all that reamins to be tested is _TIF_WORK_MASK
115 */ 113 */
116 114
117#endif /* __KERNEL__ */
118
119#endif /* _ASM_THREAD_INFO_H */ 115#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h
index 3e5f071bc00c..6da6b4edaeda 100644
--- a/arch/arc/include/asm/unaligned.h
+++ b/arch/arc/include/asm/unaligned.h
@@ -14,7 +14,7 @@
14#include <asm-generic/unaligned.h> 14#include <asm-generic/unaligned.h>
15#include <asm/ptrace.h> 15#include <asm/ptrace.h>
16 16
17#ifdef CONFIG_ARC_MISALIGN_ACCESS 17#ifdef CONFIG_ARC_EMUL_UNALIGNED
18int misaligned_fixup(unsigned long address, struct pt_regs *regs, 18int misaligned_fixup(unsigned long address, struct pt_regs *regs,
19 struct callee_regs *cregs); 19 struct callee_regs *cregs);
20#else 20#else
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 8004b4fa6461..113f2033da9f 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -16,7 +16,7 @@ obj-$(CONFIG_MODULES) += arcksyms.o module.o
16obj-$(CONFIG_SMP) += smp.o 16obj-$(CONFIG_SMP) += smp.o
17obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o 17obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o
18obj-$(CONFIG_KPROBES) += kprobes.o 18obj-$(CONFIG_KPROBES) += kprobes.o
19obj-$(CONFIG_ARC_MISALIGN_ACCESS) += unaligned.o 19obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o
20obj-$(CONFIG_KGDB) += kgdb.o 20obj-$(CONFIG_KGDB) += kgdb.o
21obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o 21obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o
22obj-$(CONFIG_PERF_EVENTS) += perf_event.o 22obj-$(CONFIG_PERF_EVENTS) += perf_event.o
diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c
index b8a549c4f540..3b7cd4864ba2 100644
--- a/arch/arc/kernel/disasm.c
+++ b/arch/arc/kernel/disasm.c
@@ -15,7 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <asm/disasm.h> 16#include <asm/disasm.h>
17 17
18#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_MISALIGN_ACCESS) || \ 18#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_EMUL_UNALIGNED) || \
19 defined(CONFIG_KPROBES) 19 defined(CONFIG_KPROBES)
20 20
21/* disasm_instr: Analyses instruction at addr, stores 21/* disasm_instr: Analyses instruction at addr, stores
@@ -535,4 +535,4 @@ int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs,
535 return instr.is_branch; 535 return instr.is_branch;
536} 536}
537 537
538#endif /* CONFIG_KGDB || CONFIG_ARC_MISALIGN_ACCESS || CONFIG_KPROBES */ 538#endif /* CONFIG_KGDB || CONFIG_ARC_EMUL_UNALIGNED || CONFIG_KPROBES */
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 4d2481bd8b98..b0e8666fdccc 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -91,16 +91,6 @@ stext:
91 st r0, [@uboot_tag] 91 st r0, [@uboot_tag]
92 st r2, [@uboot_arg] 92 st r2, [@uboot_arg]
93 93
94 ; Identify if running on ISS vs Silicon
95 ; IDENTITY Reg [ 3 2 1 0 ]
96 ; (chip-id) ^^^^^ ==> 0xffff for ISS
97 lr r0, [identity]
98 lsr r3, r0, 16
99 cmp r3, 0xffff
100 mov.z r4, 0
101 mov.nz r4, 1
102 st r4, [@running_on_hw]
103
104 ; setup "current" tsk and optionally cache it in dedicated r25 94 ; setup "current" tsk and optionally cache it in dedicated r25
105 mov r9, @init_task 95 mov r9, @init_task
106 SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch 96 SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index a2ff5c5d1450..ecf6a7869375 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -158,11 +158,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
158 return -1; 158 return -1;
159} 159}
160 160
161unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
162{
163 return instruction_pointer(regs);
164}
165
166int kgdb_arch_init(void) 161int kgdb_arch_init(void)
167{ 162{
168 single_step_data.armed = 0; 163 single_step_data.armed = 0;
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index b9a5685a990e..ae1c485cbc68 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -244,25 +244,23 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
244 pr_err("This core does not have performance counters!\n"); 244 pr_err("This core does not have performance counters!\n");
245 return -ENODEV; 245 return -ENODEV;
246 } 246 }
247 BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS);
247 248
248 arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), 249 READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
249 GFP_KERNEL); 250 if (!cc_bcr.v) {
251 pr_err("Performance counters exist, but no countable conditions?\n");
252 return -ENODEV;
253 }
254
255 arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
250 if (!arc_pmu) 256 if (!arc_pmu)
251 return -ENOMEM; 257 return -ENOMEM;
252 258
253 arc_pmu->n_counters = pct_bcr.c; 259 arc_pmu->n_counters = pct_bcr.c;
254 BUG_ON(arc_pmu->n_counters > ARC_PMU_MAX_HWEVENTS);
255
256 arc_pmu->counter_size = 32 + (pct_bcr.s << 4); 260 arc_pmu->counter_size = 32 + (pct_bcr.s << 4);
257 pr_info("ARC PMU found with %d counters of size %d bits\n",
258 arc_pmu->n_counters, arc_pmu->counter_size);
259
260 READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
261
262 if (!cc_bcr.v)
263 pr_err("Strange! Performance counters exist, but no countable conditions?\n");
264 261
265 pr_info("ARC PMU has %d countable conditions\n", cc_bcr.c); 262 pr_info("ARC perf\t: %d counters (%d bits), %d countable conditions\n",
263 arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
266 264
267 cc_name.str[8] = 0; 265 cc_name.str[8] = 0;
268 for (i = 0; i < PERF_COUNT_HW_MAX; i++) 266 for (i = 0; i < PERF_COUNT_HW_MAX; i++)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 119dddb752b2..252bf603db9c 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -13,7 +13,9 @@
13#include <linux/console.h> 13#include <linux/console.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/cpu.h> 15#include <linux/cpu.h>
16#include <linux/clk-provider.h>
16#include <linux/of_fdt.h> 17#include <linux/of_fdt.h>
18#include <linux/of_platform.h>
17#include <linux/cache.h> 19#include <linux/cache.h>
18#include <asm/sections.h> 20#include <asm/sections.h>
19#include <asm/arcregs.h> 21#include <asm/arcregs.h>
@@ -24,11 +26,10 @@
24#include <asm/unwind.h> 26#include <asm/unwind.h>
25#include <asm/clk.h> 27#include <asm/clk.h>
26#include <asm/mach_desc.h> 28#include <asm/mach_desc.h>
29#include <asm/smp.h>
27 30
28#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x)) 31#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x))
29 32
30int running_on_hw = 1; /* vs. on ISS */
31
32/* Part of U-boot ABI: see head.S */ 33/* Part of U-boot ABI: see head.S */
33int __initdata uboot_tag; 34int __initdata uboot_tag;
34char __initdata *uboot_arg; 35char __initdata *uboot_arg;
@@ -42,26 +43,26 @@ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
42static void read_arc_build_cfg_regs(void) 43static void read_arc_build_cfg_regs(void)
43{ 44{
44 struct bcr_perip uncached_space; 45 struct bcr_perip uncached_space;
46 struct bcr_generic bcr;
45 struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; 47 struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
46 FIX_PTR(cpu); 48 FIX_PTR(cpu);
47 49
48 READ_BCR(AUX_IDENTITY, cpu->core); 50 READ_BCR(AUX_IDENTITY, cpu->core);
51 READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa);
49 52
50 cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); 53 READ_BCR(ARC_REG_TIMERS_BCR, cpu->timers);
51 cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); 54 cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
52 55
53 READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); 56 READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
54 cpu->uncached_base = uncached_space.start << 24; 57 cpu->uncached_base = uncached_space.start << 24;
55 58
56 cpu->extn.mul = read_aux_reg(ARC_REG_MUL_BCR); 59 READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
57 cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR);
58 cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR);
59 cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR);
60 cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR);
61 READ_BCR(ARC_REG_MAC_BCR, cpu->extn_mac_mul);
62 60
63 cpu->extn.ext_arith = read_aux_reg(ARC_REG_EXTARITH_BCR); 61 cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR) > 1 ? 1 : 0; /* 2,3 */
64 cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR); 62 cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR) > 1 ? 1 : 0; /* 2,3 */
63 cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */
64 cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
65 cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
65 66
66 /* Note that we read the CCM BCRs independent of kernel config 67 /* Note that we read the CCM BCRs independent of kernel config
67 * This is to catch the cases where user doesn't know that 68 * This is to catch the cases where user doesn't know that
@@ -95,43 +96,76 @@ static void read_arc_build_cfg_regs(void)
95 read_decode_mmu_bcr(); 96 read_decode_mmu_bcr();
96 read_decode_cache_bcr(); 97 read_decode_cache_bcr();
97 98
98 READ_BCR(ARC_REG_FP_BCR, cpu->fp); 99 {
99 READ_BCR(ARC_REG_DPFP_BCR, cpu->dpfp); 100 struct bcr_fp_arcompact sp, dp;
101 struct bcr_bpu_arcompact bpu;
102
103 READ_BCR(ARC_REG_FP_BCR, sp);
104 READ_BCR(ARC_REG_DPFP_BCR, dp);
105 cpu->extn.fpu_sp = sp.ver ? 1 : 0;
106 cpu->extn.fpu_dp = dp.ver ? 1 : 0;
107
108 READ_BCR(ARC_REG_BPU_BCR, bpu);
109 cpu->bpu.ver = bpu.ver;
110 cpu->bpu.full = bpu.fam ? 1 : 0;
111 if (bpu.ent) {
112 cpu->bpu.num_cache = 256 << (bpu.ent - 1);
113 cpu->bpu.num_pred = 256 << (bpu.ent - 1);
114 }
115 }
116
117 READ_BCR(ARC_REG_AP_BCR, bcr);
118 cpu->extn.ap = bcr.ver ? 1 : 0;
119
120 READ_BCR(ARC_REG_SMART_BCR, bcr);
121 cpu->extn.smart = bcr.ver ? 1 : 0;
122
123 cpu->extn.debug = cpu->extn.ap | cpu->extn.smart;
100} 124}
101 125
102static const struct cpuinfo_data arc_cpu_tbl[] = { 126static const struct cpuinfo_data arc_cpu_tbl[] = {
103 { {0x10, "ARCTangent A5"}, 0x1F},
104 { {0x20, "ARC 600" }, 0x2F}, 127 { {0x20, "ARC 600" }, 0x2F},
105 { {0x30, "ARC 700" }, 0x33}, 128 { {0x30, "ARC 700" }, 0x33},
106 { {0x34, "ARC 700 R4.10"}, 0x34}, 129 { {0x34, "ARC 700 R4.10"}, 0x34},
130 { {0x35, "ARC 700 R4.11"}, 0x35},
107 { {0x00, NULL } } 131 { {0x00, NULL } }
108}; 132};
109 133
134#define IS_AVAIL1(v, str) ((v) ? str : "")
135#define IS_USED(cfg) (IS_ENABLED(cfg) ? "" : "(not used) ")
136#define IS_AVAIL2(v, str, cfg) IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg))
137
110static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) 138static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
111{ 139{
112 int n = 0;
113 struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; 140 struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
114 struct bcr_identity *core = &cpu->core; 141 struct bcr_identity *core = &cpu->core;
115 const struct cpuinfo_data *tbl; 142 const struct cpuinfo_data *tbl;
116 int be = 0; 143 char *isa_nm;
117#ifdef CONFIG_CPU_BIG_ENDIAN 144 int i, be, atomic;
118 be = 1; 145 int n = 0;
119#endif 146
120 FIX_PTR(cpu); 147 FIX_PTR(cpu);
121 148
149 {
150 isa_nm = "ARCompact";
151 be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
152
153 atomic = cpu->isa.atomic1;
154 if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */
155 atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
156 }
157
122 n += scnprintf(buf + n, len - n, 158 n += scnprintf(buf + n, len - n,
123 "\nARC IDENTITY\t: Family [%#02x]" 159 "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
124 " Cpu-id [%#02x] Chip-id [%#4x]\n", 160 core->family, core->cpu_id, core->chip_id);
125 core->family, core->cpu_id,
126 core->chip_id);
127 161
128 for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { 162 for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) {
129 if ((core->family >= tbl->info.id) && 163 if ((core->family >= tbl->info.id) &&
130 (core->family <= tbl->up_range)) { 164 (core->family <= tbl->up_range)) {
131 n += scnprintf(buf + n, len - n, 165 n += scnprintf(buf + n, len - n,
132 "processor\t: %s %s\n", 166 "processor [%d]\t: %s (%s ISA) %s\n",
133 tbl->info.str, 167 cpu_id, tbl->info.str, isa_nm,
134 be ? "[Big Endian]" : ""); 168 IS_AVAIL1(be, "[Big-Endian]"));
135 break; 169 break;
136 } 170 }
137 } 171 }
@@ -143,34 +177,35 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
143 (unsigned int)(arc_get_core_freq() / 1000000), 177 (unsigned int)(arc_get_core_freq() / 1000000),
144 (unsigned int)(arc_get_core_freq() / 10000) % 100); 178 (unsigned int)(arc_get_core_freq() / 10000) % 100);
145 179
146 n += scnprintf(buf + n, len - n, "Timers\t\t: %s %s\n", 180 n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
147 (cpu->timers & 0x200) ? "TIMER1" : "", 181 IS_AVAIL1(cpu->timers.t0, "Timer0 "),
148 (cpu->timers & 0x100) ? "TIMER0" : ""); 182 IS_AVAIL1(cpu->timers.t1, "Timer1 "),
183 IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC));
149 184
150 n += scnprintf(buf + n, len - n, "Vect Tbl Base\t: %#x\n", 185 n += i = scnprintf(buf + n, len - n, "%s%s",
151 cpu->vec_base); 186 IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC));
152 187
153 n += scnprintf(buf + n, len - n, "UNCACHED Base\t: %#x\n", 188 if (i)
154 cpu->uncached_base); 189 n += scnprintf(buf + n, len - n, "\n\t\t: ");
155 190
156 return buf; 191 n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
157} 192 IS_AVAIL1(cpu->extn_mpy.ver, "mpy "),
193 IS_AVAIL1(cpu->extn.norm, "norm "),
194 IS_AVAIL1(cpu->extn.barrel, "barrel-shift "),
195 IS_AVAIL1(cpu->extn.swap, "swap "),
196 IS_AVAIL1(cpu->extn.minmax, "minmax "),
197 IS_AVAIL1(cpu->extn.crc, "crc "),
198 IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE));
158 199
159static const struct id_to_str mul_type_nm[] = { 200 if (cpu->bpu.ver)
160 { 0x0, "N/A"}, 201 n += scnprintf(buf + n, len - n,
161 { 0x1, "32x32 (spl Result Reg)" }, 202 "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
162 { 0x2, "32x32 (ANY Result Reg)" } 203 IS_AVAIL1(cpu->bpu.full, "full"),
163}; 204 IS_AVAIL1(!cpu->bpu.full, "partial"),
205 cpu->bpu.num_cache, cpu->bpu.num_pred);
164 206
165static const struct id_to_str mac_mul_nm[] = { 207 return buf;
166 {0x0, "N/A"}, 208}
167 {0x1, "N/A"},
168 {0x2, "Dual 16 x 16"},
169 {0x3, "N/A"},
170 {0x4, "32x16"},
171 {0x5, "N/A"},
172 {0x6, "Dual 16x16 and 32x16"}
173};
174 209
175static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) 210static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
176{ 211{
@@ -178,67 +213,46 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
178 struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; 213 struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
179 214
180 FIX_PTR(cpu); 215 FIX_PTR(cpu);
181#define IS_AVAIL1(var, str) ((var) ? str : "")
182#define IS_AVAIL2(var, str) ((var == 0x2) ? str : "")
183#define IS_USED(cfg) (IS_ENABLED(cfg) ? "(in-use)" : "(not used)")
184 216
185 n += scnprintf(buf + n, len - n, 217 n += scnprintf(buf + n, len - n,
186 "Extn [700-Base]\t: %s %s %s %s %s %s\n", 218 "Vector Table\t: %#x\nUncached Base\t: %#x\n",
187 IS_AVAIL2(cpu->extn.norm, "norm,"), 219 cpu->vec_base, cpu->uncached_base);
188 IS_AVAIL2(cpu->extn.barrel, "barrel-shift,"), 220
189 IS_AVAIL1(cpu->extn.swap, "swap,"), 221 if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
190 IS_AVAIL2(cpu->extn.minmax, "minmax,"), 222 n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
191 IS_AVAIL1(cpu->extn.crc, "crc,"), 223 IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
192 IS_AVAIL2(cpu->extn.ext_arith, "ext-arith")); 224 IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
193 225
194 n += scnprintf(buf + n, len - n, "Extn [700-MPY]\t: %s", 226 if (cpu->extn.debug)
195 mul_type_nm[cpu->extn.mul].str); 227 n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
196 228 IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
197 n += scnprintf(buf + n, len - n, " MAC MPY: %s\n", 229 IS_AVAIL1(cpu->extn.smart, "smaRT "),
198 mac_mul_nm[cpu->extn_mac_mul.type].str); 230 IS_AVAIL1(cpu->extn.rtt, "RTT "));
199 231
200 if (cpu->core.family == 0x34) { 232 if (cpu->dccm.sz || cpu->iccm.sz)
201 n += scnprintf(buf + n, len - n, 233 n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
202 "Extn [700-4.10]\t: LLOCK/SCOND %s, SWAPE %s, RTSC %s\n", 234 cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
203 IS_USED(CONFIG_ARC_HAS_LLSC),
204 IS_USED(CONFIG_ARC_HAS_SWAPE),
205 IS_USED(CONFIG_ARC_HAS_RTSC));
206 }
207
208 n += scnprintf(buf + n, len - n, "Extn [CCM]\t: %s",
209 !(cpu->dccm.sz || cpu->iccm.sz) ? "N/A" : "");
210
211 if (cpu->dccm.sz)
212 n += scnprintf(buf + n, len - n, "DCCM: @ %x, %d KB ",
213 cpu->dccm.base_addr, TO_KB(cpu->dccm.sz));
214
215 if (cpu->iccm.sz)
216 n += scnprintf(buf + n, len - n, "ICCM: @ %x, %d KB",
217 cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); 235 cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
218 236
219 n += scnprintf(buf + n, len - n, "\nExtn [FPU]\t: %s",
220 !(cpu->fp.ver || cpu->dpfp.ver) ? "N/A" : "");
221
222 if (cpu->fp.ver)
223 n += scnprintf(buf + n, len - n, "SP [v%d] %s",
224 cpu->fp.ver, cpu->fp.fast ? "(fast)" : "");
225
226 if (cpu->dpfp.ver)
227 n += scnprintf(buf + n, len - n, "DP [v%d] %s",
228 cpu->dpfp.ver, cpu->dpfp.fast ? "(fast)" : "");
229
230 n += scnprintf(buf + n, len - n, "\n");
231
232 n += scnprintf(buf + n, len - n, 237 n += scnprintf(buf + n, len - n,
233 "OS ABI [v3]\t: no-legacy-syscalls\n"); 238 "OS ABI [v3]\t: no-legacy-syscalls\n");
234 239
235 return buf; 240 return buf;
236} 241}
237 242
238static void arc_chk_ccms(void) 243static void arc_chk_core_config(void)
239{ 244{
240#if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
241 struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; 245 struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
246 int fpu_enabled;
247
248 if (!cpu->timers.t0)
249 panic("Timer0 is not present!\n");
250
251 if (!cpu->timers.t1)
252 panic("Timer1 is not present!\n");
253
254 if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc)
255 panic("RTSC is not present\n");
242 256
243#ifdef CONFIG_ARC_HAS_DCCM 257#ifdef CONFIG_ARC_HAS_DCCM
244 /* 258 /*
@@ -256,33 +270,20 @@ static void arc_chk_ccms(void)
256 if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz) 270 if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz)
257 panic("Linux built with incorrect ICCM Size\n"); 271 panic("Linux built with incorrect ICCM Size\n");
258#endif 272#endif
259#endif
260}
261 273
262/* 274 /*
263 * Ensure that FP hardware and kernel config match 275 * FP hardware/software config sanity
264 * -If hardware contains DPFP, kernel needs to save/restore FPU state 276 * -If hardware contains DPFP, kernel needs to save/restore FPU state
265 * across context switches 277 * -If not, it will crash trying to save/restore the non-existant regs
266 * -If hardware lacks DPFP, but kernel configured to save FPU state then 278 *
267 * kernel trying to access non-existant DPFP regs will crash 279 * (only DPDP checked since SP has no arch visible regs)
268 * 280 */
269 * We only check for Dbl precision Floating Point, because only DPFP 281 fpu_enabled = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE);
270 * hardware has dedicated regs which need to be saved/restored on ctx-sw
271 * (Single Precision uses core regs), thus kernel is kind of oblivious to it
272 */
273static void arc_chk_fpu(void)
274{
275 struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
276 282
277 if (cpu->dpfp.ver) { 283 if (cpu->extn.fpu_dp && !fpu_enabled)
278#ifndef CONFIG_ARC_FPU_SAVE_RESTORE 284 pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
279 pr_warn("DPFP support broken in this kernel...\n"); 285 else if (!cpu->extn.fpu_dp && fpu_enabled)
280#endif 286 panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
281 } else {
282#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
283 panic("H/w lacks DPFP support, apps won't work\n");
284#endif
285 }
286} 287}
287 288
288/* 289/*
@@ -303,15 +304,11 @@ void setup_processor(void)
 
 	arc_mmu_init();
 	arc_cache_init();
-	arc_chk_ccms();
 
 	printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
-
-#ifdef CONFIG_SMP
 	printk(arc_platform_smp_cpuinfo());
-#endif
 
-	arc_chk_fpu();
+	arc_chk_core_config();
 }
 
 static inline int is_kernel(unsigned long addr)
@@ -360,11 +357,7 @@ void __init setup_arch(char **cmdline_p)
 	machine_desc->init_early();
 
 	setup_processor();
-
-#ifdef CONFIG_SMP
 	smp_init_cpus();
-#endif
-
 	setup_arch_memory();
 
 	/* copy flat DT out of .init and then unflatten it */
@@ -385,7 +378,13 @@ void __init setup_arch(char **cmdline_p)
 
 static int __init customize_machine(void)
 {
-	/* Add platform devices */
+	of_clk_init(NULL);
+	/*
+	 * Traverses flattened DeviceTree - registering platform devices
+	 * (if any) complete with their resources
+	 */
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+
 	if (machine_desc->init_machine)
 		machine_desc->init_machine();
 
@@ -419,19 +418,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 	seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
 
-	seq_printf(m, "Bogo MIPS : \t%lu.%02lu\n",
+	seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n",
 		   loops_per_jiffy / (500000 / HZ),
 		   (loops_per_jiffy / (5000 / HZ)) % 100);
 
 	seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
-
 	seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
-
 	seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
-
-#ifdef CONFIG_SMP
 	seq_printf(m, arc_platform_smp_cpuinfo());
-#endif
 
 	free_page((unsigned long)str);
done:
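
The conversions above share one idea: replace preprocessor conditionals with plain C. arc_chk_core_config() tests IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE), which expands to a constant 0 or 1, so both branches are always parsed and type-checked and the dead one is optimized away; the now-unconditional printk(arc_platform_smp_cpuinfo()) instead relies on the UP stub returning an empty string (see the smp.c hunk below). A minimal userspace sketch of the IS_ENABLED() idiom — the demo macro here is a stand-in, the real one lives in <linux/kconfig.h>:

    #include <stdio.h>

    #define CONFIG_DEMO_FEATURE 1		/* comment out to "disable" */

    #ifdef CONFIG_DEMO_FEATURE
    #define IS_ENABLED_DEMO()	1
    #else
    #define IS_ENABLED_DEMO()	0
    #endif

    int main(void)
    {
            int feature_enabled = IS_ENABLED_DEMO();

            /* Unlike an #ifdef block, both arms are compiled and checked in
             * every configuration; the optimizer drops the dead branch. */
            if (feature_enabled)
                    printf("feature compiled in\n");
            else
                    printf("feature compiled out\n");
            return 0;
    }
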
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index dcd317c47d09..d01df0c517a2 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -101,7 +101,7 @@ void __weak arc_platform_smp_wait_to_boot(int cpu)
 
 const char *arc_platform_smp_cpuinfo(void)
 {
-	return plat_smp_ops.info;
+	return plat_smp_ops.info ? : "";
 }
 
 /*
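
The `? :` with the middle operand omitted is the GNU C conditional extension: `a ?: b` evaluates `a` once and yields it when non-zero, otherwise `b`. Here it keeps arc_platform_smp_cpuinfo() from ever handing a NULL to printk()/seq_printf() on platforms that leave plat_smp_ops.info unset. A standalone sketch (builds with gcc or clang):

    #include <stdio.h>

    static const char *info;	/* may legitimately stay NULL */

    int main(void)
    {
            const char *s = info ?: "";	/* same as info ? info : "", info evaluated once */

            printf("cpuinfo: \"%s\"\n", s);
            return 0;
    }
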
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 9e1142729fd1..8c3a3e02ba92 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -530,16 +530,9 @@ EXPORT_SYMBOL(dma_cache_wback);
  */
 void flush_icache_range(unsigned long kstart, unsigned long kend)
 {
-	unsigned int tot_sz, off, sz;
-	unsigned long phy, pfn;
+	unsigned int tot_sz;
 
-	/* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
-
-	/* This is not the right API for user virtual address */
-	if (kstart < TASK_SIZE) {
-		BUG_ON("Flush icache range for user virtual addr space");
-		return;
-	}
+	WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);
 
 	/* Shortcut for bigger flush ranges.
 	 * Here we don't care if this was kernel virtual or phy addr
@@ -572,6 +565,9 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 	 * straddles across 2 virtual pages and hence need for loop
 	 */
 	while (tot_sz > 0) {
+		unsigned int off, sz;
+		unsigned long phy, pfn;
+
 		off = kstart % PAGE_SIZE;
 		pfn = vmalloc_to_pfn((void *)kstart);
 		phy = (pfn << PAGE_SHIFT) + off;
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index e1acf0ce5647..7f47d2a56f44 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -609,14 +609,12 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 	int n = 0;
 	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
 
-	n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
-		       p_mmu->ver, TO_KB(p_mmu->pg_sz));
-
 	n += scnprintf(buf + n, len - n,
-		      "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n",
+		      "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+		       p_mmu->ver, TO_KB(p_mmu->pg_sz),
 		       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
-		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? "SASID" : "");
+		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
 
 	return buf;
 }
diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig
index b9f34cf55acf..217593a70751 100644
--- a/arch/arc/plat-arcfpga/Kconfig
+++ b/arch/arc/plat-arcfpga/Kconfig
@@ -8,7 +8,7 @@
 
 menuconfig ARC_PLAT_FPGA_LEGACY
 	bool "\"Legacy\" ARC FPGA dev Boards"
-	select ISS_SMP_EXTN if SMP
+	select ARC_HAS_COH_CACHES if SMP
 	help
 	  Support for ARC development boards, provided by Synopsys.
 	  These are based on FPGA or ISS. e.g.
@@ -18,17 +18,6 @@ menuconfig ARC_PLAT_FPGA_LEGACY
 
 if ARC_PLAT_FPGA_LEGACY
 
-config ARC_BOARD_ANGEL4
-	bool "ARC Angel4"
-	default y
-	help
-	  ARC Angel4 FPGA Ref Platform (Xilinx Virtex Based)
-
-config ARC_BOARD_ML509
-	bool "ML509"
-	help
-	  ARC ML509 FPGA Ref Platform (Xilinx Virtex-5 Based)
-
 config ISS_SMP_EXTN
 	bool "ARC SMP Extensions (ISS Models only)"
 	default n
diff --git a/arch/arc/plat-arcfpga/include/plat/irq.h b/arch/arc/plat-arcfpga/include/plat/irq.h
deleted file mode 100644
index 2c9dea690ac4..000000000000
--- a/arch/arc/plat-arcfpga/include/plat/irq.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * vineetg: Feb 2009
- *  -For AA4 board, IRQ assignments to peripherals
- */
-
-#ifndef __PLAT_IRQ_H
-#define __PLAT_IRQ_H
-
-#define UART0_IRQ	5
-#define UART1_IRQ	10
-#define UART2_IRQ	11
-
-#define IDE_IRQ		13
-#define PCI_IRQ		14
-#define PS2_IRQ		15
-
-#ifdef CONFIG_SMP
-#define IDU_INTERRUPT_0	16
-#endif
-
-#endif
diff --git a/arch/arc/plat-arcfpga/include/plat/memmap.h b/arch/arc/plat-arcfpga/include/plat/memmap.h
deleted file mode 100644
index 5c78e6135a1f..000000000000
--- a/arch/arc/plat-arcfpga/include/plat/memmap.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * vineetg: Feb 2009
- *  -For AA4 board, System Memory Map for Peripherals etc
- */
-
-#ifndef __PLAT_MEMMAP_H
-#define __PLAT_MEMMAP_H
-
-#define UART0_BASE		0xC0FC1000
-#define UART1_BASE		0xC0FC1100
-
-#define IDE_CONTROLLER_BASE	0xC0FC9000
-
-#define AHB_PCI_HOST_BRG_BASE	0xC0FD0000
-
-#define PGU_BASEADDR		0xC0FC8000
-#define VLCK_ADDR		0xC0FCF028
-
-#define BVCI_LAT_UNIT_BASE	0xC0FED000
-
-#define PS2_BASE_ADDR		0xC0FCC000
-
-#endif
diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c
index 1038949a99a1..afc88254acc1 100644
--- a/arch/arc/plat-arcfpga/platform.c
+++ b/arch/arc/plat-arcfpga/platform.c
@@ -8,37 +8,9 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/types.h>
 #include <linux/init.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/console.h>
-#include <linux/of_platform.h>
-#include <asm/setup.h>
-#include <asm/clk.h>
 #include <asm/mach_desc.h>
-#include <plat/memmap.h>
 #include <plat/smp.h>
-#include <plat/irq.h>
-
-static void __init plat_fpga_early_init(void)
-{
-	pr_info("[plat-arcfpga]: registering early dev resources\n");
-
-#ifdef CONFIG_ISS_SMP_EXTN
-	iss_model_init_early_smp();
-#endif
-}
-
-static void __init plat_fpga_populate_dev(void)
-{
-	/*
-	 * Traverses flattened DeviceTree - registering platform devices
-	 * (if any) complete with their resources
-	 */
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
 
 /*----------------------- Machine Descriptions ------------------------------
  *
@@ -48,41 +20,26 @@ static void __init plat_fpga_populate_dev(void)
  * callback set, by matching the DT compatible name.
  */
 
-static const char *aa4_compat[] __initconst = {
+static const char *legacy_fpga_compat[] __initconst = {
 	"snps,arc-angel4",
-	NULL,
-};
-
-MACHINE_START(ANGEL4, "angel4")
-	.dt_compat	= aa4_compat,
-	.init_early	= plat_fpga_early_init,
-	.init_machine	= plat_fpga_populate_dev,
-#ifdef CONFIG_ISS_SMP_EXTN
-	.init_smp	= iss_model_init_smp,
-#endif
-MACHINE_END
-
-static const char *ml509_compat[] __initconst = {
 	"snps,arc-ml509",
 	NULL,
 };
 
-MACHINE_START(ML509, "ml509")
-	.dt_compat	= ml509_compat,
-	.init_early	= plat_fpga_early_init,
-	.init_machine	= plat_fpga_populate_dev,
-#ifdef CONFIG_SMP
+MACHINE_START(LEGACY_FPGA, "legacy_fpga")
+	.dt_compat	= legacy_fpga_compat,
+#ifdef CONFIG_ISS_SMP_EXTN
+	.init_early	= iss_model_init_early_smp,
 	.init_smp	= iss_model_init_smp,
 #endif
 MACHINE_END
 
-static const char *nsimosci_compat[] __initconst = {
+static const char *simulation_compat[] __initconst = {
+	"snps,nsim",
 	"snps,nsimosci",
 	NULL,
 };
 
-MACHINE_START(NSIMOSCI, "nsimosci")
-	.dt_compat	= nsimosci_compat,
-	.init_early	= NULL,
-	.init_machine	= plat_fpga_populate_dev,
+MACHINE_START(SIMULATION, "simulation")
+	.dt_compat	= simulation_compat,
 MACHINE_END
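
The board-specific descriptors above collapse into two generic ones, and board selection moves entirely to the device tree: the kernel picks the machine descriptor whose dt_compat list contains the DT root node's compatible string (of_flat_dt_match_machine() does the real work). A simplified, hypothetical sketch of that matching — every name below is invented for illustration:

    #include <stdio.h>
    #include <string.h>

    struct machine_desc_sketch {
            const char *name;
            const char *const *dt_compat;	/* NULL-terminated list */
    };

    static const char *const legacy_fpga_compat[] = {
            "snps,arc-angel4", "snps,arc-ml509", NULL,
    };

    static const struct machine_desc_sketch machines[] = {
            { "legacy_fpga", legacy_fpga_compat },
    };

    /* return the first descriptor listing the DT root compatible */
    static const struct machine_desc_sketch *match(const char *root_compat)
    {
            for (size_t i = 0; i < sizeof(machines) / sizeof(machines[0]); i++)
                    for (const char *const *c = machines[i].dt_compat; *c; c++)
                            if (!strcmp(*c, root_compat))
                                    return &machines[i];
            return NULL;
    }

    int main(void)
    {
            const struct machine_desc_sketch *m = match("snps,arc-ml509");

            printf("matched: %s\n", m ? m->name : "(none)");
            return 0;
    }
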
diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c
index 92bad9122077..64797ba3bbe3 100644
--- a/arch/arc/plat-arcfpga/smp.c
+++ b/arch/arc/plat-arcfpga/smp.c
@@ -13,9 +13,10 @@
 
 #include <linux/smp.h>
 #include <linux/irq.h>
-#include <plat/irq.h>
 #include <plat/smp.h>
 
+#define IDU_INTERRUPT_0	16
+
 static char smp_cpuinfo_buf[128];
 
 /*
diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig
index 6994c188dc88..d14b3d3c5dfd 100644
--- a/arch/arc/plat-tb10x/Kconfig
+++ b/arch/arc/plat-tb10x/Kconfig
@@ -18,7 +18,6 @@
 
 menuconfig ARC_PLAT_TB10X
 	bool "Abilis TB10x"
-	select COMMON_CLK
 	select PINCTRL
 	select PINCTRL_TB10X
 	select PINMUX
diff --git a/arch/arc/plat-tb10x/tb10x.c b/arch/arc/plat-tb10x/tb10x.c
index 06cb30929460..da0ac0960a4b 100644
--- a/arch/arc/plat-tb10x/tb10x.c
+++ b/arch/arc/plat-tb10x/tb10x.c
@@ -19,21 +19,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
-
 #include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/clk-provider.h>
-#include <linux/pinctrl/consumer.h>
-
 #include <asm/mach_desc.h>
 
-
-static void __init tb10x_platform_init(void)
-{
-	of_clk_init(NULL);
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
 static const char *tb10x_compat[] __initdata = {
 	"abilis,arc-tb10x",
 	NULL,
@@ -41,5 +29,4 @@ static const char *tb10x_compat[] __initdata = {
 
 MACHINE_START(TB10x, "tb10x")
 	.dt_compat	= tb10x_compat,
-	.init_machine	= tb10x_platform_init,
 MACHINE_END
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 2fab4f6cc6e6..3487046d8a78 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -262,6 +262,7 @@ CONFIG_WATCHDOG=y
 CONFIG_XILINX_WATCHDOG=y
 CONFIG_ORION_WATCHDOG=y
 CONFIG_SUNXI_WATCHDOG=y
+CONFIG_MESON_WATCHDOG=y
 CONFIG_MFD_AS3722=y
 CONFIG_MFD_BCM590XX=y
 CONFIG_MFD_CROS_EC=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 16e719c268dd..b3f86670d2eb 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -86,7 +86,6 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 # CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
 CONFIG_NETFILTER=y
 CONFIG_CAN=m
 CONFIG_CAN_C_CAN=m
@@ -112,6 +111,7 @@ CONFIG_MTD_OOPS=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_ECC_BCH=y
 CONFIG_MTD_NAND_OMAP2=y
 CONFIG_MTD_ONENAND=y
 CONFIG_MTD_ONENAND_VERIFY_WRITE=y
@@ -317,7 +317,7 @@ CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS4_FS=m
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index 8c35ae4ff176..07a09570175d 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -20,7 +20,7 @@
 #include <linux/input.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index d22c30d3ccfa..8c58b71c2727 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -917,6 +917,10 @@ static int __init omap_device_late_idle(struct device *dev, void *data)
 static int __init omap_device_late_init(void)
 {
 	bus_for_each_dev(&platform_bus_type, NULL, NULL, omap_device_late_idle);
+
+	WARN(!of_have_populated_dt(),
+		"legacy booting deprecated, please update to boot with .dts\n");
+
 	return 0;
 }
 omap_late_initcall_sync(omap_device_late_init);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ac9afde76dea..9532f8d5857e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,5 +1,6 @@
 config ARM64
 	def_bool y
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
@@ -232,7 +233,7 @@ config ARM64_VA_BITS_42
 
 config ARM64_VA_BITS_48
 	bool "48-bit"
-	depends on BROKEN
+	depends on !ARM_SMMU
 
 endchoice
 
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index ac2cb2418025..c46cbb29f3c6 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -22,7 +22,7 @@
 		bank-width = <4>;
 	};
 
-	vram@2,00000000 {
+	v2m_video_ram: vram@2,00000000 {
 		compatible = "arm,vexpress-vram";
 		reg = <2 0x00000000 0x00800000>;
 	};
@@ -179,9 +179,42 @@
 		clcd@1f0000 {
 			compatible = "arm,pl111", "arm,primecell";
 			reg = <0x1f0000 0x1000>;
+			interrupt-names = "combined";
 			interrupts = <14>;
 			clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
 			clock-names = "clcdclk", "apb_pclk";
+			arm,pl11x,framebuffer = <0x18000000 0x00180000>;
+			memory-region = <&v2m_video_ram>;
+			max-memory-bandwidth = <130000000>; /* 16bpp @ 63.5MHz */
+
+			port {
+				v2m_clcd_pads: endpoint {
+					remote-endpoint = <&v2m_clcd_panel>;
+					arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
+				};
+			};
+
+			panel {
+				compatible = "panel-dpi";
+
+				port {
+					v2m_clcd_panel: endpoint {
+						remote-endpoint = <&v2m_clcd_pads>;
+					};
+				};
+
+				panel-timing {
+					clock-frequency = <63500127>;
+					hactive = <1024>;
+					hback-porch = <152>;
+					hfront-porch = <48>;
+					hsync-len = <104>;
+					vactive = <768>;
+					vback-porch = <23>;
+					vfront-porch = <3>;
+					vsync-len = <4>;
+				};
+			};
 		};
 
 		virtio_block@0130000 {
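
The new panel-timing node can be sanity-checked with simple arithmetic: pixel clock / (horizontal total * vertical total) is the refresh rate, and these values give roughly 60 Hz:

    #include <stdio.h>

    int main(void)
    {
            unsigned long clock = 63500127;			/* clock-frequency, Hz */
            unsigned long htotal = 1024 + 152 + 48 + 104;	/* hactive + porches + hsync */
            unsigned long vtotal = 768 + 23 + 3 + 4;	/* vactive + porches + vsync */

            /* 63500127 / (1328 * 798) ~= 59.92 Hz */
            printf("refresh = %.2f Hz\n", (double)clock / (htotal * vtotal));
            return 0;
    }
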
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 9cd37de9aa8d..4ce602c2c6de 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -78,6 +78,7 @@ CONFIG_NET_XGENE=y
 # CONFIG_WLAN is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_AMBAKMI=y
 CONFIG_LEGACY_PTY_COUNT=16
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
@@ -90,6 +91,7 @@ CONFIG_VIRTIO_CONSOLE=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
 CONFIG_FB=y
+CONFIG_FB_ARMCLCD=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 253e33bc94fb..56de5aadede2 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -37,8 +37,8 @@ typedef s32 compat_ssize_t;
 typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
-typedef u32		__compat_uid_t;
-typedef u32		__compat_gid_t;
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
 typedef u16		__compat_uid16_t;
 typedef u16		__compat_gid16_t;
 typedef u32		__compat_uid32_t;
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 01d3aab64b79..1f65be393139 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -126,7 +126,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;
  * that it will "exec", and that there is sufficient room for the brk.
  */
 extern unsigned long randomize_et_dyn(unsigned long base);
-#define ELF_ET_DYN_BASE	(randomize_et_dyn(2 * TASK_SIZE_64 / 3))
+#define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
 
 /*
  * When the program starts, a1 contains a pointer to a function to be
@@ -169,7 +169,7 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define COMPAT_ELF_PLATFORM	("v8l")
 #endif
 
-#define COMPAT_ELF_ET_DYN_BASE	(randomize_et_dyn(2 * TASK_SIZE_32 / 3))
+#define COMPAT_ELF_ET_DYN_BASE	(2 * TASK_SIZE_32 / 3)
 
 /* AArch32 registers. */
 #define COMPAT_ELF_NGREG	18
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index 8e24ef3f7c82..b4f6b19a8a68 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_IRQ_WORK_H
 #define __ASM_IRQ_WORK_H
 
+#ifdef CONFIG_SMP
+
 #include <asm/smp.h>
 
 static inline bool arch_irq_work_has_interrupt(void)
@@ -8,4 +10,13 @@ static inline bool arch_irq_work_has_interrupt(void)
 	return !!__smp_cross_call;
 }
 
+#else
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return false;
+}
+
+#endif
+
 #endif /* __ASM_IRQ_WORK_H */
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 03aaa99e1ea0..95c49ebc660d 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -89,7 +89,8 @@ static int __init uefi_init(void)
 	 */
 	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
 		pr_err("System table signature incorrect\n");
-		return -EINVAL;
+		retval = -EINVAL;
+		goto out;
 	}
 	if ((efi.systab->hdr.revision >> 16) < 2)
 		pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
@@ -103,6 +104,7 @@ static int __init uefi_init(void)
 		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
 			vendor[i] = c16[i];
 		vendor[i] = '\0';
+		early_memunmap(c16, sizeof(vendor));
 	}
 
 	pr_info("EFI v%u.%.02u by %s\n",
@@ -113,29 +115,11 @@ static int __init uefi_init(void)
 	if (retval == 0)
 		set_bit(EFI_CONFIG_TABLES, &efi.flags);
 
-	early_memunmap(c16, sizeof(vendor));
+out:
 	early_memunmap(efi.systab, sizeof(efi_system_table_t));
-
 	return retval;
 }
 
-static __initdata char memory_type_name[][32] = {
-	{"Reserved"},
-	{"Loader Code"},
-	{"Loader Data"},
-	{"Boot Code"},
-	{"Boot Data"},
-	{"Runtime Code"},
-	{"Runtime Data"},
-	{"Conventional Memory"},
-	{"Unusable Memory"},
-	{"ACPI Reclaim Memory"},
-	{"ACPI Memory NVS"},
-	{"Memory Mapped I/O"},
-	{"MMIO Port Space"},
-	{"PAL Code"},
-};
-
 /*
  * Return true for RAM regions we want to permanently reserve.
  */
@@ -166,10 +150,13 @@ static __init void reserve_regions(void)
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
-		if (uefi_debug)
-			pr_info("  0x%012llx-0x%012llx [%s]",
+		if (uefi_debug) {
+			char buf[64];
+
+			pr_info("  0x%012llx-0x%012llx %s",
 				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-				memory_type_name[md->type]);
+				efi_md_typeattr_format(buf, sizeof(buf), md));
+		}
 
 		memrange_efi_to_native(&paddr, &npages);
 		size = npages << PAGE_SHIFT;
@@ -393,11 +380,16 @@ static int __init arm64_enter_virtual_mode(void)
 		return -1;
 	}
 
-	pr_info("Remapping and enabling EFI services.\n");
-
-	/* replace early memmap mapping with permanent mapping */
 	mapsize = memmap.map_end - memmap.map;
 	early_memunmap(memmap.map, mapsize);
+
+	if (efi_runtime_disabled()) {
+		pr_info("EFI runtime services will be disabled.\n");
+		return -1;
+	}
+
+	pr_info("Remapping and enabling EFI services.\n");
+	/* replace early memmap mapping with permanent mapping */
 	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
 						   mapsize);
 	memmap.map_end = memmap.map + mapsize;
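
The uefi_init() hunks are a textbook goto-out conversion: the old early `return -EINVAL` skipped the early_memunmap() of the system table, so every error path now funnels through one cleanup label (and the vendor-string mapping is released where it is last used). A generic, self-contained sketch of the idiom — process() and its buffer are invented stand-ins for the mapped table:

    #include <stdio.h>
    #include <stdlib.h>

    static int process(const char *path)
    {
            int retval = 0;
            char *buf = malloc(4096);	/* stands in for the early mapping */

            if (!buf)
                    return -1;

            if (!path || !*path) {
                    retval = -1;		/* was: return -1, leaking buf */
                    goto out;
            }

            printf("processing %s\n", path);
    out:
            free(buf);			/* single release on every path */
            return retval;
    }

    int main(void)
    {
            return process("") ? 1 : 0;
    }
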
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c3065dbc4fa2..fde9923af859 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -378,8 +378,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
 	return randomize_base(mm->brk);
 }
-
-unsigned long randomize_et_dyn(unsigned long base)
-{
-	return randomize_base(base);
-}
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index fa324bd5a5c4..4a07630a6616 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -105,10 +105,10 @@ EXPORT_SYMBOL(ioremap_cache);
 
 static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
 #if CONFIG_ARM64_PGTABLE_LEVELS > 2
-static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
 #endif
 #if CONFIG_ARM64_PGTABLE_LEVELS > 3
-static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
 #endif
 
 static inline pud_t * __init early_ioremap_pud(unsigned long addr)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6894ef3e6234..0bf90d26e745 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -297,11 +297,15 @@ static void __init map_mem(void)
 	 * create_mapping requires puds, pmds and ptes to be allocated from
 	 * memory addressable from the initial direct kernel mapping.
 	 *
-	 * The initial direct kernel mapping, located at swapper_pg_dir,
-	 * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be
-	 * aligned to 2MB as per Documentation/arm64/booting.txt).
+	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
+	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
+	 * PHYS_OFFSET (which must be aligned to 2MB as per
+	 * Documentation/arm64/booting.txt).
 	 */
-	limit = PHYS_OFFSET + PUD_SIZE;
+	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
+		limit = PHYS_OFFSET + PMD_SIZE;
+	else
+		limit = PHYS_OFFSET + PUD_SIZE;
 	memblock_set_current_limit(limit);
 
 	/* map all the memory banks */
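
The new limit follows from page-table geometry: with 8-byte descriptors a table page holds 2^(PAGE_SHIFT-3) entries, so each level multiplies coverage by that factor. Assuming the usual arm64 shifts (12 for 4K pages, 16 for 64K pages, which drop the PUD level), a PUD entry maps 1 GB and a 64K-page PMD entry 512 MB — hence the two limits. A quick check:

    #include <stdio.h>

    /* bytes mapped by one entry sitting `levels_below` levels above the page */
    static unsigned long long block(unsigned int page_shift, int levels_below)
    {
            unsigned int entries_shift = page_shift - 3;	/* log2(PAGE_SIZE / 8) */

            return 1ULL << (page_shift + levels_below * entries_shift);
    }

    int main(void)
    {
            printf("4K:  PMD %llu MB, PUD %llu MB\n",
                   block(12, 1) >> 20, block(12, 2) >> 20);	/* 2 MB, 1024 MB */
            printf("64K: PMD %llu MB\n", block(16, 1) >> 20);	/* 512 MB */
            return 0;
    }
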
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 62c6101df260..6682b361d3ac 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -30,12 +30,14 @@
 
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
+static struct kmem_cache *pgd_cache;
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	if (PGD_SIZE == PAGE_SIZE)
 		return (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	else
-		return kzalloc(PGD_SIZE, GFP_KERNEL);
+		return kmem_cache_zalloc(pgd_cache, GFP_KERNEL);
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -43,5 +45,17 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	if (PGD_SIZE == PAGE_SIZE)
 		free_page((unsigned long)pgd);
 	else
-		kfree(pgd);
+		kmem_cache_free(pgd_cache, pgd);
+}
+
+static int __init pgd_cache_init(void)
+{
+	/*
+	 * Naturally aligned pgds required by the architecture.
+	 */
+	if (PGD_SIZE != PAGE_SIZE)
+		pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
+					      SLAB_PANIC, NULL);
+	return 0;
 }
+core_initcall(pgd_cache_init);
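
"Naturally aligned" in pgd_cache_init() means the object's address is a multiple of its own size; kmem_cache_create() guarantees that because align == size, while the old kzalloc() made no such promise for arbitrary sizes. A userspace analogue with aligned_alloc() — the 512-byte size is just an illustrative stand-in for a sub-page PGD_SIZE:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    int main(void)
    {
            size_t size = 512;			/* hypothetical sub-page pgd size */
            void *p = aligned_alloc(size, size);	/* align == size, like the cache */

            if (!p)
                    return 1;
            /* natural alignment: address is a multiple of the object size */
            printf("addr %% size = %zu\n", (size_t)((uintptr_t)p % size));
            free(p);
            return 0;
    }
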
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 2134f7e6c288..de0a81a539a0 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -144,8 +144,12 @@
 
 /* Data-processing (2 source) */
 /* Rd = Rn OP Rm */
-#define A64_UDIV(sf, Rd, Rn, Rm) aarch64_insn_gen_data2(Rd, Rn, Rm, \
-	A64_VARIANT(sf), AARCH64_INSN_DATA2_UDIV)
+#define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \
+	A64_VARIANT(sf), AARCH64_INSN_DATA2_##type)
+#define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV)
+#define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
+#define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
+#define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
 
 /* Data-processing (3 source) */
 /* Rd = Ra + Rn * Rm */
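
A64_DATA2() deduplicates four near-identical macros by pasting the opcode suffix onto AARCH64_INSN_DATA2_ with the preprocessor's ## operator. The same trick in a runnable form:

    #include <stdio.h>

    enum { OP_UDIV, OP_LSLV, OP_LSRV, OP_ASRV };

    #define MAKE_OP(type)	(OP_##type)	/* pastes into the enum constant name */

    int main(void)
    {
            printf("UDIV=%d LSLV=%d LSRV=%d ASRV=%d\n",
                   MAKE_OP(UDIV), MAKE_OP(LSLV), MAKE_OP(LSRV), MAKE_OP(ASRV));
            return 0;
    }
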
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 7ae33545535b..41f1e3e2ea24 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -19,12 +19,13 @@
 #define pr_fmt(fmt) "bpf_jit: " fmt
 
 #include <linux/filter.h>
-#include <linux/moduleloader.h>
 #include <linux/printk.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
+
 #include <asm/byteorder.h>
 #include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
 
 #include "bpf_jit.h"
 
@@ -119,6 +120,14 @@ static inline int bpf2a64_offset(int bpf_to, int bpf_from,
 	return to - from;
 }
 
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	u32 *ptr;
+	/* We are guaranteed to have aligned memory. */
+	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
+}
+
 static inline int epilogue_offset(const struct jit_ctx *ctx)
 {
 	int to = ctx->offset[ctx->prog->len - 1];
@@ -196,6 +205,12 @@ static void build_epilogue(struct jit_ctx *ctx)
 	emit(A64_RET(A64_LR), ctx);
 }
 
+/* JITs an eBPF instruction.
+ * Returns:
+ * 0  - successfully JITed an 8-byte eBPF instruction.
+ * >0 - successfully JITed a 16-byte eBPF instruction.
+ * <0 - failed to JIT.
+ */
 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {
 	const u8 code = insn->code;
@@ -252,6 +267,18 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		emit(A64_MUL(is64, tmp, tmp, src), ctx);
 		emit(A64_SUB(is64, dst, dst, tmp), ctx);
 		break;
+	case BPF_ALU | BPF_LSH | BPF_X:
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		emit(A64_LSLV(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_X:
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		emit(A64_LSRV(is64, dst, dst, src), ctx);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_X:
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		emit(A64_ASRV(is64, dst, dst, src), ctx);
+		break;
 	/* dst = -dst */
 	case BPF_ALU | BPF_NEG:
 	case BPF_ALU64 | BPF_NEG:
@@ -443,6 +470,27 @@ emit_cond_jmp:
 		emit(A64_B(jmp_offset), ctx);
 		break;
 
+	/* dst = imm64 */
+	case BPF_LD | BPF_IMM | BPF_DW:
+	{
+		const struct bpf_insn insn1 = insn[1];
+		u64 imm64;
+
+		if (insn1.code != 0 || insn1.src_reg != 0 ||
+		    insn1.dst_reg != 0 || insn1.off != 0) {
+			/* Note: verifier in BPF core must catch invalid
+			 * instructions.
+			 */
+			pr_err_once("Invalid BPF_LD_IMM64 instruction\n");
+			return -EINVAL;
+		}
+
+		imm64 = (u64)insn1.imm << 32 | imm;
+		emit_a64_mov_i64(dst, imm64, ctx);
+
+		return 1;
+	}
+
 	/* LDX: dst = *(size *)(src + off) */
 	case BPF_LDX | BPF_MEM | BPF_W:
 	case BPF_LDX | BPF_MEM | BPF_H:
@@ -594,6 +642,10 @@ static int build_body(struct jit_ctx *ctx)
 		ctx->offset[i] = ctx->idx;
 
 		ret = build_insn(insn, ctx);
+		if (ret > 0) {
+			i++;
+			continue;
+		}
 		if (ret)
 			return ret;
 	}
@@ -613,8 +665,10 @@ void bpf_jit_compile(struct bpf_prog *prog)
 
 void bpf_int_jit_compile(struct bpf_prog *prog)
 {
+	struct bpf_binary_header *header;
 	struct jit_ctx ctx;
 	int image_size;
+	u8 *image_ptr;
 
 	if (!bpf_jit_enable)
 		return;
@@ -636,23 +690,25 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 		goto out;
 
 	build_prologue(&ctx);
-
 	build_epilogue(&ctx);
 
 	/* Now we know the actual image size. */
 	image_size = sizeof(u32) * ctx.idx;
-	ctx.image = module_alloc(image_size);
-	if (unlikely(ctx.image == NULL))
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL)
 		goto out;
 
 	/* 2. Now, the actual pass. */
 
+	ctx.image = (u32 *)image_ptr;
 	ctx.idx = 0;
+
 	build_prologue(&ctx);
 
 	ctx.body_offset = ctx.idx;
 	if (build_body(&ctx)) {
-		module_free(NULL, ctx.image);
+		bpf_jit_binary_free(header);
 		goto out;
 	}
 
@@ -663,17 +719,25 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
 
 	bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
-	prog->bpf_func = (void *)ctx.image;
-	prog->jited = 1;
 
+	set_memory_ro((unsigned long)header, header->pages);
+	prog->bpf_func = (void *)ctx.image;
+	prog->jited = true;
 out:
 	kfree(ctx.offset);
 }
 
 void bpf_jit_free(struct bpf_prog *prog)
 {
-	if (prog->jited)
-		module_free(NULL, prog->bpf_func);
+	unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
+	struct bpf_binary_header *header = (void *)addr;
+
+	if (!prog->jited)
+		goto free_filter;
+
+	set_memory_rw(addr, header->pages);
+	bpf_jit_binary_free(header);
 
-	kfree(prog);
+free_filter:
+	bpf_prog_unlock_free(prog);
 }
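
BPF_LD | BPF_IMM | BPF_DW is the one 16-byte eBPF instruction: the 64-bit immediate travels as two 32-bit halves in consecutive insns, and build_insn() returns 1 so build_body() advances past the pseudo second half. The reassembly can be sketched standalone; note that masking the low half matters, since an unguarded sign extension would smear into the upper 32 bits:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int32_t imm  = (int32_t)0xdeadbeef;	/* low half,  insn[0].imm */
            int32_t imm1 = 0x01234567;		/* high half, insn[1].imm */

            uint64_t imm64 = (uint64_t)imm1 << 32 | (uint32_t)imm;

            printf("imm64 = 0x%016llx\n", (unsigned long long)imm64);
            return 0;
    }
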
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 741b99c1a0b1..c52d7540dc05 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -568,6 +568,7 @@ efi_init (void)
 		{
 			const char *unit;
 			unsigned long size;
+			char buf[64];
 
 			md = p;
 			size = md->num_pages << EFI_PAGE_SHIFT;
@@ -586,9 +587,10 @@ efi_init (void)
 				unit = "KB";
 			}
 
-			printk("mem%02d: type=%2u, attr=0x%016lx, "
+			printk("mem%02d: %s "
 			       "range=[0x%016lx-0x%016lx) (%4lu%s)\n",
-			       i, md->type, md->attribute, md->phys_addr,
+			       i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			       md->phys_addr,
 			       md->phys_addr + efi_md_size(md), size, unit);
 		}
 	}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ad6badb6be71..f43aa536c517 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2066,6 +2066,7 @@ config MIPS_CPS
 	  support is unavailable.
 
 config MIPS_CPS_PM
+	depends on MIPS_CPS
 	select MIPS_CPC
 	bool
 
diff --git a/arch/mips/ath79/mach-db120.c b/arch/mips/ath79/mach-db120.c
index 4d661a1d2dae..9423f5aed287 100644
--- a/arch/mips/ath79/mach-db120.c
+++ b/arch/mips/ath79/mach-db120.c
@@ -113,7 +113,7 @@ static void __init db120_pci_init(u8 *eeprom)
 	ath79_register_pci();
 }
 #else
-static inline void db120_pci_init(void) {}
+static inline void db120_pci_init(u8 *eeprom) {}
 #endif /* CONFIG_PCI */
 
 static void __init db120_setup(void)
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 38f4c32e2816..5ebdb32d9a2b 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -806,15 +806,6 @@ void __init prom_init(void)
 #endif
 	}
 
-	if (octeon_is_simulation()) {
-		/*
-		 * The simulator uses a mtdram device pre filled with
-		 * the filesystem. Also specify the calibration delay
-		 * to avoid calculating it every time.
-		 */
-		strcat(arcs_cmdline, " rw root=1f00 slram=root,0x40000000,+1073741824");
-	}
-
 	mips_hpt_frequency = octeon_get_clock_rate();
 
 	octeon_init_cvmcount();
diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h
index 51f80bd36fcc..63b3468ede4c 100644
--- a/arch/mips/include/asm/cop2.h
+++ b/arch/mips/include/asm/cop2.h
@@ -37,15 +37,15 @@ extern void nlm_cop2_restore(struct nlm_cop2_state *);
 
 #define cop2_present		1
 #define cop2_lazy_restore	1
-#define cop2_save(r)		do { (r); } while (0)
-#define cop2_restore(r)		do { (r); } while (0)
+#define cop2_save(r)		do { (void)(r); } while (0)
+#define cop2_restore(r)		do { (void)(r); } while (0)
 
 #else
 
 #define cop2_present		0
 #define cop2_lazy_restore	0
-#define cop2_save(r)		do { (r); } while (0)
-#define cop2_restore(r)		do { (r); } while (0)
+#define cop2_save(r)		do { (void)(r); } while (0)
+#define cop2_restore(r)		do { (void)(r); } while (0)
 #endif
 
 enum cu2_ops {
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index 992aaba603b5..b463f2aa5a61 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -24,7 +24,7 @@ do { \
 	asm volatile (					\
 		"1: " load " %[tmp_dst], 0(%[tmp_src])\n"	\
 		"   li %[tmp_err], 0\n"			\
-		"2:\n"					\
+		"2: .insn\n"				\
 							\
 		".section .fixup, \"ax\"\n"		\
 		"3: li %[tmp_err], 1\n"			\
@@ -46,7 +46,7 @@ do { \
 	asm volatile (					\
 		"1: " store " %[tmp_src], 0(%[tmp_dst])\n"\
 		"   li %[tmp_err], 0\n"			\
-		"2:\n"					\
+		"2: .insn\n"				\
 							\
 		".section .fixup, \"ax\"\n"		\
 		"3: li %[tmp_err], 1\n"			\
diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h
index d9f932de80e9..1c967abd545c 100644
--- a/arch/mips/include/asm/idle.h
+++ b/arch/mips/include/asm/idle.h
@@ -8,19 +8,12 @@ extern void (*cpu_wait)(void);
 extern void r4k_wait(void);
 extern asmlinkage void __r4k_wait(void);
 extern void r4k_wait_irqoff(void);
-extern void __pastwait(void);
 
 static inline int using_rollback_handler(void)
 {
 	return cpu_wait == r4k_wait;
 }
 
-static inline int address_is_in_r4k_wait_irqoff(unsigned long addr)
-{
-	return addr >= (unsigned long)r4k_wait_irqoff &&
-	       addr < (unsigned long)__pastwait;
-}
-
 extern int mips_cpuidle_wait_enter(struct cpuidle_device *dev,
 				   struct cpuidle_driver *drv, int index);
 
diff --git a/arch/mips/include/uapi/asm/ptrace.h b/arch/mips/include/uapi/asm/ptrace.h
index bbcfb8ba8106..91a3d197ede3 100644
--- a/arch/mips/include/uapi/asm/ptrace.h
+++ b/arch/mips/include/uapi/asm/ptrace.h
@@ -9,6 +9,8 @@
 #ifndef _UAPI_ASM_PTRACE_H
 #define _UAPI_ASM_PTRACE_H
 
+#include <linux/types.h>
+
 /* 0 - 31 are integer registers, 32 - 63 are fp registers. */
 #define FPR_BASE	32
 #define PC		64
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 09ce45980758..0b9082b6b683 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -68,9 +68,6 @@ void r4k_wait_irqoff(void)
68 " wait \n" 68 " wait \n"
69 " .set pop \n"); 69 " .set pop \n");
70 local_irq_enable(); 70 local_irq_enable();
71 __asm__(
72 " .globl __pastwait \n"
73 "__pastwait: \n");
74} 71}
75 72
76/* 73/*
diff --git a/arch/mips/lasat/Kconfig b/arch/mips/lasat/Kconfig
index 1d2ee8a9be13..8776d0a34274 100644
--- a/arch/mips/lasat/Kconfig
+++ b/arch/mips/lasat/Kconfig
@@ -4,7 +4,7 @@ config PICVUE
 
 config PICVUE_PROC
 	tristate "PICVUE LCD display driver /proc interface"
-	depends on PICVUE
+	depends on PICVUE && PROC_FS
 
 config DS1603
 	bool "DS1603 RTC driver"
diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson/lemote-2f/clock.c
index a217061beee3..462e34d46b4a 100644
--- a/arch/mips/loongson/lemote-2f/clock.c
+++ b/arch/mips/loongson/lemote-2f/clock.c
@@ -91,6 +91,7 @@ EXPORT_SYMBOL(clk_put);
 
 int clk_set_rate(struct clk *clk, unsigned long rate)
 {
+	unsigned int rate_khz = rate / 1000;
 	struct cpufreq_frequency_table *pos;
 	int ret = 0;
 	int regval;
@@ -107,9 +108,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 	propagate_rate(clk);
 
 	cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table)
-		if (rate == pos->frequency)
+		if (rate_khz == pos->frequency)
 			break;
-	if (rate != pos->frequency)
+	if (rate_khz != pos->frequency)
 		return -ENOTSUPP;
 
 	clk->rate = rate;
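
The clk_set_rate() fix is a unit correction: the clk API passes rates in Hz while cpufreq frequency tables store kHz, so the old comparison could never match a valid table entry. Distilled:

    #include <stdio.h>

    int main(void)
    {
            unsigned long rate = 797000000;		/* Hz, from the clk API */
            unsigned int table_khz = 797000;	/* kHz, cpufreq convention */

            printf("Hz  vs kHz: %s\n", rate == table_khz ? "match" : "no match");
            printf("kHz vs kHz: %s\n",
                   rate / 1000 == table_khz ? "match" : "no match");
            return 0;
    }
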
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 7a4727795a70..51a0fde4bec1 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1023,7 +1023,7 @@ emul:
 		goto emul;
 
 	case cop1x_op:
-		if (cpu_has_mips_4_5 || cpu_has_mips64)
+		if (cpu_has_mips_4_5 || cpu_has_mips64 || cpu_has_mips32r2)
 			/* its one of ours */
 			goto emul;
 
@@ -1068,7 +1068,7 @@ emul:
 		break;
 
 	case cop1x_op:
-		if (!cpu_has_mips_4_5 && !cpu_has_mips64)
+		if (!cpu_has_mips_4_5 && !cpu_has_mips64 && !cpu_has_mips32r2)
 			return SIGILL;
 
 		sig = fpux_emu(xcp, ctx, ir, fault_addr);
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index a08dd53a1cc5..b5f228e7eae6 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1062,6 +1062,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
 struct mips_huge_tlb_info {
 	int huge_pte;
 	int restore_scratch;
+	bool need_reload_pte;
 };
 
 static struct mips_huge_tlb_info
@@ -1076,6 +1077,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
 
 	rv.huge_pte = scratch;
 	rv.restore_scratch = 0;
+	rv.need_reload_pte = false;
 
 	if (check_for_high_segbits) {
 		UASM_i_MFC0(p, tmp, C0_BADVADDR);
@@ -1264,6 +1266,7 @@ static void build_r4000_tlb_refill_handler(void)
 	} else {
 		htlb_info.huge_pte = K0;
 		htlb_info.restore_scratch = 0;
+		htlb_info.need_reload_pte = true;
 		vmalloc_mode = refill_noscratch;
 		/*
 		 * create the plain linear handler
@@ -1300,7 +1303,8 @@ static void build_r4000_tlb_refill_handler(void)
 	}
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 	uasm_l_tlb_huge_update(&l, p);
-	UASM_i_LW(&p, K0, 0, K1);
+	if (htlb_info.need_reload_pte)
+		UASM_i_LW(&p, htlb_info.huge_pte, 0, K1);
 	build_huge_update_entries(&p, htlb_info.huge_pte, K1);
 	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
 				   htlb_info.restore_scratch);
diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index b9510ea8db56..6510ace272d4 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile
@@ -5,8 +5,9 @@
 # Copyright (C) 2008 Wind River Systems, Inc.
 #   written by Ralf Baechle <ralf@linux-mips.org>
 #
-obj-y				:= malta-amon.o malta-display.o malta-init.o \
+obj-y				:= malta-display.o malta-init.o \
 				   malta-int.o malta-memory.o malta-platform.o \
 				   malta-reset.o malta-setup.o malta-time.o
 
+obj-$(CONFIG_MIPS_CMP)		+= malta-amon.o
 obj-$(CONFIG_MIPS_MALTA_PM)	+= malta-pm.o
diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile
index febf4334545e..2ae49e99eb67 100644
--- a/arch/mips/mti-sead3/Makefile
+++ b/arch/mips/mti-sead3/Makefile
@@ -14,7 +14,6 @@ obj-y := sead3-lcd.o sead3-display.o sead3-init.o \
 		   sead3-setup.o sead3-time.o
 
 obj-y				+= sead3-i2c-dev.o sead3-i2c.o \
-				   sead3-pic32-i2c-drv.o sead3-pic32-bus.o \
 				   leds-sead3.o sead3-leds.o
 
 obj-$(CONFIG_EARLY_PRINTK)	+= sead3-console.o
diff --git a/arch/mips/mti-sead3/sead3-i2c.c b/arch/mips/mti-sead3/sead3-i2c.c
index f70d5fc58ef5..795ae83894e0 100644
--- a/arch/mips/mti-sead3/sead3-i2c.c
+++ b/arch/mips/mti-sead3/sead3-i2c.c
@@ -5,10 +5,8 @@
  *
  * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
  */
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <irq.h>
 
 struct resource sead3_i2c_resources[] = {
 	{
@@ -30,8 +28,4 @@ static int __init sead3_i2c_init(void)
 	return platform_device_register(&sead3_i2c_device);
 }
 
-module_init(sead3_i2c_init);
-
-MODULE_AUTHOR("Chris Dearman <chris@mips.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("I2C probe driver for SEAD3");
+device_initcall(sead3_i2c_init);
diff --git a/arch/mips/mti-sead3/sead3-pic32-bus.c b/arch/mips/mti-sead3/sead3-pic32-bus.c
deleted file mode 100644
index 3b12aa5a7c88..000000000000
--- a/arch/mips/mti-sead3/sead3-pic32-bus.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/io.h>
-#include <linux/errno.h>
-
-#define PIC32_NULL	0x00
-#define PIC32_RD	0x01
-#define PIC32_SYSRD	0x02
-#define PIC32_WR	0x10
-#define PIC32_SYSWR	0x20
-#define PIC32_IRQ_CLR	0x40
-#define PIC32_STATUS	0x80
-
-#define DELAY()	udelay(100)	/* FIXME: needed? */
-
-/* spinlock to ensure atomic access to PIC32 */
-static DEFINE_SPINLOCK(pic32_bus_lock);
-
-/* FIXME: io_remap these */
-static void __iomem *bus_xfer	= (void __iomem *)0xbf000600;
-static void __iomem *bus_status = (void __iomem *)0xbf000060;
-
-static inline unsigned int ioready(void)
-{
-	return readl(bus_status) & 1;
-}
-
-static inline void wait_ioready(void)
-{
-	do { } while (!ioready());
-}
-
-static inline void wait_ioclear(void)
-{
-	do { } while (ioready());
-}
-
-static inline void check_ioclear(void)
-{
-	if (ioready()) {
-		pr_debug("ioclear: initially busy\n");
-		do {
-			(void) readl(bus_xfer);
-			DELAY();
-		} while (ioready());
-		pr_debug("ioclear: cleared busy\n");
-	}
-}
-
-u32 pic32_bus_readl(u32 reg)
-{
-	unsigned long flags;
-	u32 status, val;
-
-	spin_lock_irqsave(&pic32_bus_lock, flags);
-
-	check_ioclear();
-
-	writel((PIC32_RD << 24) | (reg & 0x00ffffff), bus_xfer);
-	DELAY();
-	wait_ioready();
-	status = readl(bus_xfer);
-	DELAY();
-	val = readl(bus_xfer);
-	wait_ioclear();
-
-	pr_debug("pic32_bus_readl: *%x -> %x (status=%x)\n", reg, val, status);
-
-	spin_unlock_irqrestore(&pic32_bus_lock, flags);
-
-	return val;
-}
-
-void pic32_bus_writel(u32 val, u32 reg)
-{
-	unsigned long flags;
-	u32 status;
-
-	spin_lock_irqsave(&pic32_bus_lock, flags);
-
-	check_ioclear();
-
-	writel((PIC32_WR << 24) | (reg & 0x00ffffff), bus_xfer);
-	DELAY();
-	writel(val, bus_xfer);
-	DELAY();
-	wait_ioready();
-	status = readl(bus_xfer);
-	wait_ioclear();
-
-	pr_debug("pic32_bus_writel: *%x <- %x (status=%x)\n", reg, val, status);
-
-	spin_unlock_irqrestore(&pic32_bus_lock, flags);
-}
diff --git a/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c b/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c
deleted file mode 100644
index 80fe194cfa53..000000000000
--- a/arch/mips/mti-sead3/sead3-pic32-i2c-drv.c
+++ /dev/null
@@ -1,423 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
7 */
8#include <linux/delay.h>
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/spinlock.h>
12#include <linux/platform_device.h>
13#include <linux/init.h>
14#include <linux/errno.h>
15#include <linux/i2c.h>
16#include <linux/slab.h>
17
18#define PIC32_I2CxCON 0x0000
19#define PIC32_I2CxCONCLR 0x0004
20#define PIC32_I2CxCONSET 0x0008
21#define PIC32_I2CxCONINV 0x000C
22#define I2CCON_ON (1<<15)
23#define I2CCON_FRZ (1<<14)
24#define I2CCON_SIDL (1<<13)
25#define I2CCON_SCLREL (1<<12)
26#define I2CCON_STRICT (1<<11)
27#define I2CCON_A10M (1<<10)
28#define I2CCON_DISSLW (1<<9)
29#define I2CCON_SMEN (1<<8)
30#define I2CCON_GCEN (1<<7)
31#define I2CCON_STREN (1<<6)
32#define I2CCON_ACKDT (1<<5)
33#define I2CCON_ACKEN (1<<4)
34#define I2CCON_RCEN (1<<3)
35#define I2CCON_PEN (1<<2)
36#define I2CCON_RSEN (1<<1)
37#define I2CCON_SEN (1<<0)
38
39#define PIC32_I2CxSTAT 0x0010
40#define PIC32_I2CxSTATCLR 0x0014
41#define PIC32_I2CxSTATSET 0x0018
42#define PIC32_I2CxSTATINV 0x001C
43#define I2CSTAT_ACKSTAT (1<<15)
44#define I2CSTAT_TRSTAT (1<<14)
45#define I2CSTAT_BCL (1<<10)
46#define I2CSTAT_GCSTAT (1<<9)
47#define I2CSTAT_ADD10 (1<<8)
48#define I2CSTAT_IWCOL (1<<7)
49#define I2CSTAT_I2COV (1<<6)
50#define I2CSTAT_DA (1<<5)
51#define I2CSTAT_P (1<<4)
52#define I2CSTAT_S (1<<3)
53#define I2CSTAT_RW (1<<2)
54#define I2CSTAT_RBF (1<<1)
55#define I2CSTAT_TBF (1<<0)
56
57#define PIC32_I2CxADD 0x0020
58#define PIC32_I2CxADDCLR 0x0024
59#define PIC32_I2CxADDSET 0x0028
60#define PIC32_I2CxADDINV 0x002C
61#define PIC32_I2CxMSK 0x0030
62#define PIC32_I2CxMSKCLR 0x0034
63#define PIC32_I2CxMSKSET 0x0038
64#define PIC32_I2CxMSKINV 0x003C
65#define PIC32_I2CxBRG 0x0040
66#define PIC32_I2CxBRGCLR 0x0044
67#define PIC32_I2CxBRGSET 0x0048
68#define PIC32_I2CxBRGINV 0x004C
69#define PIC32_I2CxTRN 0x0050
70#define PIC32_I2CxTRNCLR 0x0054
71#define PIC32_I2CxTRNSET 0x0058
72#define PIC32_I2CxTRNINV 0x005C
73#define PIC32_I2CxRCV 0x0060
74
75struct i2c_platform_data {
76 u32 base;
77 struct i2c_adapter adap;
78 u32 xfer_timeout;
79 u32 ack_timeout;
80 u32 ctl_timeout;
81};
82
83extern u32 pic32_bus_readl(u32 reg);
84extern void pic32_bus_writel(u32 val, u32 reg);
85
86static inline void
87StartI2C(struct i2c_platform_data *adap)
88{
89 pr_debug("StartI2C\n");
90 pic32_bus_writel(I2CCON_SEN, adap->base + PIC32_I2CxCONSET);
91}
92
93static inline void
94StopI2C(struct i2c_platform_data *adap)
95{
96 pr_debug("StopI2C\n");
97 pic32_bus_writel(I2CCON_PEN, adap->base + PIC32_I2CxCONSET);
98}
99
100static inline void
101AckI2C(struct i2c_platform_data *adap)
102{
103 pr_debug("AckI2C\n");
104 pic32_bus_writel(I2CCON_ACKDT, adap->base + PIC32_I2CxCONCLR);
105 pic32_bus_writel(I2CCON_ACKEN, adap->base + PIC32_I2CxCONSET);
106}
107
108static inline void
109NotAckI2C(struct i2c_platform_data *adap)
110{
111 pr_debug("NakI2C\n");
112 pic32_bus_writel(I2CCON_ACKDT, adap->base + PIC32_I2CxCONSET);
113 pic32_bus_writel(I2CCON_ACKEN, adap->base + PIC32_I2CxCONSET);
114}
115
116static inline int
117IdleI2C(struct i2c_platform_data *adap)
118{
119 int i;
120
121 pr_debug("IdleI2C\n");
122 for (i = 0; i < adap->ctl_timeout; i++) {
123 if (((pic32_bus_readl(adap->base + PIC32_I2CxCON) &
124 (I2CCON_ACKEN | I2CCON_RCEN | I2CCON_PEN | I2CCON_RSEN |
125 I2CCON_SEN)) == 0) &&
126 ((pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
127 (I2CSTAT_TRSTAT)) == 0))
128 return 0;
129 udelay(1);
130 }
131 return -ETIMEDOUT;
132}
133
134static inline u32
135MasterWriteI2C(struct i2c_platform_data *adap, u32 byte)
136{
137 pr_debug("MasterWriteI2C\n");
138
139 pic32_bus_writel(byte, adap->base + PIC32_I2CxTRN);
140
141 return pic32_bus_readl(adap->base + PIC32_I2CxSTAT) & I2CSTAT_IWCOL;
142}
143
144static inline u32
145MasterReadI2C(struct i2c_platform_data *adap)
146{
147 pr_debug("MasterReadI2C\n");
148
149 pic32_bus_writel(I2CCON_RCEN, adap->base + PIC32_I2CxCONSET);
150
151 while (pic32_bus_readl(adap->base + PIC32_I2CxCON) & I2CCON_RCEN)
152 ;
153
154 pic32_bus_writel(I2CSTAT_I2COV, adap->base + PIC32_I2CxSTATCLR);
155
156 return pic32_bus_readl(adap->base + PIC32_I2CxRCV);
157}
158
159static int
160do_address(struct i2c_platform_data *adap, unsigned int addr, int rd)
161{
162 pr_debug("doaddress\n");
163
164 IdleI2C(adap);
165 StartI2C(adap);
166 IdleI2C(adap);
167
168 addr <<= 1;
169 if (rd)
170 addr |= 1;
171
172 if (MasterWriteI2C(adap, addr))
173 return -EIO;
174 IdleI2C(adap);
175 if (pic32_bus_readl(adap->base + PIC32_I2CxSTAT) & I2CSTAT_ACKSTAT)
176 return -EIO;
177 return 0;
178}
179
180static int
181i2c_read(struct i2c_platform_data *adap, unsigned char *buf,
182 unsigned int len)
183{
184 int i;
185 u32 data;
186
187 pr_debug("i2c_read\n");
188
189 i = 0;
190 while (i < len) {
191 data = MasterReadI2C(adap);
192 buf[i++] = data;
193 if (i < len)
194 AckI2C(adap);
195 else
196 NotAckI2C(adap);
197 }
198
199 StopI2C(adap);
200 IdleI2C(adap);
201 return 0;
202}
203
204static int
205i2c_write(struct i2c_platform_data *adap, unsigned char *buf,
206 unsigned int len)
207{
208 int i;
209 u32 data;
210
211 pr_debug("i2c_write\n");
212
213 i = 0;
214 while (i < len) {
215 data = buf[i];
216 if (MasterWriteI2C(adap, data))
217 return -EIO;
218 IdleI2C(adap);
219 if (pic32_bus_readl(adap->base + PIC32_I2CxSTAT) &
220 I2CSTAT_ACKSTAT)
221 return -EIO;
222 i++;
223 }
224
225 StopI2C(adap);
226 IdleI2C(adap);
227 return 0;
228}
229
230static int
231platform_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num)
232{
233 struct i2c_platform_data *adap = i2c_adap->algo_data;
234 struct i2c_msg *p;
235 int i, err = 0;
236
237 pr_debug("platform_xfer\n");
238 for (i = 0; i < num; i++) {
239#define __BUFSIZE 80
240 int ii;
241 static char buf[__BUFSIZE];
242 char *b = buf;
243
244 p = &msgs[i];
245 b += sprintf(b, " [%d bytes]", p->len);
246 if ((p->flags & I2C_M_RD) == 0) {
247 for (ii = 0; ii < p->len; ii++) {
248 if (b < &buf[__BUFSIZE-4]) {
249 b += sprintf(b, " %02x", p->buf[ii]);
250 } else {
251 strcat(b, "...");
252 break;
253 }
254 }
255 }
256 pr_debug("xfer%d: DevAddr: %04x Op:%s Data:%s\n", i, p->addr,
257 (p->flags & I2C_M_RD) ? "Rd" : "Wr", buf);
258 }
259
260
261 for (i = 0; !err && i < num; i++) {
262 p = &msgs[i];
263 err = do_address(adap, p->addr, p->flags & I2C_M_RD);
264 if (err || !p->len)
265 continue;
266 if (p->flags & I2C_M_RD)
267 err = i2c_read(adap, p->buf, p->len);
268 else
269 err = i2c_write(adap, p->buf, p->len);
270 }
271
272 /* Return the number of messages processed, or the error code. */
273 if (err == 0)
274 err = num;
275
276 return err;
277}
278
279static u32
280platform_func(struct i2c_adapter *adap)
281{
282 pr_debug("platform_algo\n");
283 return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
284}
285
286static const struct i2c_algorithm platform_algo = {
287 .master_xfer = platform_xfer,
288 .functionality = platform_func,
289};
290
291static void i2c_platform_setup(struct i2c_platform_data *priv)
292{
293 pr_debug("i2c_platform_setup\n");
294
295 pic32_bus_writel(500, priv->base + PIC32_I2CxBRG);
296 pic32_bus_writel(I2CCON_ON, priv->base + PIC32_I2CxCONCLR);
297 pic32_bus_writel(I2CCON_ON, priv->base + PIC32_I2CxCONSET);
298 pic32_bus_writel((I2CSTAT_BCL | I2CSTAT_IWCOL),
299 (priv->base + PIC32_I2CxSTATCLR));
300}
301
302static void i2c_platform_disable(struct i2c_platform_data *priv)
303{
304 pr_debug("i2c_platform_disable\n");
305}
306
307static int i2c_platform_probe(struct platform_device *pdev)
308{
309 struct i2c_platform_data *priv;
310 struct resource *r;
311 int ret;
312
313 pr_debug("i2c_platform_probe\n");
314 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
315 if (!r)
316 return -ENODEV;
317
318 priv = devm_kzalloc(&pdev->dev, sizeof(struct i2c_platform_data),
319 GFP_KERNEL);
320 if (!priv)
321 return -ENOMEM;
322
323 /* FIXME: need to allocate resource in PIC32 space */
324#if 0
325 priv->base = bus_request_region(r->start, resource_size(r),
326 pdev->name);
327#else
328 priv->base = r->start;
329#endif
330 if (!priv->base)
331 return -EBUSY;
332
333 priv->xfer_timeout = 200;
334 priv->ack_timeout = 200;
335 priv->ctl_timeout = 200;
336
337 priv->adap.nr = pdev->id;
338 priv->adap.algo = &platform_algo;
339 priv->adap.algo_data = priv;
340 priv->adap.dev.parent = &pdev->dev;
341 strlcpy(priv->adap.name, "PIC32 I2C", sizeof(priv->adap.name));
342
343 i2c_platform_setup(priv);
344
345 ret = i2c_add_numbered_adapter(&priv->adap);
346 if (ret) {
347 i2c_platform_disable(priv);
348 return ret;
349 }
350
351 platform_set_drvdata(pdev, priv);
352 return 0;
353}
354
355static int i2c_platform_remove(struct platform_device *pdev)
356{
357 struct i2c_platform_data *priv = platform_get_drvdata(pdev);
358
359 pr_debug("i2c_platform_remove\n");
360 platform_set_drvdata(pdev, NULL);
361 i2c_del_adapter(&priv->adap);
362 i2c_platform_disable(priv);
363 return 0;
364}
365
366#ifdef CONFIG_PM
367static int
368i2c_platform_suspend(struct platform_device *pdev, pm_message_t state)
369{
370 struct i2c_platform_data *priv = platform_get_drvdata(pdev);
371
372 dev_dbg(&pdev->dev, "i2c_platform_disable\n");
373 i2c_platform_disable(priv);
374
375 return 0;
376}
377
378static int
379i2c_platform_resume(struct platform_device *pdev)
380{
381 struct i2c_platform_data *priv = platform_get_drvdata(pdev);
382
383 dev_dbg(&pdev->dev, "i2c_platform_setup\n");
384 i2c_platform_setup(priv);
385
386 return 0;
387}
388#else
389#define i2c_platform_suspend NULL
390#define i2c_platform_resume NULL
391#endif
392
393static struct platform_driver i2c_platform_driver = {
394 .driver = {
395 .name = "i2c_pic32",
396 .owner = THIS_MODULE,
397 },
398 .probe = i2c_platform_probe,
399 .remove = i2c_platform_remove,
400 .suspend = i2c_platform_suspend,
401 .resume = i2c_platform_resume,
402};
403
404static int __init
405i2c_platform_init(void)
406{
407 pr_debug("i2c_platform_init\n");
408 return platform_driver_register(&i2c_platform_driver);
409}
410
411static void __exit
412i2c_platform_exit(void)
413{
414 pr_debug("i2c_platform_exit\n");
415 platform_driver_unregister(&i2c_platform_driver);
416}
417
418MODULE_AUTHOR("Chris Dearman, MIPS Technologies INC.");
419MODULE_DESCRIPTION("PIC32 I2C driver");
420MODULE_LICENSE("GPL");
421
422module_init(i2c_platform_init);
423module_exit(i2c_platform_exit);
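
platform_xfer() above implements the usual Linux master_xfer contract: each struct i2c_msg gets its own START and address phase via do_address(), then a payload phase via i2c_read() or i2c_write(), and on success the number of processed messages is returned. A minimal client sketch — the 0x50 address, register layout, and helper name are illustrative, not from this patch — showing what the i2c core hands to platform_xfer() through i2c_transfer():

#include <linux/i2c.h>
#include <linux/errno.h>

/* Hypothetical helper: read one byte from register 'reg' of a device at
 * address 0x50 behind adapter number 'nr'. The write message carries no
 * I2C_M_RD flag and the read message does, so do_address() above emits
 * the R/W bit accordingly for each message. */
static int pic32_i2c_read_reg(int nr, u8 reg, u8 *val)
{
	struct i2c_adapter *adap = i2c_get_adapter(nr);
	struct i2c_msg msgs[2] = {
		{ .addr = 0x50, .flags = 0,        .len = 1, .buf = &reg },
		{ .addr = 0x50, .flags = I2C_M_RD, .len = 1, .buf = val  },
	};
	int ret;

	if (!adap)
		return -ENODEV;
	ret = i2c_transfer(adap, msgs, 2);
	i2c_put_adapter(adap);

	/* i2c_transfer() returns the number of messages processed */
	return ret == 2 ? 0 : (ret < 0 ? ret : -EIO);
}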
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 37fe8e7887e2..d3ed15b2b2d1 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -215,17 +215,12 @@ static int ltq_pci_probe(struct platform_device *pdev)
215 215
216 pci_clear_flags(PCI_PROBE_ONLY); 216 pci_clear_flags(PCI_PROBE_ONLY);
217 217
218 res_cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
219 res_bridge = platform_get_resource(pdev, IORESOURCE_MEM, 1); 218 res_bridge = platform_get_resource(pdev, IORESOURCE_MEM, 1);
220 if (!res_cfg || !res_bridge) {
221 dev_err(&pdev->dev, "missing memory resources\n");
222 return -EINVAL;
223 }
224
225 ltq_pci_membase = devm_ioremap_resource(&pdev->dev, res_bridge); 219 ltq_pci_membase = devm_ioremap_resource(&pdev->dev, res_bridge);
226 if (IS_ERR(ltq_pci_membase)) 220 if (IS_ERR(ltq_pci_membase))
227 return PTR_ERR(ltq_pci_membase); 221 return PTR_ERR(ltq_pci_membase);
228 222
223 res_cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
229 ltq_pci_mapped_cfg = devm_ioremap_resource(&pdev->dev, res_cfg); 224 ltq_pci_mapped_cfg = devm_ioremap_resource(&pdev->dev, res_cfg);
230 if (IS_ERR(ltq_pci_mapped_cfg)) 225 if (IS_ERR(ltq_pci_mapped_cfg))
231 return PTR_ERR(ltq_pci_mapped_cfg); 226 return PTR_ERR(ltq_pci_mapped_cfg);
diff --git a/arch/mips/pmcs-msp71xx/msp_irq.c b/arch/mips/pmcs-msp71xx/msp_irq.c
index f914c753de21..8d53d7a2ed45 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq.c
@@ -16,6 +16,7 @@
16#include <linux/time.h> 16#include <linux/time.h>
17 17
18#include <asm/irq_cpu.h> 18#include <asm/irq_cpu.h>
19#include <asm/setup.h>
19 20
20#include <msp_int.h> 21#include <msp_int.h>
21 22
diff --git a/arch/mips/pmcs-msp71xx/msp_irq_cic.c b/arch/mips/pmcs-msp71xx/msp_irq_cic.c
index b8df2f7b3328..1207ec4dfb77 100644
--- a/arch/mips/pmcs-msp71xx/msp_irq_cic.c
+++ b/arch/mips/pmcs-msp71xx/msp_irq_cic.c
@@ -131,11 +131,11 @@ static int msp_cic_irq_set_affinity(struct irq_data *d,
131 int cpu; 131 int cpu;
132 unsigned long flags; 132 unsigned long flags;
133 unsigned int mtflags; 133 unsigned int mtflags;
134 unsigned long imask = (1 << (irq - MSP_CIC_INTBASE)); 134 unsigned long imask = (1 << (d->irq - MSP_CIC_INTBASE));
135 volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG; 135 volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG;
136 136
137 /* timer balancing should be disabled in kernel code */ 137 /* timer balancing should be disabled in kernel code */
138 BUG_ON(irq == MSP_INT_VPE0_TIMER || irq == MSP_INT_VPE1_TIMER); 138 BUG_ON(d->irq == MSP_INT_VPE0_TIMER || d->irq == MSP_INT_VPE1_TIMER);
139 139
140 LOCK_CORE(flags, mtflags); 140 LOCK_CORE(flags, mtflags);
141 /* enable if any of each VPE's TCs require this IRQ */ 141 /* enable if any of each VPE's TCs require this IRQ */
diff --git a/arch/mips/sibyte/Makefile b/arch/mips/sibyte/Makefile
index c8ed2c807e69..455c40d6d625 100644
--- a/arch/mips/sibyte/Makefile
+++ b/arch/mips/sibyte/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_SIBYTE_RHONE) += swarm/
25obj-$(CONFIG_SIBYTE_SENTOSA) += swarm/ 25obj-$(CONFIG_SIBYTE_SENTOSA) += swarm/
26obj-$(CONFIG_SIBYTE_SWARM) += swarm/ 26obj-$(CONFIG_SIBYTE_SWARM) += swarm/
27obj-$(CONFIG_SIBYTE_BIGSUR) += swarm/ 27obj-$(CONFIG_SIBYTE_BIGSUR) += swarm/
28obj-$(CONFIG_SIBYTE_LITTLESUR) += swarm/
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
index 63392f4b29a4..d2008887eb8c 100644
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ b/arch/powerpc/configs/pseries_le_defconfig
@@ -48,7 +48,6 @@ CONFIG_KEXEC=y
48CONFIG_IRQ_ALL_CPUS=y 48CONFIG_IRQ_ALL_CPUS=y
49CONFIG_MEMORY_HOTPLUG=y 49CONFIG_MEMORY_HOTPLUG=y
50CONFIG_MEMORY_HOTREMOVE=y 50CONFIG_MEMORY_HOTREMOVE=y
51CONFIG_CMA=y
52CONFIG_PPC_64K_PAGES=y 51CONFIG_PPC_64K_PAGES=y
53CONFIG_PPC_SUBPAGE_PROT=y 52CONFIG_PPC_SUBPAGE_PROT=y
54CONFIG_SCHED_SMT=y 53CONFIG_SCHED_SMT=y
@@ -138,6 +137,7 @@ CONFIG_NETCONSOLE=y
138CONFIG_NETPOLL_TRAP=y 137CONFIG_NETPOLL_TRAP=y
139CONFIG_TUN=m 138CONFIG_TUN=m
140CONFIG_VIRTIO_NET=m 139CONFIG_VIRTIO_NET=m
140CONFIG_VHOST_NET=m
141CONFIG_VORTEX=y 141CONFIG_VORTEX=y
142CONFIG_ACENIC=m 142CONFIG_ACENIC=m
143CONFIG_ACENIC_OMIT_TIGON_I=y 143CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -303,4 +303,9 @@ CONFIG_CRYPTO_LZO=m
303# CONFIG_CRYPTO_ANSI_CPRNG is not set 303# CONFIG_CRYPTO_ANSI_CPRNG is not set
304CONFIG_CRYPTO_DEV_NX=y 304CONFIG_CRYPTO_DEV_NX=y
305CONFIG_CRYPTO_DEV_NX_ENCRYPT=m 305CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
306CONFIG_VIRTUALIZATION=y
307CONFIG_KVM_BOOK3S_64=m
308CONFIG_KVM_BOOK3S_64_HV=y
309CONFIG_TRANSPARENT_HUGEPAGE=y
310CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
306CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y 311CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3b260efbfbf9..ca07f9c27335 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -71,9 +71,10 @@ struct device_node;
71 71
72#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ 72#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
73#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ 73#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
74#define EEH_PE_RESET (1 << 2) /* PE reset in progress */ 74#define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */
75 75
76#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ 76#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
77#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
77 78
78struct eeh_pe { 79struct eeh_pe {
79 int type; /* PE type: PHB/Bus/Device */ 80 int type; /* PE type: PHB/Bus/Device */
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 0bb23725b1e7..8bf1b6351716 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -34,7 +34,7 @@
34 do { \ 34 do { \
35 (regs)->result = 0; \ 35 (regs)->result = 0; \
36 (regs)->nip = __ip; \ 36 (regs)->nip = __ip; \
37 (regs)->gpr[1] = *(unsigned long *)__get_SP(); \ 37 (regs)->gpr[1] = current_stack_pointer(); \
38 asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \ 38 asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
39 } while (0) 39 } while (0)
40#endif 40#endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fe3f9488f321..c998279bd85b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1265,8 +1265,7 @@ static inline unsigned long mfvtb (void)
1265 1265
1266#define proc_trap() asm volatile("trap") 1266#define proc_trap() asm volatile("trap")
1267 1267
1268#define __get_SP() ({unsigned long sp; \ 1268extern unsigned long current_stack_pointer(void);
1269 asm volatile("mr %0,1": "=r" (sp)); sp;})
1270 1269
1271extern unsigned long scom970_read(unsigned int address); 1270extern unsigned long scom970_read(unsigned int address);
1272extern void scom970_write(unsigned int address, unsigned long value); 1271extern void scom970_write(unsigned int address, unsigned long value);
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6fa2708da153..6240698fee9a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -19,7 +19,7 @@
19 19
20/* ftrace syscalls requires exporting the sys_call_table */ 20/* ftrace syscalls requires exporting the sys_call_table */
21#ifdef CONFIG_FTRACE_SYSCALLS 21#ifdef CONFIG_FTRACE_SYSCALLS
22extern const unsigned long *sys_call_table; 22extern const unsigned long sys_call_table[];
23#endif /* CONFIG_FTRACE_SYSCALLS */ 23#endif /* CONFIG_FTRACE_SYSCALLS */
24 24
25static inline long syscall_get_nr(struct task_struct *task, 25static inline long syscall_get_nr(struct task_struct *task,
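
The one-line syscall.h change above fixes a classic C declaration mismatch: sys_call_table is defined as an array, and an extern that declares it as a pointer makes code reinterpret the first table entry as the pointer's value. A compile-only sketch of the bug class, with hypothetical names:

/* definition, in some other translation unit:
 *	const unsigned long table[] = { 11, 22, 33 };
 *
 * wrong extern — the symbol's bytes get reinterpreted as a pointer, so a
 * dereference would use table[0] (the value 11) as an address:
 *	extern const unsigned long *table;
 *
 * correct extern, matching the definition's array type: */
extern const unsigned long table[];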
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index adac9dc54aee..484b2d4462c1 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -53,9 +53,16 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
53#else 53#else
54 struct page *page; 54 struct page *page;
55 int node = dev_to_node(dev); 55 int node = dev_to_node(dev);
56#ifdef CONFIG_FSL_SOC
56 u64 pfn = get_pfn_limit(dev); 57 u64 pfn = get_pfn_limit(dev);
57 int zone; 58 int zone;
58 59
60 /*
61 * This code should be OK on other platforms, but we have drivers that
62 * don't set coherent_dma_mask. As a workaround we just ifdef it. This
63 * whole routine needs some serious cleanup.
64 */
65
59 zone = dma_pfn_limit_to_zone(pfn); 66 zone = dma_pfn_limit_to_zone(pfn);
60 if (zone < 0) { 67 if (zone < 0) {
61 dev_err(dev, "%s: No suitable zone for pfn %#llx\n", 68 dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
@@ -73,6 +80,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
73 break; 80 break;
74#endif 81#endif
75 }; 82 };
83#endif /* CONFIG_FSL_SOC */
76 84
77 /* ignore region specifiers */ 85 /* ignore region specifiers */
78 flag &= ~(__GFP_HIGHMEM); 86 flag &= ~(__GFP_HIGHMEM);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index d543e4179c18..2248a1999c64 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -257,6 +257,13 @@ static void *eeh_dump_pe_log(void *data, void *flag)
257 struct eeh_dev *edev, *tmp; 257 struct eeh_dev *edev, *tmp;
258 size_t *plen = flag; 258 size_t *plen = flag;
259 259
260 /* If the PE's config space is blocked, 0xFF's will be
261 * returned. It's pointless to collect the log in this
262 * case.
263 */
264 if (pe->state & EEH_PE_CFG_BLOCKED)
265 return NULL;
266
260 eeh_pe_for_each_dev(pe, edev, tmp) 267 eeh_pe_for_each_dev(pe, edev, tmp)
261 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 268 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
262 EEH_PCI_REGS_LOG_LEN - *plen); 269 EEH_PCI_REGS_LOG_LEN - *plen);
@@ -673,18 +680,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
673 switch (state) { 680 switch (state) {
674 case pcie_deassert_reset: 681 case pcie_deassert_reset:
675 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 682 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
676 eeh_pe_state_clear(pe, EEH_PE_RESET); 683 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
677 break; 684 break;
678 case pcie_hot_reset: 685 case pcie_hot_reset:
679 eeh_pe_state_mark(pe, EEH_PE_RESET); 686 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
680 eeh_ops->reset(pe, EEH_RESET_HOT); 687 eeh_ops->reset(pe, EEH_RESET_HOT);
681 break; 688 break;
682 case pcie_warm_reset: 689 case pcie_warm_reset:
683 eeh_pe_state_mark(pe, EEH_PE_RESET); 690 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
684 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 691 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
685 break; 692 break;
686 default: 693 default:
687 eeh_pe_state_clear(pe, EEH_PE_RESET); 694 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
688 return -EINVAL; 695 return -EINVAL;
689 }; 696 };
690 697
@@ -1523,7 +1530,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
1523 switch (option) { 1530 switch (option) {
1524 case EEH_RESET_DEACTIVATE: 1531 case EEH_RESET_DEACTIVATE:
1525 ret = eeh_ops->reset(pe, option); 1532 ret = eeh_ops->reset(pe, option);
1526 eeh_pe_state_clear(pe, EEH_PE_RESET); 1533 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
1527 if (ret) 1534 if (ret)
1528 break; 1535 break;
1529 1536
@@ -1538,7 +1545,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
1538 */ 1545 */
1539 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 1546 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
1540 1547
1541 eeh_pe_state_mark(pe, EEH_PE_RESET); 1548 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
1542 ret = eeh_ops->reset(pe, option); 1549 ret = eeh_ops->reset(pe, option);
1543 break; 1550 break;
1544 default: 1551 default:
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3fd514f8e4b2..6535936bdf27 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -528,13 +528,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
528 eeh_pe_dev_traverse(pe, eeh_report_error, &result); 528 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
529 529
530 /* Issue reset */ 530 /* Issue reset */
531 eeh_pe_state_mark(pe, EEH_PE_RESET); 531 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
532 ret = eeh_reset_pe(pe); 532 ret = eeh_reset_pe(pe);
533 if (ret) { 533 if (ret) {
534 eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET); 534 eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED);
535 return ret; 535 return ret;
536 } 536 }
537 eeh_pe_state_clear(pe, EEH_PE_RESET); 537 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
538 538
539 /* Unfreeze the PE */ 539 /* Unfreeze the PE */
540 ret = eeh_clear_pe_frozen_state(pe, true); 540 ret = eeh_clear_pe_frozen_state(pe, true);
@@ -601,10 +601,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
601 * config accesses. So we prefer to block them. However, controlled 601 * config accesses. So we prefer to block them. However, controlled
602 * PCI config accesses initiated from EEH itself are allowed. 602 * PCI config accesses initiated from EEH itself are allowed.
603 */ 603 */
604 eeh_pe_state_mark(pe, EEH_PE_RESET); 604 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
605 rc = eeh_reset_pe(pe); 605 rc = eeh_reset_pe(pe);
606 if (rc) { 606 if (rc) {
607 eeh_pe_state_clear(pe, EEH_PE_RESET); 607 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
608 return rc; 608 return rc;
609 } 609 }
610 610
@@ -613,7 +613,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
613 /* Restore PE */ 613 /* Restore PE */
614 eeh_ops->configure_bridge(pe); 614 eeh_ops->configure_bridge(pe);
615 eeh_pe_restore_bars(pe); 615 eeh_pe_restore_bars(pe);
616 eeh_pe_state_clear(pe, EEH_PE_RESET); 616 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
617 617
618 /* Clear frozen state */ 618 /* Clear frozen state */
619 rc = eeh_clear_pe_frozen_state(pe, false); 619 rc = eeh_clear_pe_frozen_state(pe, false);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 53dd0915e690..5a63e2b0f65b 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -525,7 +525,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
525 pe->state |= state; 525 pe->state |= state;
526 526
527 /* Offline PCI devices if applicable */ 527 /* Offline PCI devices if applicable */
528 if (state != EEH_PE_ISOLATED) 528 if (!(state & EEH_PE_ISOLATED))
529 return NULL; 529 return NULL;
530 530
531 eeh_pe_for_each_dev(pe, edev, tmp) { 531 eeh_pe_for_each_dev(pe, edev, tmp) {
@@ -534,6 +534,10 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
534 pdev->error_state = pci_channel_io_frozen; 534 pdev->error_state = pci_channel_io_frozen;
535 } 535 }
536 536
537 /* Block PCI config access if required */
538 if (pe->state & EEH_PE_CFG_RESTRICTED)
539 pe->state |= EEH_PE_CFG_BLOCKED;
540
537 return NULL; 541 return NULL;
538} 542}
539 543
@@ -611,6 +615,10 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
611 pdev->error_state = pci_channel_io_normal; 615 pdev->error_state = pci_channel_io_normal;
612 } 616 }
613 617
618 /* Unblock PCI config access if required */
619 if (pe->state & EEH_PE_CFG_RESTRICTED)
620 pe->state &= ~EEH_PE_CFG_BLOCKED;
621
614 return NULL; 622 return NULL;
615} 623}
616 624
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 050f79a4a168..72e783ea0681 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1270,11 +1270,6 @@ hmi_exception_early:
1270 addi r3,r1,STACK_FRAME_OVERHEAD 1270 addi r3,r1,STACK_FRAME_OVERHEAD
1271 bl hmi_exception_realmode 1271 bl hmi_exception_realmode
1272 /* Windup the stack. */ 1272 /* Windup the stack. */
1273 /* Clear MSR_RI before setting SRR0 and SRR1. */
1274 li r0,MSR_RI
1275 mfmsr r9 /* get MSR value */
1276 andc r9,r9,r0
1277 mtmsrd r9,1 /* Clear MSR_RI */
1278 /* Move original HSRR0 and HSRR1 into the respective regs */ 1273 /* Move original HSRR0 and HSRR1 into the respective regs */
1279 ld r9,_MSR(r1) 1274 ld r9,_MSR(r1)
1280 mtspr SPRN_HSRR1,r9 1275 mtspr SPRN_HSRR1,r9
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8eb857f216c1..c14383575fe8 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -466,7 +466,7 @@ static inline void check_stack_overflow(void)
466#ifdef CONFIG_DEBUG_STACKOVERFLOW 466#ifdef CONFIG_DEBUG_STACKOVERFLOW
467 long sp; 467 long sp;
468 468
469 sp = __get_SP() & (THREAD_SIZE-1); 469 sp = current_stack_pointer() & (THREAD_SIZE-1);
470 470
471 /* check for stack overflow: is there less than 2KB free? */ 471 /* check for stack overflow: is there less than 2KB free? */
472 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { 472 if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7ce26d45777e..0d432194c018 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -114,3 +114,7 @@ _GLOBAL(longjmp)
114 mtlr r0 114 mtlr r0
115 mr r3,r4 115 mr r3,r4
116 blr 116 blr
117
118_GLOBAL(current_stack_pointer)
119 PPC_LL r3,0(r1)
120 blr
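
current_stack_pointer() above replaces the __get_SP() inline-asm macro with a real function: the PPC_LL picks up the back-chain word at 0(r1), so callers get a pointer into their own stack, and the symbol can be exported and declared normally. A sketch of the converted call-site pattern — the wrapper name is illustrative; the masking mirrors check_stack_overflow() in the irq.c hunk above:

#include <asm/reg.h>		/* current_stack_pointer(), after this series */
#include <asm/thread_info.h>	/* THREAD_SIZE */

/* Illustrative: offset of the current SP within its THREAD_SIZE-aligned
 * kernel stack, the quantity the stack-overflow check compares. */
static unsigned long stack_offset(void)
{
	return current_stack_pointer() & (THREAD_SIZE - 1);
}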
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c4dfff6c2719..202963ee013a 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -41,3 +41,5 @@ EXPORT_SYMBOL(giveup_spe);
41#ifdef CONFIG_EPAPR_PARAVIRT 41#ifdef CONFIG_EPAPR_PARAVIRT
42EXPORT_SYMBOL(epapr_hypercall_start); 42EXPORT_SYMBOL(epapr_hypercall_start);
43#endif 43#endif
44
45EXPORT_SYMBOL(current_stack_pointer);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index aa1df89c8b2a..923cd2daba89 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1545,7 +1545,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
1545 tsk = current; 1545 tsk = current;
1546 if (sp == 0) { 1546 if (sp == 0) {
1547 if (tsk == current) 1547 if (tsk == current)
1548 asm("mr %0,1" : "=r" (sp)); 1548 sp = current_stack_pointer();
1549 else 1549 else
1550 sp = tsk->thread.ksp; 1550 sp = tsk->thread.ksp;
1551 } 1551 }
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index c168337aef9d..7c55b86206b3 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -66,6 +66,11 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
66 return PCIBIOS_DEVICE_NOT_FOUND; 66 return PCIBIOS_DEVICE_NOT_FOUND;
67 if (!config_access_valid(pdn, where)) 67 if (!config_access_valid(pdn, where))
68 return PCIBIOS_BAD_REGISTER_NUMBER; 68 return PCIBIOS_BAD_REGISTER_NUMBER;
69#ifdef CONFIG_EEH
70 if (pdn->edev && pdn->edev->pe &&
71 (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
72 return PCIBIOS_SET_FAILED;
73#endif
69 74
70 addr = rtas_config_addr(pdn->busno, pdn->devfn, where); 75 addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
71 buid = pdn->phb->buid; 76 buid = pdn->phb->buid;
@@ -90,9 +95,6 @@ static int rtas_pci_read_config(struct pci_bus *bus,
90 struct device_node *busdn, *dn; 95 struct device_node *busdn, *dn;
91 struct pci_dn *pdn; 96 struct pci_dn *pdn;
92 bool found = false; 97 bool found = false;
93#ifdef CONFIG_EEH
94 struct eeh_dev *edev;
95#endif
96 int ret; 98 int ret;
97 99
98 /* Search only direct children of the bus */ 100 /* Search only direct children of the bus */
@@ -109,11 +111,6 @@ static int rtas_pci_read_config(struct pci_bus *bus,
109 111
110 if (!found) 112 if (!found)
111 return PCIBIOS_DEVICE_NOT_FOUND; 113 return PCIBIOS_DEVICE_NOT_FOUND;
112#ifdef CONFIG_EEH
113 edev = of_node_to_eeh_dev(dn);
114 if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
115 return PCIBIOS_DEVICE_NOT_FOUND;
116#endif
117 114
118 ret = rtas_read_config(pdn, where, size, val); 115 ret = rtas_read_config(pdn, where, size, val);
119 if (*val == EEH_IO_ERROR_VALUE(size) && 116 if (*val == EEH_IO_ERROR_VALUE(size) &&
@@ -132,6 +129,11 @@ int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
132 return PCIBIOS_DEVICE_NOT_FOUND; 129 return PCIBIOS_DEVICE_NOT_FOUND;
133 if (!config_access_valid(pdn, where)) 130 if (!config_access_valid(pdn, where))
134 return PCIBIOS_BAD_REGISTER_NUMBER; 131 return PCIBIOS_BAD_REGISTER_NUMBER;
132#ifdef CONFIG_EEH
133 if (pdn->edev && pdn->edev->pe &&
134 (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
135 return PCIBIOS_SET_FAILED;
136#endif
135 137
136 addr = rtas_config_addr(pdn->busno, pdn->devfn, where); 138 addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
137 buid = pdn->phb->buid; 139 buid = pdn->phb->buid;
@@ -155,10 +157,6 @@ static int rtas_pci_write_config(struct pci_bus *bus,
155 struct device_node *busdn, *dn; 157 struct device_node *busdn, *dn;
156 struct pci_dn *pdn; 158 struct pci_dn *pdn;
157 bool found = false; 159 bool found = false;
158#ifdef CONFIG_EEH
159 struct eeh_dev *edev;
160#endif
161 int ret;
162 160
163 /* Search only direct children of the bus */ 161 /* Search only direct children of the bus */
164 busdn = pci_bus_to_OF_node(bus); 162 busdn = pci_bus_to_OF_node(bus);
@@ -173,14 +171,8 @@ static int rtas_pci_write_config(struct pci_bus *bus,
173 171
174 if (!found) 172 if (!found)
175 return PCIBIOS_DEVICE_NOT_FOUND; 173 return PCIBIOS_DEVICE_NOT_FOUND;
176#ifdef CONFIG_EEH
177 edev = of_node_to_eeh_dev(dn);
178 if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
179 return PCIBIOS_DEVICE_NOT_FOUND;
180#endif
181 ret = rtas_write_config(pdn, where, size, val);
182 174
183 return ret; 175 return rtas_write_config(pdn, where, size, val);
184} 176}
185 177
186static struct pci_ops rtas_pci_ops = { 178static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index cd07d79ad21c..4f3cfe1b6a33 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -522,36 +522,36 @@ void __init setup_system(void)
522 smp_release_cpus(); 522 smp_release_cpus();
523#endif 523#endif
524 524
525 printk("Starting Linux PPC64 %s\n", init_utsname()->version); 525 pr_info("Starting Linux PPC64 %s\n", init_utsname()->version);
526 526
527 printk("-----------------------------------------------------\n"); 527 pr_info("-----------------------------------------------------\n");
528 printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); 528 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
529 printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); 529 pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size());
530 530
531 if (ppc64_caches.dline_size != 0x80) 531 if (ppc64_caches.dline_size != 0x80)
532 printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); 532 pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size);
533 if (ppc64_caches.iline_size != 0x80) 533 if (ppc64_caches.iline_size != 0x80)
534 printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); 534 pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size);
535 535
536 printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); 536 pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
537 printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); 537 pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE);
538 printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); 538 pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS);
539 printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, 539 pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
540 cur_cpu_spec->cpu_user_features2); 540 cur_cpu_spec->cpu_user_features2);
541 printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); 541 pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
542 printk("firmware_features = 0x%016lx\n", powerpc_firmware_features); 542 pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
543 543
544#ifdef CONFIG_PPC_STD_MMU_64 544#ifdef CONFIG_PPC_STD_MMU_64
545 if (htab_address) 545 if (htab_address)
546 printk("htab_address = 0x%p\n", htab_address); 546 pr_info("htab_address = 0x%p\n", htab_address);
547 547
548 printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); 548 pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
549#endif 549#endif
550 550
551 if (PHYSICAL_START > 0) 551 if (PHYSICAL_START > 0)
552 printk("physical_start = 0x%llx\n", 552 pr_info("physical_start = 0x%llx\n",
553 (unsigned long long)PHYSICAL_START); 553 (unsigned long long)PHYSICAL_START);
554 printk("-----------------------------------------------------\n"); 554 pr_info("-----------------------------------------------------\n");
555 555
556 DBG(" <- setup_system()\n"); 556 DBG(" <- setup_system()\n");
557} 557}
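
The printk()-to-pr_info() conversion above pairs with the pr_fmt() prefix that the numa.c hunk below introduces: the pr_*() helpers expand their format string through pr_fmt(), so one define gives every message in a file a consistent prefix. A minimal sketch (the "setup: " prefix is illustrative, not from this patch):

/* must be defined before printk.h is pulled in so the pr_*() macros see it */
#define pr_fmt(fmt) "setup: " fmt

#include <linux/printk.h>

static void announce(int cpus)
{
	/* expands to roughly printk(KERN_INFO "setup: %d cpus online\n", cpus) */
	pr_info("%d cpus online\n", cpus);
}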
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 3d30ef1038e5..ea43a347a104 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -50,7 +50,7 @@ void save_stack_trace(struct stack_trace *trace)
50{ 50{
51 unsigned long sp; 51 unsigned long sp;
52 52
53 asm("mr %0,1" : "=r" (sp)); 53 sp = current_stack_pointer();
54 54
55 save_context_stack(trace, sp, current, 1); 55 save_context_stack(trace, sp, current, 1);
56} 56}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 649666d5d1c2..e5236c24dc07 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -8,6 +8,8 @@
8 * as published by the Free Software Foundation; either version 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11#define pr_fmt(fmt) "numa: " fmt
12
11#include <linux/threads.h> 13#include <linux/threads.h>
12#include <linux/bootmem.h> 14#include <linux/bootmem.h>
13#include <linux/init.h> 15#include <linux/init.h>
@@ -1153,6 +1155,22 @@ static int __init early_numa(char *p)
1153} 1155}
1154early_param("numa", early_numa); 1156early_param("numa", early_numa);
1155 1157
1158static bool topology_updates_enabled = true;
1159
1160static int __init early_topology_updates(char *p)
1161{
1162 if (!p)
1163 return 0;
1164
1165 if (!strcmp(p, "off")) {
1166 pr_info("Disabling topology updates\n");
1167 topology_updates_enabled = false;
1168 }
1169
1170 return 0;
1171}
1172early_param("topology_updates", early_topology_updates);
1173
1156#ifdef CONFIG_MEMORY_HOTPLUG 1174#ifdef CONFIG_MEMORY_HOTPLUG
1157/* 1175/*
1158 * Find the node associated with a hot added memory section for 1176 * Find the node associated with a hot added memory section for
@@ -1442,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity)
1442 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 1460 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
1443 u64 flags = 1; 1461 u64 flags = 1;
1444 int hwcpu = get_hard_smp_processor_id(cpu); 1462 int hwcpu = get_hard_smp_processor_id(cpu);
1463 int i;
1445 1464
1446 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); 1465 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
1466 for (i = 0; i < 6; i++)
1467 retbuf[i] = cpu_to_be64(retbuf[i]);
1447 vphn_unpack_associativity(retbuf, associativity); 1468 vphn_unpack_associativity(retbuf, associativity);
1448 1469
1449 return rc; 1470 return rc;
@@ -1539,6 +1560,9 @@ int arch_update_cpu_topology(void)
1539 struct device *dev; 1560 struct device *dev;
1540 int weight, new_nid, i = 0; 1561 int weight, new_nid, i = 0;
1541 1562
1563 if (!prrn_enabled && !vphn_enabled)
1564 return 0;
1565
1542 weight = cpumask_weight(&cpu_associativity_changes_mask); 1566 weight = cpumask_weight(&cpu_associativity_changes_mask);
1543 if (!weight) 1567 if (!weight)
1544 return 0; 1568 return 0;
@@ -1592,6 +1616,15 @@ int arch_update_cpu_topology(void)
1592 cpu = cpu_last_thread_sibling(cpu); 1616 cpu = cpu_last_thread_sibling(cpu);
1593 } 1617 }
1594 1618
1619 pr_debug("Topology update for the following CPUs:\n");
1620 if (cpumask_weight(&updated_cpus)) {
1621 for (ud = &updates[0]; ud; ud = ud->next) {
1622 pr_debug("cpu %d moving from node %d "
1623 "to %d\n", ud->cpu,
1624 ud->old_nid, ud->new_nid);
1625 }
1626 }
1627
1595 /* 1628 /*
1596 * In cases where we have nothing to update (because the updates list 1629 * In cases where we have nothing to update (because the updates list
1597 * is too short or because the new topology is same as the old one), 1630 * is too short or because the new topology is same as the old one),
@@ -1800,8 +1833,12 @@ static const struct file_operations topology_ops = {
1800 1833
1801static int topology_update_init(void) 1834static int topology_update_init(void)
1802{ 1835{
1803 start_topology_update(); 1836 /* Do not poll for changes if disabled at boot */
1804 proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops); 1837 if (topology_updates_enabled)
1838 start_topology_update();
1839
1840 if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
1841 return -ENOMEM;
1805 1842
1806 return 0; 1843 return 0;
1807} 1844}
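
The early_topology_updates() hunk above uses the standard early_param() hook: the handler runs very early in boot with the text following "topology_updates=" on the kernel command line, so booting with topology_updates=off now keeps start_topology_update() from being called in topology_update_init(). A generic sketch of the pattern, with a hypothetical parameter name:

#include <linux/init.h>
#include <linux/printk.h>
#include <linux/string.h>

static bool feature_enabled = true;

/* Hypothetical handler mirroring early_topology_updates(): 'p' is the
 * value after "feature=", or NULL if none was given; returning 0 tells
 * the early-param machinery the option was handled. */
static int __init early_feature(char *p)
{
	if (p && !strcmp(p, "off")) {
		pr_info("Disabling feature\n");
		feature_enabled = false;
	}
	return 0;
}
early_param("feature", early_feature);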
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 426814a2ede3..eba9cb10619c 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -373,7 +373,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
373 * moving forward, we have to return operational 373 * moving forward, we have to return operational
374 * state during PE reset. 374 * state during PE reset.
375 */ 375 */
376 if (pe->state & EEH_PE_RESET) { 376 if (pe->state & EEH_PE_CFG_BLOCKED) {
377 result = (EEH_STATE_MMIO_ACTIVE | 377 result = (EEH_STATE_MMIO_ACTIVE |
378 EEH_STATE_DMA_ACTIVE | 378 EEH_STATE_DMA_ACTIVE |
379 EEH_STATE_MMIO_ENABLED | 379 EEH_STATE_MMIO_ENABLED |
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3e89cbf55885..1d19e7917d7f 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -169,6 +169,26 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
169 } 169 }
170 170
171 /* 171 /*
 172 * If the PE contains any one of the following adapters, the
 173 * PCI config space can't be accessed when dumping the EEH log.
 174 * Otherwise, we will run into a fenced PHB caused by a shortage
 175 * of outbound credits in the adapter. PCI config access
 176 * should be blocked until PE reset. MMIO access is certainly
 177 * dropped by hardware. In order to drop PCI config requests,
 178 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
 179 * will be checked in the backend during PE state retrieval. If
 180 * the PE becomes frozen for the first time and the flag has
 181 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
 182 * that PE to block its config space.
 183 *
 184 * Broadcom Austin 4-port NICs (14e4:1657)
 185 * Broadcom Shiner 2-port 10G NICs (14e4:168e)
186 */
187 if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) ||
188 (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e))
189 edev->pe->state |= EEH_PE_CFG_RESTRICTED;
190
191 /*
172 * Cache the PE primary bus, which can't be fetched when 192 * Cache the PE primary bus, which can't be fetched when
173 * full hotplug is in progress. In that case, all child 193 * full hotplug is in progress. In that case, all child
174 * PCI devices of the PE are expected to be removed prior 194 * PCI devices of the PE are expected to be removed prior
@@ -383,6 +403,39 @@ static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
383 return ret; 403 return ret;
384} 404}
385 405
406static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
407{
408 struct eeh_dev *edev = of_node_to_eeh_dev(dn);
409
410 if (!edev || !edev->pe)
411 return false;
412
413 if (edev->pe->state & EEH_PE_CFG_BLOCKED)
414 return true;
415
416 return false;
417}
418
419static int powernv_eeh_read_config(struct device_node *dn,
420 int where, int size, u32 *val)
421{
422 if (powernv_eeh_cfg_blocked(dn)) {
423 *val = 0xFFFFFFFF;
424 return PCIBIOS_SET_FAILED;
425 }
426
427 return pnv_pci_cfg_read(dn, where, size, val);
428}
429
430static int powernv_eeh_write_config(struct device_node *dn,
431 int where, int size, u32 val)
432{
433 if (powernv_eeh_cfg_blocked(dn))
434 return PCIBIOS_SET_FAILED;
435
436 return pnv_pci_cfg_write(dn, where, size, val);
437}
438
386/** 439/**
387 * powernv_eeh_next_error - Retrieve next EEH error to handle 440 * powernv_eeh_next_error - Retrieve next EEH error to handle
388 * @pe: Affected PE 441 * @pe: Affected PE
@@ -440,8 +493,8 @@ static struct eeh_ops powernv_eeh_ops = {
440 .get_log = powernv_eeh_get_log, 493 .get_log = powernv_eeh_get_log,
441 .configure_bridge = powernv_eeh_configure_bridge, 494 .configure_bridge = powernv_eeh_configure_bridge,
442 .err_inject = powernv_eeh_err_inject, 495 .err_inject = powernv_eeh_err_inject,
443 .read_config = pnv_pci_cfg_read, 496 .read_config = powernv_eeh_read_config,
444 .write_config = pnv_pci_cfg_write, 497 .write_config = powernv_eeh_write_config,
445 .next_error = powernv_eeh_next_error, 498 .next_error = powernv_eeh_next_error,
446 .restore_config = powernv_eeh_restore_config 499 .restore_config = powernv_eeh_restore_config
447}; 500};
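
Taken together, the EEH hunks above form a two-flag protocol: probe tags the two fragile Broadcom devices with EEH_PE_CFG_RESTRICTED; when such a PE is later marked isolated, __eeh_pe_state_mark() escalates that to EEH_PE_CFG_BLOCKED; while the blocked bit is set, every config accessor fails fast (reads return 0xFFFFFFFF, matching what eeh_dump_pe_log() now expects); and __eeh_pe_state_clear() drops the bit again. A stand-alone user-space model of that lifecycle — the flag values mirror eeh.h, everything else is illustrative:

#include <stdio.h>

#define EEH_PE_ISOLATED		(1 << 0)
#define EEH_PE_CFG_BLOCKED	(1 << 2)
#define EEH_PE_CFG_RESTRICTED	(1 << 9)

struct pe { unsigned int state; };

static void pe_state_mark(struct pe *pe, unsigned int state)
{
	pe->state |= state;
	/* block config access, but only for PEs flagged at probe time */
	if ((state & EEH_PE_ISOLATED) && (pe->state & EEH_PE_CFG_RESTRICTED))
		pe->state |= EEH_PE_CFG_BLOCKED;
}

static void pe_state_clear(struct pe *pe, unsigned int state)
{
	pe->state &= ~state;
	if (pe->state & EEH_PE_CFG_RESTRICTED)
		pe->state &= ~EEH_PE_CFG_BLOCKED;
}

static int cfg_read(const struct pe *pe, unsigned int *val)
{
	if (pe->state & EEH_PE_CFG_BLOCKED) {
		*val = 0xFFFFFFFF;	/* fail fast, as powernv_eeh_read_config() does */
		return -1;
	}
	*val = 0x12345678;		/* stands in for a real config-space read */
	return 0;
}

int main(void)
{
	struct pe pe = { .state = EEH_PE_CFG_RESTRICTED };
	unsigned int val;
	int rc;

	pe_state_mark(&pe, EEH_PE_ISOLATED);
	rc = cfg_read(&pe, &val);
	printf("isolated: rc=%d val=%#x\n", rc, val);

	pe_state_clear(&pe, EEH_PE_ISOLATED);
	rc = cfg_read(&pe, &val);
	printf("cleared:  rc=%d val=%#x\n", rc, val);
	return 0;
}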
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index b642b0562f5a..d019b081df9d 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -194,6 +194,27 @@ static int __init opal_register_exception_handlers(void)
194 * fwnmi area at 0x7000 to provide the glue space to OPAL 194 * fwnmi area at 0x7000 to provide the glue space to OPAL
195 */ 195 */
196 glue = 0x7000; 196 glue = 0x7000;
197
198 /*
199 * Check if we are running on newer firmware that exports
200 * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
201 * the HMI interrupt and we catch it directly in Linux.
202 *
 203 * For older firmware (i.e. currently released POWER8 System Firmware,
 204 * as of today <= SV810_087), we fall back to the old behavior and let OPAL
205 * patch the HMI vector and handle it inside OPAL firmware.
206 *
207 * For newer firmware (in development/yet to be released) we will
208 * start catching/handling HMI directly in Linux.
209 */
210 if (!opal_check_token(OPAL_HANDLE_HMI)) {
211 pr_info("opal: Old firmware detected, OPAL handles HMIs.\n");
212 opal_register_exception_handler(
213 OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
214 0, glue);
215 glue += 128;
216 }
217
197 opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); 218 opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
198#endif 219#endif
199 220
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b3ca77ddf36d..b2187d0068b8 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -505,7 +505,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose,
505 edev = of_node_to_eeh_dev(dn); 505 edev = of_node_to_eeh_dev(dn);
506 if (edev) { 506 if (edev) {
507 if (edev->pe && 507 if (edev->pe &&
508 (edev->pe->state & EEH_PE_RESET)) 508 (edev->pe->state & EEH_PE_CFG_BLOCKED))
509 return false; 509 return false;
510 510
511 if (edev->mode & EEH_DEV_REMOVED) 511 if (edev->mode & EEH_DEV_REMOVED)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index fdf01b660d59..6ad83bd11fe2 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -25,11 +25,11 @@
25#include <asm/rtas.h> 25#include <asm/rtas.h>
26 26
27struct cc_workarea { 27struct cc_workarea {
28 u32 drc_index; 28 __be32 drc_index;
29 u32 zero; 29 __be32 zero;
30 u32 name_offset; 30 __be32 name_offset;
31 u32 prop_length; 31 __be32 prop_length;
32 u32 prop_offset; 32 __be32 prop_offset;
33}; 33};
34 34
35void dlpar_free_cc_property(struct property *prop) 35void dlpar_free_cc_property(struct property *prop)
@@ -49,11 +49,11 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
49 if (!prop) 49 if (!prop)
50 return NULL; 50 return NULL;
51 51
52 name = (char *)ccwa + ccwa->name_offset; 52 name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
53 prop->name = kstrdup(name, GFP_KERNEL); 53 prop->name = kstrdup(name, GFP_KERNEL);
54 54
55 prop->length = ccwa->prop_length; 55 prop->length = be32_to_cpu(ccwa->prop_length);
56 value = (char *)ccwa + ccwa->prop_offset; 56 value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset);
57 prop->value = kmemdup(value, prop->length, GFP_KERNEL); 57 prop->value = kmemdup(value, prop->length, GFP_KERNEL);
58 if (!prop->value) { 58 if (!prop->value) {
59 dlpar_free_cc_property(prop); 59 dlpar_free_cc_property(prop);
@@ -79,7 +79,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa,
79 if (!dn) 79 if (!dn)
80 return NULL; 80 return NULL;
81 81
82 name = (char *)ccwa + ccwa->name_offset; 82 name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
83 dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name); 83 dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name);
84 if (!dn->full_name) { 84 if (!dn->full_name) {
85 kfree(dn); 85 kfree(dn);
@@ -126,7 +126,7 @@ void dlpar_free_cc_nodes(struct device_node *dn)
126#define CALL_AGAIN -2 126#define CALL_AGAIN -2
127#define ERR_CFG_USE -9003 127#define ERR_CFG_USE -9003
128 128
129struct device_node *dlpar_configure_connector(u32 drc_index, 129struct device_node *dlpar_configure_connector(__be32 drc_index,
130 struct device_node *parent) 130 struct device_node *parent)
131{ 131{
132 struct device_node *dn; 132 struct device_node *dn;
@@ -414,7 +414,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
414 if (!parent) 414 if (!parent)
415 return -ENODEV; 415 return -ENODEV;
416 416
417 dn = dlpar_configure_connector(drc_index, parent); 417 dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
418 if (!dn) 418 if (!dn)
419 return -EINVAL; 419 return -EINVAL;
420 420
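
The dlpar.c conversion above is a pure endianness fix: RTAS fills the configure-connector work area in big-endian byte order regardless of kernel endianness, which only becomes visible on little-endian pseries kernels, so the fields are retyped as __be32 and every use goes through be32_to_cpu(). A sketch of the idiom — the struct and helper below are illustrative, not the patched code:

#include <linux/types.h>	/* __be32 */
#include <asm/byteorder.h>	/* be32_to_cpu() */

/* Declare firmware-filled fields in wire order as __be32 so sparse can
 * flag any use that forgets the conversion. */
struct fw_workarea {
	__be32 name_offset;
	__be32 prop_length;
};

static inline char *fw_name(struct fw_workarea *wa)
{
	/* byte-swaps on little-endian kernels, compiles away on big-endian */
	return (char *)wa + be32_to_cpu(wa->name_offset);
}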
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index b174fa751d26..5c375f93c669 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -247,7 +247,7 @@ static int pseries_add_processor(struct device_node *np)
247 unsigned int cpu; 247 unsigned int cpu;
248 cpumask_var_t candidate_mask, tmp; 248 cpumask_var_t candidate_mask, tmp;
249 int err = -ENOSPC, len, nthreads, i; 249 int err = -ENOSPC, len, nthreads, i;
250 const u32 *intserv; 250 const __be32 *intserv;
251 251
252 intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); 252 intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
253 if (!intserv) 253 if (!intserv)
@@ -293,7 +293,7 @@ static int pseries_add_processor(struct device_node *np)
293 for_each_cpu(cpu, tmp) { 293 for_each_cpu(cpu, tmp) {
294 BUG_ON(cpu_present(cpu)); 294 BUG_ON(cpu_present(cpu));
295 set_cpu_present(cpu, true); 295 set_cpu_present(cpu, true);
296 set_hard_smp_processor_id(cpu, *intserv++); 296 set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
297 } 297 }
298 err = 0; 298 err = 0;
299out_unlock: 299out_unlock:
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index de1ec54a2a57..e32e00976a94 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -30,7 +30,6 @@
30#include <linux/mm.h> 30#include <linux/mm.h>
31#include <linux/memblock.h> 31#include <linux/memblock.h>
32#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33#include <linux/sched.h> /* for show_stack */
34#include <linux/string.h> 33#include <linux/string.h>
35#include <linux/pci.h> 34#include <linux/pci.h>
36#include <linux/dma-mapping.h> 35#include <linux/dma-mapping.h>
@@ -168,7 +167,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
168 printk("\tindex = 0x%llx\n", (u64)tbl->it_index); 167 printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
169 printk("\ttcenum = 0x%llx\n", (u64)tcenum); 168 printk("\ttcenum = 0x%llx\n", (u64)tcenum);
170 printk("\ttce val = 0x%llx\n", tce ); 169 printk("\ttce val = 0x%llx\n", tce );
171 show_stack(current, (unsigned long *)__get_SP()); 170 dump_stack();
172 } 171 }
173 172
174 tcenum++; 173 tcenum++;
@@ -257,7 +256,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
257 printk("\tindex = 0x%llx\n", (u64)tbl->it_index); 256 printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
258 printk("\tnpages = 0x%llx\n", (u64)npages); 257 printk("\tnpages = 0x%llx\n", (u64)npages);
259 printk("\ttce[0] val = 0x%llx\n", tcep[0]); 258 printk("\ttce[0] val = 0x%llx\n", tcep[0]);
260 show_stack(current, (unsigned long *)__get_SP()); 259 dump_stack();
261 } 260 }
262 return ret; 261 return ret;
263} 262}
@@ -273,7 +272,7 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages
273 printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); 272 printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
274 printk("\tindex = 0x%llx\n", (u64)tbl->it_index); 273 printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
275 printk("\ttcenum = 0x%llx\n", (u64)tcenum); 274 printk("\ttcenum = 0x%llx\n", (u64)tcenum);
276 show_stack(current, (unsigned long *)__get_SP()); 275 dump_stack();
277 } 276 }
278 277
279 tcenum++; 278 tcenum++;
@@ -292,7 +291,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
292 printk("\trc = %lld\n", rc); 291 printk("\trc = %lld\n", rc);
293 printk("\tindex = 0x%llx\n", (u64)tbl->it_index); 292 printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
294 printk("\tnpages = 0x%llx\n", (u64)npages); 293 printk("\tnpages = 0x%llx\n", (u64)npages);
295 show_stack(current, (unsigned long *)__get_SP()); 294 dump_stack();
296 } 295 }
297} 296}
298 297
@@ -307,7 +306,7 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
307 printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); 306 printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
308 printk("\tindex = 0x%llx\n", (u64)tbl->it_index); 307 printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
309 printk("\ttcenum = 0x%llx\n", (u64)tcenum); 308 printk("\ttcenum = 0x%llx\n", (u64)tcenum);
310 show_stack(current, (unsigned long *)__get_SP()); 309 dump_stack();
311 } 310 }
312 311
313 return tce_ret; 312 return tce_ret;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 361add62abf1..1796c5438cc6 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -56,7 +56,8 @@ extern void hvc_vio_init_early(void);
56/* Dynamic logical Partitioning/Mobility */ 56/* Dynamic logical Partitioning/Mobility */
57extern void dlpar_free_cc_nodes(struct device_node *); 57extern void dlpar_free_cc_nodes(struct device_node *);
58extern void dlpar_free_cc_property(struct property *); 58extern void dlpar_free_cc_property(struct property *);
59extern struct device_node *dlpar_configure_connector(u32, struct device_node *); 59extern struct device_node *dlpar_configure_connector(__be32,
60 struct device_node *);
60extern int dlpar_attach_node(struct device_node *); 61extern int dlpar_attach_node(struct device_node *);
61extern int dlpar_detach_node(struct device_node *); 62extern int dlpar_detach_node(struct device_node *);
62 63
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 0c75214b6f92..73b64c73505b 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -145,59 +145,64 @@ void msi_bitmap_free(struct msi_bitmap *bmp)
145 145
146#ifdef CONFIG_MSI_BITMAP_SELFTEST 146#ifdef CONFIG_MSI_BITMAP_SELFTEST
147 147
148#define check(x) \
149 if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__);
150
151static void __init test_basics(void) 148static void __init test_basics(void)
152{ 149{
153 struct msi_bitmap bmp; 150 struct msi_bitmap bmp;
154 int i, size = 512; 151 int rc, i, size = 512;
155 152
156 /* Can't allocate a bitmap of 0 irqs */ 153 /* Can't allocate a bitmap of 0 irqs */
157 check(msi_bitmap_alloc(&bmp, 0, NULL) != 0); 154 WARN_ON(msi_bitmap_alloc(&bmp, 0, NULL) == 0);
158 155
159 /* of_node may be NULL */ 156 /* of_node may be NULL */
160 check(0 == msi_bitmap_alloc(&bmp, size, NULL)); 157 WARN_ON(msi_bitmap_alloc(&bmp, size, NULL));
161 158
162 /* Should all be free by default */ 159 /* Should all be free by default */
163 check(0 == bitmap_find_free_region(bmp.bitmap, size, 160 WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
164 get_count_order(size)));
165 bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); 161 bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
166 162
167 /* With no node, there's no msi-available-ranges, so expect > 0 */ 163 /* With no node, there's no msi-available-ranges, so expect > 0 */
168 check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0); 164 WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
169 165
170 /* Should all still be free */ 166 /* Should all still be free */
171 check(0 == bitmap_find_free_region(bmp.bitmap, size, 167 WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
172 get_count_order(size)));
173 bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); 168 bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
174 169
175 /* Check we can fill it up and then no more */ 170 /* Check we can fill it up and then no more */
176 for (i = 0; i < size; i++) 171 for (i = 0; i < size; i++)
177 check(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0); 172 WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
178 173
179 check(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0); 174 WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
180 175
181 /* Should all be allocated */ 176 /* Should all be allocated */
182 check(bitmap_find_free_region(bmp.bitmap, size, 0) < 0); 177 WARN_ON(bitmap_find_free_region(bmp.bitmap, size, 0) >= 0);
183 178
184 /* And if we free one we can then allocate another */ 179 /* And if we free one we can then allocate another */
185 msi_bitmap_free_hwirqs(&bmp, size / 2, 1); 180 msi_bitmap_free_hwirqs(&bmp, size / 2, 1);
186 check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2); 181 WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) != size / 2);
182
183 /* Free most of them for the alignment tests */
184 msi_bitmap_free_hwirqs(&bmp, 3, size - 3);
187 185
188 /* Check we get a naturally aligned offset */ 186 /* Check we get a naturally aligned offset */
189 check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0); 187 rc = msi_bitmap_alloc_hwirqs(&bmp, 2);
190 check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0); 188 WARN_ON(rc < 0 && rc % 2 != 0);
191 check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0); 189 rc = msi_bitmap_alloc_hwirqs(&bmp, 4);
192 check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0); 190 WARN_ON(rc < 0 && rc % 4 != 0);
193 check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0); 191 rc = msi_bitmap_alloc_hwirqs(&bmp, 8);
194 check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0); 192 WARN_ON(rc < 0 && rc % 8 != 0);
195 check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0); 193 rc = msi_bitmap_alloc_hwirqs(&bmp, 9);
194 WARN_ON(rc < 0 && rc % 16 != 0);
195 rc = msi_bitmap_alloc_hwirqs(&bmp, 3);
196 WARN_ON(rc < 0 && rc % 4 != 0);
197 rc = msi_bitmap_alloc_hwirqs(&bmp, 7);
198 WARN_ON(rc < 0 && rc % 8 != 0);
199 rc = msi_bitmap_alloc_hwirqs(&bmp, 121);
200 WARN_ON(rc < 0 && rc % 128 != 0);
196 201
197 msi_bitmap_free(&bmp); 202 msi_bitmap_free(&bmp);
198 203
199 /* Clients may check bitmap == NULL for "not-allocated" */ 204 /* Clients may WARN_ON bitmap == NULL for "not-allocated" */
200 check(bmp.bitmap == NULL); 205 WARN_ON(bmp.bitmap != NULL);
201 206
202 kfree(bmp.bitmap); 207 kfree(bmp.bitmap);
203} 208}
@@ -219,14 +224,13 @@ static void __init test_of_node(void)
219 of_node_init(&of_node); 224 of_node_init(&of_node);
220 of_node.full_name = node_name; 225 of_node.full_name = node_name;
221 226
222 check(0 == msi_bitmap_alloc(&bmp, size, &of_node)); 227 WARN_ON(msi_bitmap_alloc(&bmp, size, &of_node));
223 228
224 /* No msi-available-ranges, so expect > 0 */ 229 /* No msi-available-ranges, so expect > 0 */
225 check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0); 230 WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
226 231
227 /* Should all still be free */ 232 /* Should all still be free */
228 check(0 == bitmap_find_free_region(bmp.bitmap, size, 233 WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
229 get_count_order(size)));
230 bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); 234 bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
231 235
232 /* Now create a fake msi-available-ranges property */ 236 /* Now create a fake msi-available-ranges property */
@@ -240,11 +244,11 @@ static void __init test_of_node(void)
240 of_node.properties = &prop; 244 of_node.properties = &prop;
241 245
242 /* msi-available-ranges, so expect == 0 */ 246 /* msi-available-ranges, so expect == 0 */
243 check(msi_bitmap_reserve_dt_hwirqs(&bmp) == 0); 247 WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp));
244 248
245 /* Check we got the expected result */ 249 /* Check we got the expected result */
246 check(0 == bitmap_parselist(expected_str, expected, size)); 250 WARN_ON(bitmap_parselist(expected_str, expected, size));
247 check(bitmap_equal(expected, bmp.bitmap, size)); 251 WARN_ON(!bitmap_equal(expected, bmp.bitmap, size));
248 252
249 msi_bitmap_free(&bmp); 253 msi_bitmap_free(&bmp);
250 kfree(bmp.bitmap); 254 kfree(bmp.bitmap);
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 940ac49198db..4197c89c52d4 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -286,7 +286,8 @@
286#define __NR_seccomp 348 286#define __NR_seccomp 348
287#define __NR_getrandom 349 287#define __NR_getrandom 349
288#define __NR_memfd_create 350 288#define __NR_memfd_create 350
289#define NR_syscalls 351 289#define __NR_bpf 351
290#define NR_syscalls 352
290 291
291/* 292/*
292 * There are some system calls that are not present on 64 bit, some 293 * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index faf6caa510dc..c4f7a3d655b8 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -217,3 +217,4 @@ COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int,
217COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs) 217COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs)
218COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) 218COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
219COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags) 219COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
220COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 6fe886ac2db5..9f7087fd58de 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -359,3 +359,4 @@ SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
359SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp) 359SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
360SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom) 360SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
361SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */ 361SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
362SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf)
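
The three s390 hunks above wire up the new bpf syscall end to end: the uapi number (__NR_bpf = 351, with NR_syscalls bumped to 352), a compat wrapper so 31-bit tasks get their pointer argument extended, and the syscall-table entry. A hypothetical userspace caller, assuming no libc wrapper exists yet (glibc declares syscall() in <unistd.h>):

        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/bpf.h>

        /* hypothetical direct invocation using the number wired up above */
        static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
        {
                return syscall(351 /* __NR_bpf */, cmd, attr, size);
        }
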
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index 956f4f7a591c..f6b3cd056ec2 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -5,13 +5,13 @@
5 * Author(s): Jan Willeke, 5 * Author(s): Jan Willeke,
6 */ 6 */
7 7
8#include <linux/kprobes.h>
9#include <linux/uaccess.h> 8#include <linux/uaccess.h>
10#include <linux/uprobes.h> 9#include <linux/uprobes.h>
11#include <linux/compat.h> 10#include <linux/compat.h>
12#include <linux/kdebug.h> 11#include <linux/kdebug.h>
13#include <asm/switch_to.h> 12#include <asm/switch_to.h>
14#include <asm/facility.h> 13#include <asm/facility.h>
14#include <asm/kprobes.h>
15#include <asm/dis.h> 15#include <asm/dis.h>
16#include "entry.h" 16#include "entry.h"
17 17
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
index c5d64a099719..ae90e1ae3607 100644
--- a/arch/s390/lib/probes.c
+++ b/arch/s390/lib/probes.c
@@ -4,7 +4,7 @@
4 * Copyright IBM Corp. 2014 4 * Copyright IBM Corp. 2014
5 */ 5 */
6 6
7#include <linux/kprobes.h> 7#include <asm/kprobes.h>
8#include <asm/dis.h> 8#include <asm/dis.h>
9 9
10int probe_is_prohibited_opcode(u16 *insn) 10int probe_is_prohibited_opcode(u16 *insn)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 296b61a4af59..1b79ca67392f 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -656,7 +656,7 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
656 } 656 }
657 pgste_set_unlock(ptep, pgste); 657 pgste_set_unlock(ptep, pgste);
658out_pte: 658out_pte:
659 pte_unmap_unlock(*ptep, ptl); 659 pte_unmap_unlock(ptep, ptl);
660} 660}
661EXPORT_SYMBOL_GPL(__gmap_zap); 661EXPORT_SYMBOL_GPL(__gmap_zap);
662 662
@@ -943,7 +943,7 @@ retry:
943 } 943 }
944 if (!(pte_val(*ptep) & _PAGE_INVALID) && 944 if (!(pte_val(*ptep) & _PAGE_INVALID) &&
945 (pte_val(*ptep) & _PAGE_PROTECT)) { 945 (pte_val(*ptep) & _PAGE_PROTECT)) {
946 pte_unmap_unlock(*ptep, ptl); 946 pte_unmap_unlock(ptep, ptl);
947 if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { 947 if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
948 up_read(&mm->mmap_sem); 948 up_read(&mm->mmap_sem);
949 return -EFAULT; 949 return -EFAULT;
@@ -974,7 +974,7 @@ retry:
974 pgste_val(new) |= PGSTE_UC_BIT; 974 pgste_val(new) |= PGSTE_UC_BIT;
975 975
976 pgste_set_unlock(ptep, new); 976 pgste_set_unlock(ptep, new);
977 pte_unmap_unlock(*ptep, ptl); 977 pte_unmap_unlock(ptep, ptl);
978 up_read(&mm->mmap_sem); 978 up_read(&mm->mmap_sem);
979 return 0; 979 return 0;
980} 980}
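
The three pgtable.c hunks above fix the same slip: pte_unmap_unlock() pairs with pte_offset_map_lock() and takes the pte pointer, not a dereferenced pte value, and passing *ptep presumably compiled only because the unmap half of the macro discards its argument on s390. A minimal sketch of the intended pairing:

        spinlock_t *ptl;
        pte_t *ptep;

        ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
        /* ... inspect or modify *ptep while holding ptl ... */
        pte_unmap_unlock(ptep, ptl);    /* the pointer, not *ptep */
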
diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h
index f34682430fcf..2e3a4add8591 100644
--- a/arch/sparc/include/asm/oplib_64.h
+++ b/arch/sparc/include/asm/oplib_64.h
@@ -62,7 +62,8 @@ struct linux_mem_p1275 {
62/* You must call prom_init() before using any of the library services, 62/* You must call prom_init() before using any of the library services,
63 * preferably as early as possible. Pass it the romvec pointer. 63 * preferably as early as possible. Pass it the romvec pointer.
64 */ 64 */
65void prom_init(void *cif_handler, void *cif_stack); 65void prom_init(void *cif_handler);
66void prom_init_report(void);
66 67
67/* Boot argument acquisition, returns the boot command line string. */ 68/* Boot argument acquisition, returns the boot command line string. */
68char *prom_getbootargs(void); 69char *prom_getbootargs(void);
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index f5fffd84d0dd..29d64b1758ed 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -48,6 +48,8 @@ unsigned long safe_compute_effective_address(struct pt_regs *, unsigned int);
48#endif 48#endif
49 49
50#ifdef CONFIG_SPARC64 50#ifdef CONFIG_SPARC64
51void __init start_early_boot(void);
52
51/* unaligned_64.c */ 53/* unaligned_64.c */
52int handle_ldf_stq(u32 insn, struct pt_regs *regs); 54int handle_ldf_stq(u32 insn, struct pt_regs *regs);
53void handle_ld_nf(u32 insn, struct pt_regs *regs); 55void handle_ld_nf(u32 insn, struct pt_regs *regs);
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index ebaba6167dd4..88d322b67fac 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -65,13 +65,10 @@ struct pause_patch_entry {
65extern struct pause_patch_entry __pause_3insn_patch, 65extern struct pause_patch_entry __pause_3insn_patch,
66 __pause_3insn_patch_end; 66 __pause_3insn_patch_end;
67 67
68void __init per_cpu_patch(void);
69void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, 68void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
70 struct sun4v_1insn_patch_entry *); 69 struct sun4v_1insn_patch_entry *);
71void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, 70void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
72 struct sun4v_2insn_patch_entry *); 71 struct sun4v_2insn_patch_entry *);
73void __init sun4v_patch(void);
74void __init boot_cpu_id_too_large(int cpu);
75extern unsigned int dcache_parity_tl1_occurred; 72extern unsigned int dcache_parity_tl1_occurred;
76extern unsigned int icache_parity_tl1_occurred; 73extern unsigned int icache_parity_tl1_occurred;
77 74
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4fdeb8040d4d..3d61fcae7ee3 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -672,14 +672,12 @@ tlb_fixup_done:
672 sethi %hi(init_thread_union), %g6 672 sethi %hi(init_thread_union), %g6
673 or %g6, %lo(init_thread_union), %g6 673 or %g6, %lo(init_thread_union), %g6
674 ldx [%g6 + TI_TASK], %g4 674 ldx [%g6 + TI_TASK], %g4
675 mov %sp, %l6
676 675
677 wr %g0, ASI_P, %asi 676 wr %g0, ASI_P, %asi
678 mov 1, %g1 677 mov 1, %g1
679 sllx %g1, THREAD_SHIFT, %g1 678 sllx %g1, THREAD_SHIFT, %g1
680 sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 679 sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
681 add %g6, %g1, %sp 680 add %g6, %g1, %sp
682 mov 0, %fp
683 681
684 /* Set per-cpu pointer initially to zero, this makes 682 /* Set per-cpu pointer initially to zero, this makes
685 * the boot-cpu use the in-kernel-image per-cpu areas 683 * the boot-cpu use the in-kernel-image per-cpu areas
@@ -706,44 +704,14 @@ tlb_fixup_done:
706 nop 704 nop
707#endif 705#endif
708 706
709 mov %l6, %o1 ! OpenPROM stack
710 call prom_init 707 call prom_init
711 mov %l7, %o0 ! OpenPROM cif handler 708 mov %l7, %o0 ! OpenPROM cif handler
712 709
713 /* Initialize current_thread_info()->cpu as early as possible. 710 /* To create a one-register-window buffer between the kernel's
714 * In order to do that accurately we have to patch up the get_cpuid() 711 * initial stack and the last stack frame we use from the firmware,
715 * assembler sequences. And that, in turn, requires that we know 712 * do the rest of the boot from a C helper function.
716 * if we are on a Starfire box or not. While we're here, patch up
717 * the sun4v sequences as well.
718 */ 713 */
719 call check_if_starfire 714 call start_early_boot
720 nop
721 call per_cpu_patch
722 nop
723 call sun4v_patch
724 nop
725
726#ifdef CONFIG_SMP
727 call hard_smp_processor_id
728 nop
729 cmp %o0, NR_CPUS
730 blu,pt %xcc, 1f
731 nop
732 call boot_cpu_id_too_large
733 nop
734 /* Not reached... */
735
7361:
737#else
738 mov 0, %o0
739#endif
740 sth %o0, [%g6 + TI_CPU]
741
742 call prom_init_report
743 nop
744
745 /* Off we go.... */
746 call start_kernel
747 nop 715 nop
748 /* Not reached... */ 716 /* Not reached... */
749 717
diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S
index b7ddcdd1dea9..cdbfec299f2f 100644
--- a/arch/sparc/kernel/hvtramp.S
+++ b/arch/sparc/kernel/hvtramp.S
@@ -109,7 +109,6 @@ hv_cpu_startup:
109 sllx %g5, THREAD_SHIFT, %g5 109 sllx %g5, THREAD_SHIFT, %g5
110 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 110 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
111 add %g6, %g5, %sp 111 add %g6, %g5, %sp
112 mov 0, %fp
113 112
114 call init_irqwork_curcpu 113 call init_irqwork_curcpu
115 nop 114 nop
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index e629b8377587..c38d19fc27ba 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -30,6 +30,7 @@
30#include <linux/cpu.h> 30#include <linux/cpu.h>
31#include <linux/initrd.h> 31#include <linux/initrd.h>
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/start_kernel.h>
33 34
34#include <asm/io.h> 35#include <asm/io.h>
35#include <asm/processor.h> 36#include <asm/processor.h>
@@ -162,7 +163,7 @@ char reboot_command[COMMAND_LINE_SIZE];
162 163
163static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; 164static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
164 165
165void __init per_cpu_patch(void) 166static void __init per_cpu_patch(void)
166{ 167{
167 struct cpuid_patch_entry *p; 168 struct cpuid_patch_entry *p;
168 unsigned long ver; 169 unsigned long ver;
@@ -254,7 +255,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
254 } 255 }
255} 256}
256 257
257void __init sun4v_patch(void) 258static void __init sun4v_patch(void)
258{ 259{
259 extern void sun4v_hvapi_init(void); 260 extern void sun4v_hvapi_init(void);
260 261
@@ -323,14 +324,25 @@ static void __init pause_patch(void)
323 } 324 }
324} 325}
325 326
326#ifdef CONFIG_SMP 327void __init start_early_boot(void)
327void __init boot_cpu_id_too_large(int cpu)
328{ 328{
329 prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", 329 int cpu;
330 cpu, NR_CPUS); 330
331 prom_halt(); 331 check_if_starfire();
332 per_cpu_patch();
333 sun4v_patch();
334
335 cpu = hard_smp_processor_id();
336 if (cpu >= NR_CPUS) {
337 prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
338 cpu, NR_CPUS);
339 prom_halt();
340 }
341 current_thread_info()->cpu = cpu;
342
343 prom_init_report();
344 start_kernel();
332} 345}
333#endif
334 346
335/* On Ultra, we support all of the v8 capabilities. */ 347/* On Ultra, we support all of the v8 capabilities. */
336unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | 348unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S
index 737f8cbc7d56..88ede1d53b4c 100644
--- a/arch/sparc/kernel/trampoline_64.S
+++ b/arch/sparc/kernel/trampoline_64.S
@@ -109,10 +109,13 @@ startup_continue:
109 brnz,pn %g1, 1b 109 brnz,pn %g1, 1b
110 nop 110 nop
111 111
112 sethi %hi(p1275buf), %g2 112 /* Get onto temporary stack which will be in the locked
113 or %g2, %lo(p1275buf), %g2 113 * kernel image.
114 ldx [%g2 + 0x10], %l2 114 */
115 add %l2, -(192 + 128), %sp 115 sethi %hi(tramp_stack), %g1
116 or %g1, %lo(tramp_stack), %g1
117 add %g1, TRAMP_STACK_SIZE, %g1
118 sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
116 flushw 119 flushw
117 120
118 /* Setup the loop variables: 121 /* Setup the loop variables:
@@ -394,7 +397,6 @@ after_lock_tlb:
394 sllx %g5, THREAD_SHIFT, %g5 397 sllx %g5, THREAD_SHIFT, %g5
395 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 398 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
396 add %g6, %g5, %sp 399 add %g6, %g5, %sp
397 mov 0, %fp
398 400
399 rdpr %pstate, %o1 401 rdpr %pstate, %o1
400 or %o1, PSTATE_IE, %o1 402 or %o1, PSTATE_IE, %o1
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 1aed0432c64b..ae6ce383d4df 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -160,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
160 return 1; 160 return 1;
161} 161}
162 162
163int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
164 struct page **pages)
165{
166 struct mm_struct *mm = current->mm;
167 unsigned long addr, len, end;
168 unsigned long next, flags;
169 pgd_t *pgdp;
170 int nr = 0;
171
172 start &= PAGE_MASK;
173 addr = start;
174 len = (unsigned long) nr_pages << PAGE_SHIFT;
175 end = start + len;
176
177 local_irq_save(flags);
178 pgdp = pgd_offset(mm, addr);
179 do {
180 pgd_t pgd = *pgdp;
181
182 next = pgd_addr_end(addr, end);
183 if (pgd_none(pgd))
184 break;
185 if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
186 break;
187 } while (pgdp++, addr = next, addr != end);
188 local_irq_restore(flags);
189
190 return nr;
191}
192
163int get_user_pages_fast(unsigned long start, int nr_pages, int write, 193int get_user_pages_fast(unsigned long start, int nr_pages, int write,
164 struct page **pages) 194 struct page **pages)
165{ 195{
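
The new sparc __get_user_pages_fast() above is the non-sleeping variant: it walks the page tables with IRQs disabled, pins what it can, and returns the count, which may be short or zero; unlike get_user_pages_fast() it does not fall back to the slow path itself. A hypothetical caller sketch:

        struct page *pages[4];
        int nr;

        /* never sleeps; may pin fewer pages than requested */
        nr = __get_user_pages_fast(uaddr, 4, 1 /* write */, pages);
        if (nr < 4) {
                /* caller falls back, e.g. to get_user_pages(), for the rest */
        }
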
diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S
index 9c86b4b7d429..8050f381f518 100644
--- a/arch/sparc/prom/cif.S
+++ b/arch/sparc/prom/cif.S
@@ -11,11 +11,10 @@
11 .text 11 .text
12 .globl prom_cif_direct 12 .globl prom_cif_direct
13prom_cif_direct: 13prom_cif_direct:
14 save %sp, -192, %sp
14 sethi %hi(p1275buf), %o1 15 sethi %hi(p1275buf), %o1
15 or %o1, %lo(p1275buf), %o1 16 or %o1, %lo(p1275buf), %o1
16 ldx [%o1 + 0x0010], %o2 ! prom_cif_stack 17 ldx [%o1 + 0x0008], %l2 ! prom_cif_handler
17 save %o2, -192, %sp
18 ldx [%i1 + 0x0008], %l2 ! prom_cif_handler
19 mov %g4, %l0 18 mov %g4, %l0
20 mov %g5, %l1 19 mov %g5, %l1
21 mov %g6, %l3 20 mov %g6, %l3
diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c
index d95db755828f..110b0d78b864 100644
--- a/arch/sparc/prom/init_64.c
+++ b/arch/sparc/prom/init_64.c
@@ -26,13 +26,13 @@ phandle prom_chosen_node;
26 * It gets passed the pointer to the PROM vector. 26 * It gets passed the pointer to the PROM vector.
27 */ 27 */
28 28
29extern void prom_cif_init(void *, void *); 29extern void prom_cif_init(void *);
30 30
31void __init prom_init(void *cif_handler, void *cif_stack) 31void __init prom_init(void *cif_handler)
32{ 32{
33 phandle node; 33 phandle node;
34 34
35 prom_cif_init(cif_handler, cif_stack); 35 prom_cif_init(cif_handler);
36 36
37 prom_chosen_node = prom_finddevice(prom_chosen_path); 37 prom_chosen_node = prom_finddevice(prom_chosen_path);
38 if (!prom_chosen_node || (s32)prom_chosen_node == -1) 38 if (!prom_chosen_node || (s32)prom_chosen_node == -1)
diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c
index b2340f008ae0..545d8bb79b65 100644
--- a/arch/sparc/prom/p1275.c
+++ b/arch/sparc/prom/p1275.c
@@ -20,7 +20,6 @@
20struct { 20struct {
21 long prom_callback; /* 0x00 */ 21 long prom_callback; /* 0x00 */
22 void (*prom_cif_handler)(long *); /* 0x08 */ 22 void (*prom_cif_handler)(long *); /* 0x08 */
23 unsigned long prom_cif_stack; /* 0x10 */
24} p1275buf; 23} p1275buf;
25 24
26extern void prom_world(int); 25extern void prom_world(int);
@@ -52,5 +51,4 @@ void p1275_cmd_direct(unsigned long *args)
52void prom_cif_init(void *cif_handler, void *cif_stack) 51void prom_cif_init(void *cif_handler, void *cif_stack)
53{ 52{
54 p1275buf.prom_cif_handler = (void (*)(long *))cif_handler; 53 p1275buf.prom_cif_handler = (void (*)(long *))cif_handler;
55 p1275buf.prom_cif_stack = (unsigned long)cif_stack;
56} 54}
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index de8eebd6f67c..1acf605a646d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,8 +330,10 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
330 size = pci->romsize + sizeof(*rom); 330 size = pci->romsize + sizeof(*rom);
331 331
332 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); 332 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
333 if (status != EFI_SUCCESS) 333 if (status != EFI_SUCCESS) {
334 efi_printk(sys_table, "Failed to alloc mem for rom\n");
334 return status; 335 return status;
336 }
335 337
336 memset(rom, 0, sizeof(*rom)); 338 memset(rom, 0, sizeof(*rom));
337 339
@@ -344,14 +346,18 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
344 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, 346 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
345 PCI_VENDOR_ID, 1, &(rom->vendor)); 347 PCI_VENDOR_ID, 1, &(rom->vendor));
346 348
347 if (status != EFI_SUCCESS) 349 if (status != EFI_SUCCESS) {
350 efi_printk(sys_table, "Failed to read rom->vendor\n");
348 goto free_struct; 351 goto free_struct;
352 }
349 353
350 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, 354 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
351 PCI_DEVICE_ID, 1, &(rom->devid)); 355 PCI_DEVICE_ID, 1, &(rom->devid));
352 356
353 if (status != EFI_SUCCESS) 357 if (status != EFI_SUCCESS) {
358 efi_printk(sys_table, "Failed to read rom->devid\n");
354 goto free_struct; 359 goto free_struct;
360 }
355 361
356 status = efi_early->call(pci->get_location, pci, &(rom->segment), 362 status = efi_early->call(pci->get_location, pci, &(rom->segment),
357 &(rom->bus), &(rom->device), &(rom->function)); 363 &(rom->bus), &(rom->device), &(rom->function));
@@ -432,8 +438,10 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
432 size = pci->romsize + sizeof(*rom); 438 size = pci->romsize + sizeof(*rom);
433 439
434 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); 440 status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
435 if (status != EFI_SUCCESS) 441 if (status != EFI_SUCCESS) {
442 efi_printk(sys_table, "Failed to alloc mem for rom\n");
436 return status; 443 return status;
444 }
437 445
438 rom->data.type = SETUP_PCI; 446 rom->data.type = SETUP_PCI;
439 rom->data.len = size - sizeof(struct setup_data); 447 rom->data.len = size - sizeof(struct setup_data);
@@ -444,14 +452,18 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
444 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, 452 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
445 PCI_VENDOR_ID, 1, &(rom->vendor)); 453 PCI_VENDOR_ID, 1, &(rom->vendor));
446 454
447 if (status != EFI_SUCCESS) 455 if (status != EFI_SUCCESS) {
456 efi_printk(sys_table, "Failed to read rom->vendor\n");
448 goto free_struct; 457 goto free_struct;
458 }
449 459
450 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, 460 status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
451 PCI_DEVICE_ID, 1, &(rom->devid)); 461 PCI_DEVICE_ID, 1, &(rom->devid));
452 462
453 if (status != EFI_SUCCESS) 463 if (status != EFI_SUCCESS) {
464 efi_printk(sys_table, "Failed to read rom->devid\n");
454 goto free_struct; 465 goto free_struct;
466 }
455 467
456 status = efi_early->call(pci->get_location, pci, &(rom->segment), 468 status = efi_early->call(pci->get_location, pci, &(rom->segment),
457 &(rom->bus), &(rom->device), &(rom->function)); 469 &(rom->bus), &(rom->device), &(rom->function));
@@ -538,8 +550,10 @@ static void setup_efi_pci(struct boot_params *params)
538 EFI_LOADER_DATA, 550 EFI_LOADER_DATA,
539 size, (void **)&pci_handle); 551 size, (void **)&pci_handle);
540 552
541 if (status != EFI_SUCCESS) 553 if (status != EFI_SUCCESS) {
554 efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
542 return; 555 return;
556 }
543 557
544 status = efi_call_early(locate_handle, 558 status = efi_call_early(locate_handle,
545 EFI_LOCATE_BY_PROTOCOL, &pci_proto, 559 EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -1105,6 +1119,10 @@ struct boot_params *make_boot_params(struct efi_config *c)
1105 1119
1106 memset(sdt, 0, sizeof(*sdt)); 1120 memset(sdt, 0, sizeof(*sdt));
1107 1121
1122 status = efi_parse_options(cmdline_ptr);
1123 if (status != EFI_SUCCESS)
1124 goto fail2;
1125
1108 status = handle_cmdline_files(sys_table, image, 1126 status = handle_cmdline_files(sys_table, image,
1109 (char *)(unsigned long)hdr->cmd_line_ptr, 1127 (char *)(unsigned long)hdr->cmd_line_ptr,
1110 "initrd=", hdr->initrd_addr_max, 1128 "initrd=", hdr->initrd_addr_max,
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0ec241ede5a2..9b11757975d0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -81,24 +81,23 @@ extern u64 asmlinkage efi_call(void *fp, ...);
81 */ 81 */
82#define __efi_call_virt(f, args...) efi_call_virt(f, args) 82#define __efi_call_virt(f, args...) efi_call_virt(f, args)
83 83
84extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, 84extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
85 u32 type, u64 attribute); 85 u32 type, u64 attribute);
86 86
87#endif /* CONFIG_X86_32 */ 87#endif /* CONFIG_X86_32 */
88 88
89extern int add_efi_memmap;
90extern struct efi_scratch efi_scratch; 89extern struct efi_scratch efi_scratch;
91extern void efi_set_executable(efi_memory_desc_t *md, bool executable); 90extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
92extern int efi_memblock_x86_reserve_range(void); 91extern int __init efi_memblock_x86_reserve_range(void);
93extern void efi_call_phys_prelog(void); 92extern void __init efi_call_phys_prolog(void);
94extern void efi_call_phys_epilog(void); 93extern void __init efi_call_phys_epilog(void);
95extern void efi_unmap_memmap(void); 94extern void __init efi_unmap_memmap(void);
96extern void efi_memory_uc(u64 addr, unsigned long size); 95extern void __init efi_memory_uc(u64 addr, unsigned long size);
97extern void __init efi_map_region(efi_memory_desc_t *md); 96extern void __init efi_map_region(efi_memory_desc_t *md);
98extern void __init efi_map_region_fixed(efi_memory_desc_t *md); 97extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
99extern void efi_sync_low_kernel_mappings(void); 98extern void efi_sync_low_kernel_mappings(void);
100extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); 99extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
101extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); 100extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
102extern void __init old_map_region(efi_memory_desc_t *md); 101extern void __init old_map_region(efi_memory_desc_t *md);
103extern void __init runtime_code_page_mkexec(void); 102extern void __init runtime_code_page_mkexec(void);
104extern void __init efi_runtime_mkexec(void); 103extern void __init efi_runtime_mkexec(void);
@@ -162,16 +161,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
162extern bool efi_reboot_required(void); 161extern bool efi_reboot_required(void);
163 162
164#else 163#else
165/*
166 * IF EFI is not configured, have the EFI calls return -ENOSYS.
167 */
168#define efi_call0(_f) (-ENOSYS)
169#define efi_call1(_f, _a1) (-ENOSYS)
170#define efi_call2(_f, _a1, _a2) (-ENOSYS)
171#define efi_call3(_f, _a1, _a2, _a3) (-ENOSYS)
172#define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS)
173#define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS)
174#define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS)
175static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} 164static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
176static inline bool efi_reboot_required(void) 165static inline bool efi_reboot_required(void)
177{ 166{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
989 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 989 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
990} 990}
991 991
992static inline u64 get_canonical(u64 la)
993{
994 return ((int64_t)la << 16) >> 16;
995}
996
997static inline bool is_noncanonical_address(u64 la)
998{
999#ifdef CONFIG_X86_64
1000 return get_canonical(la) != la;
1001#else
1002 return false;
1003#endif
1004}
1005
992#define TSS_IOPB_BASE_OFFSET 0x66 1006#define TSS_IOPB_BASE_OFFSET 0x66
993#define TSS_BASE_SIZE 0x68 1007#define TSS_BASE_SIZE 0x68
994#define TSS_IOPB_SIZE (65536 / 8) 1008#define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
1050 unsigned long address); 1064 unsigned long address);
1051 1065
1052void kvm_define_shared_msr(unsigned index, u32 msr); 1066void kvm_define_shared_msr(unsigned index, u32 msr);
1053void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 1067int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
1054 1068
1055bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); 1069bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
1056 1070
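
get_canonical() above sign-extends bit 47 of the address through bits 63:48, so an address is canonical exactly when that extension leaves it unchanged. A standalone sketch of the arithmetic (the sample values are illustrative, and like the kernel code it relies on two's complement and an arithmetic right shift):

        #include <assert.h>
        #include <stdint.h>

        static uint64_t get_canonical(uint64_t la)
        {
                return (uint64_t)(((int64_t)la << 16) >> 16); /* replicate bit 47 */
        }

        int main(void)
        {
                /* canonical: bits 63:48 already match bit 47 */
                assert(get_canonical(0x00007fffffffffffULL) == 0x00007fffffffffffULL);
                assert(get_canonical(0xffff800000000000ULL) == 0xffff800000000000ULL);

                /* non-canonical: bit 47 set, bits 63:48 clear */
                assert(get_canonical(0x0000800000000000ULL) != 0x0000800000000000ULL);
                return 0;
        }
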
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
67#define EXIT_REASON_EPT_MISCONFIG 49 67#define EXIT_REASON_EPT_MISCONFIG 49
68#define EXIT_REASON_INVEPT 50 68#define EXIT_REASON_INVEPT 50
69#define EXIT_REASON_PREEMPTION_TIMER 52 69#define EXIT_REASON_PREEMPTION_TIMER 52
70#define EXIT_REASON_INVVPID 53
70#define EXIT_REASON_WBINVD 54 71#define EXIT_REASON_WBINVD 54
71#define EXIT_REASON_XSETBV 55 72#define EXIT_REASON_XSETBV 55
72#define EXIT_REASON_APIC_WRITE 56 73#define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
114 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ 115 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
115 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ 116 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
116 { EXIT_REASON_INVD, "INVD" }, \ 117 { EXIT_REASON_INVD, "INVD" }, \
118 { EXIT_REASON_INVVPID, "INVVPID" }, \
117 { EXIT_REASON_INVPCID, "INVPCID" } 119 { EXIT_REASON_INVPCID, "INVPCID" }
118 120
119#endif /* _UAPIVMX_H */ 121#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
504 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); 504 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
505} 505}
506 506
507static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
508{
509 register_address_increment(ctxt, &ctxt->_eip, rel);
510}
511
512static u32 desc_limit_scaled(struct desc_struct *desc) 507static u32 desc_limit_scaled(struct desc_struct *desc)
513{ 508{
514 u32 limit = get_desc_limit(desc); 509 u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
569 return emulate_exception(ctxt, NM_VECTOR, 0, false); 564 return emulate_exception(ctxt, NM_VECTOR, 0, false);
570} 565}
571 566
567static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
568 int cs_l)
569{
570 switch (ctxt->op_bytes) {
571 case 2:
572 ctxt->_eip = (u16)dst;
573 break;
574 case 4:
575 ctxt->_eip = (u32)dst;
576 break;
577 case 8:
578 if ((cs_l && is_noncanonical_address(dst)) ||
579 (!cs_l && (dst & ~(u32)-1)))
580 return emulate_gp(ctxt, 0);
581 ctxt->_eip = dst;
582 break;
583 default:
584 WARN(1, "unsupported eip assignment size\n");
585 }
586 return X86EMUL_CONTINUE;
587}
588
589static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
590{
591 return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
592}
593
594static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
595{
596 return assign_eip_near(ctxt, ctxt->_eip + rel);
597}
598
572static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) 599static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
573{ 600{
574 u16 selector; 601 u16 selector;
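
With the hunk above, jmp_rel() is no longer a bare _eip adjustment: it goes through assign_eip_near()/assign_eip_far(), which truncate the target to op_bytes and, for 64-bit code, reject a non-canonical address with #GP(0). Every jump, call and ret site in the hunks below therefore grows the same pattern:

        /* sketch of the new call-site convention used throughout this file */
        rc = jmp_rel(ctxt, ctxt->src.val);
        if (rc != X86EMUL_CONTINUE)
                return rc;      /* e.g. #GP(0) queued for a bad RIP */
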
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
751static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, 778static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
752 unsigned size) 779 unsigned size)
753{ 780{
754 if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size)) 781 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
755 return __do_insn_fetch_bytes(ctxt, size); 782
783 if (unlikely(done_size < size))
784 return __do_insn_fetch_bytes(ctxt, size - done_size);
756 else 785 else
757 return X86EMUL_CONTINUE; 786 return X86EMUL_CONTINUE;
758} 787}
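
The do_insn_fetch_bytes() change above stops re-requesting bytes that are already buffered: with done_size bytes in the fetch cache, only the shortfall is passed to the slow path. The arithmetic in isolation, with hypothetical names:

        unsigned have = fetch_end - fetch_ptr;  /* bytes already fetched */

        if (have < want)
                refill(want - have);            /* fetch only the missing bytes */
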
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1416 1445
1417/* Does not support long mode */ 1446/* Does not support long mode */
1418static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1447static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1419 u16 selector, int seg, u8 cpl, bool in_task_switch) 1448 u16 selector, int seg, u8 cpl,
1449 bool in_task_switch,
1450 struct desc_struct *desc)
1420{ 1451{
1421 struct desc_struct seg_desc, old_desc; 1452 struct desc_struct seg_desc, old_desc;
1422 u8 dpl, rpl; 1453 u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1557 } 1588 }
1558load: 1589load:
1559 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); 1590 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1591 if (desc)
1592 *desc = seg_desc;
1560 return X86EMUL_CONTINUE; 1593 return X86EMUL_CONTINUE;
1561exception: 1594exception:
1562 return emulate_exception(ctxt, err_vec, err_code, true); 1595 return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1566 u16 selector, int seg) 1599 u16 selector, int seg)
1567{ 1600{
1568 u8 cpl = ctxt->ops->cpl(ctxt); 1601 u8 cpl = ctxt->ops->cpl(ctxt);
1569 return __load_segment_descriptor(ctxt, selector, seg, cpl, false); 1602 return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
1570} 1603}
1571 1604
1572static void write_register_operand(struct operand *op) 1605static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
1960static int em_jmp_far(struct x86_emulate_ctxt *ctxt) 1993static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1961{ 1994{
1962 int rc; 1995 int rc;
1963 unsigned short sel; 1996 unsigned short sel, old_sel;
1997 struct desc_struct old_desc, new_desc;
1998 const struct x86_emulate_ops *ops = ctxt->ops;
1999 u8 cpl = ctxt->ops->cpl(ctxt);
2000
2001 /* Assignment of RIP may only fail in 64-bit mode */
2002 if (ctxt->mode == X86EMUL_MODE_PROT64)
2003 ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
2004 VCPU_SREG_CS);
1964 2005
1965 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); 2006 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
1966 2007
1967 rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS); 2008 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
2009 &new_desc);
1968 if (rc != X86EMUL_CONTINUE) 2010 if (rc != X86EMUL_CONTINUE)
1969 return rc; 2011 return rc;
1970 2012
1971 ctxt->_eip = 0; 2013 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
1972 memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); 2014 if (rc != X86EMUL_CONTINUE) {
1973 return X86EMUL_CONTINUE; 2015 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2016 /* assigning eip failed; restore the old cs */
2017 ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
2018 return rc;
2019 }
2020 return rc;
1974} 2021}
1975 2022
1976static int em_grp45(struct x86_emulate_ctxt *ctxt) 2023static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
1981 case 2: /* call near abs */ { 2028 case 2: /* call near abs */ {
1982 long int old_eip; 2029 long int old_eip;
1983 old_eip = ctxt->_eip; 2030 old_eip = ctxt->_eip;
1984 ctxt->_eip = ctxt->src.val; 2031 rc = assign_eip_near(ctxt, ctxt->src.val);
2032 if (rc != X86EMUL_CONTINUE)
2033 break;
1985 ctxt->src.val = old_eip; 2034 ctxt->src.val = old_eip;
1986 rc = em_push(ctxt); 2035 rc = em_push(ctxt);
1987 break; 2036 break;
1988 } 2037 }
1989 case 4: /* jmp abs */ 2038 case 4: /* jmp abs */
1990 ctxt->_eip = ctxt->src.val; 2039 rc = assign_eip_near(ctxt, ctxt->src.val);
1991 break; 2040 break;
1992 case 5: /* jmp far */ 2041 case 5: /* jmp far */
1993 rc = em_jmp_far(ctxt); 2042 rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2022 2071
2023static int em_ret(struct x86_emulate_ctxt *ctxt) 2072static int em_ret(struct x86_emulate_ctxt *ctxt)
2024{ 2073{
2025 ctxt->dst.type = OP_REG; 2074 int rc;
2026 ctxt->dst.addr.reg = &ctxt->_eip; 2075 unsigned long eip;
2027 ctxt->dst.bytes = ctxt->op_bytes; 2076
2028 return em_pop(ctxt); 2077 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2078 if (rc != X86EMUL_CONTINUE)
2079 return rc;
2080
2081 return assign_eip_near(ctxt, eip);
2029} 2082}
2030 2083
2031static int em_ret_far(struct x86_emulate_ctxt *ctxt) 2084static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2032{ 2085{
2033 int rc; 2086 int rc;
2034 unsigned long cs; 2087 unsigned long eip, cs;
2088 u16 old_cs;
2035 int cpl = ctxt->ops->cpl(ctxt); 2089 int cpl = ctxt->ops->cpl(ctxt);
2090 struct desc_struct old_desc, new_desc;
2091 const struct x86_emulate_ops *ops = ctxt->ops;
2036 2092
2037 rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); 2093 if (ctxt->mode == X86EMUL_MODE_PROT64)
2094 ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
2095 VCPU_SREG_CS);
2096
2097 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2038 if (rc != X86EMUL_CONTINUE) 2098 if (rc != X86EMUL_CONTINUE)
2039 return rc; 2099 return rc;
2040 if (ctxt->op_bytes == 4)
2041 ctxt->_eip = (u32)ctxt->_eip;
2042 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); 2100 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2043 if (rc != X86EMUL_CONTINUE) 2101 if (rc != X86EMUL_CONTINUE)
2044 return rc; 2102 return rc;
2045 /* Outer-privilege level return is not implemented */ 2103 /* Outer-privilege level return is not implemented */
2046 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) 2104 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2047 return X86EMUL_UNHANDLEABLE; 2105 return X86EMUL_UNHANDLEABLE;
2048 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); 2106 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
2107 &new_desc);
2108 if (rc != X86EMUL_CONTINUE)
2109 return rc;
2110 rc = assign_eip_far(ctxt, eip, new_desc.l);
2111 if (rc != X86EMUL_CONTINUE) {
2112 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2113 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
2114 }
2049 return rc; 2115 return rc;
2050} 2116}
2051 2117
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2306{ 2372{
2307 const struct x86_emulate_ops *ops = ctxt->ops; 2373 const struct x86_emulate_ops *ops = ctxt->ops;
2308 struct desc_struct cs, ss; 2374 struct desc_struct cs, ss;
2309 u64 msr_data; 2375 u64 msr_data, rcx, rdx;
2310 int usermode; 2376 int usermode;
2311 u16 cs_sel = 0, ss_sel = 0; 2377 u16 cs_sel = 0, ss_sel = 0;
2312 2378
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2322 else 2388 else
2323 usermode = X86EMUL_MODE_PROT32; 2389 usermode = X86EMUL_MODE_PROT32;
2324 2390
2391 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2392 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2393
2325 cs.dpl = 3; 2394 cs.dpl = 3;
2326 ss.dpl = 3; 2395 ss.dpl = 3;
2327 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); 2396 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2339 ss_sel = cs_sel + 8; 2408 ss_sel = cs_sel + 8;
2340 cs.d = 0; 2409 cs.d = 0;
2341 cs.l = 1; 2410 cs.l = 1;
2411 if (is_noncanonical_address(rcx) ||
2412 is_noncanonical_address(rdx))
2413 return emulate_gp(ctxt, 0);
2342 break; 2414 break;
2343 } 2415 }
2344 cs_sel |= SELECTOR_RPL_MASK; 2416 cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2347 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); 2419 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2348 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); 2420 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2349 2421
2350 ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); 2422 ctxt->_eip = rdx;
2351 *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); 2423 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2352 2424
2353 return X86EMUL_CONTINUE; 2425 return X86EMUL_CONTINUE;
2354} 2426}
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2466 * Now load segment descriptors. If fault happens at this stage 2538 * Now load segment descriptors. If fault happens at this stage
2467 * it is handled in a context of new task 2539 * it is handled in a context of new task
2468 */ 2540 */
2469 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true); 2541 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2542 true, NULL);
2470 if (ret != X86EMUL_CONTINUE) 2543 if (ret != X86EMUL_CONTINUE)
2471 return ret; 2544 return ret;
2472 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true); 2545 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2546 true, NULL);
2473 if (ret != X86EMUL_CONTINUE) 2547 if (ret != X86EMUL_CONTINUE)
2474 return ret; 2548 return ret;
2475 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true); 2549 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2550 true, NULL);
2476 if (ret != X86EMUL_CONTINUE) 2551 if (ret != X86EMUL_CONTINUE)
2477 return ret; 2552 return ret;
2478 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true); 2553 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2554 true, NULL);
2479 if (ret != X86EMUL_CONTINUE) 2555 if (ret != X86EMUL_CONTINUE)
2480 return ret; 2556 return ret;
2481 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true); 2557 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2558 true, NULL);
2482 if (ret != X86EMUL_CONTINUE) 2559 if (ret != X86EMUL_CONTINUE)
2483 return ret; 2560 return ret;
2484 2561
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2603 * Now load segment descriptors. If fault happens at this stage 2680
2604 * it is handled in a context of new task 2681 * it is handled in a context of new task
2605 */ 2682 */
2606 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true); 2683 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2684 cpl, true, NULL);
2607 if (ret != X86EMUL_CONTINUE) 2685 if (ret != X86EMUL_CONTINUE)
2608 return ret; 2686 return ret;
2609 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true); 2687 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2688 true, NULL);
2610 if (ret != X86EMUL_CONTINUE) 2689 if (ret != X86EMUL_CONTINUE)
2611 return ret; 2690 return ret;
2612 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true); 2691 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2692 true, NULL);
2613 if (ret != X86EMUL_CONTINUE) 2693 if (ret != X86EMUL_CONTINUE)
2614 return ret; 2694 return ret;
2615 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true); 2695 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2696 true, NULL);
2616 if (ret != X86EMUL_CONTINUE) 2697 if (ret != X86EMUL_CONTINUE)
2617 return ret; 2698 return ret;
2618 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true); 2699 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2700 true, NULL);
2619 if (ret != X86EMUL_CONTINUE) 2701 if (ret != X86EMUL_CONTINUE)
2620 return ret; 2702 return ret;
2621 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true); 2703 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2704 true, NULL);
2622 if (ret != X86EMUL_CONTINUE) 2705 if (ret != X86EMUL_CONTINUE)
2623 return ret; 2706 return ret;
2624 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true); 2707 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2708 true, NULL);
2625 if (ret != X86EMUL_CONTINUE) 2709 if (ret != X86EMUL_CONTINUE)
2626 return ret; 2710 return ret;
2627 2711
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
2888 2972
2889static int em_call(struct x86_emulate_ctxt *ctxt) 2973static int em_call(struct x86_emulate_ctxt *ctxt)
2890{ 2974{
2975 int rc;
2891 long rel = ctxt->src.val; 2976 long rel = ctxt->src.val;
2892 2977
2893 ctxt->src.val = (unsigned long)ctxt->_eip; 2978 ctxt->src.val = (unsigned long)ctxt->_eip;
2894 jmp_rel(ctxt, rel); 2979 rc = jmp_rel(ctxt, rel);
2980 if (rc != X86EMUL_CONTINUE)
2981 return rc;
2895 return em_push(ctxt); 2982 return em_push(ctxt);
2896} 2983}
2897 2984
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
2900 u16 sel, old_cs; 2987 u16 sel, old_cs;
2901 ulong old_eip; 2988 ulong old_eip;
2902 int rc; 2989 int rc;
2990 struct desc_struct old_desc, new_desc;
2991 const struct x86_emulate_ops *ops = ctxt->ops;
2992 int cpl = ctxt->ops->cpl(ctxt);
2903 2993
2904 old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2905 old_eip = ctxt->_eip; 2994 old_eip = ctxt->_eip;
2995 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
2906 2996
2907 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); 2997 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2908 if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS)) 2998 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
2999 &new_desc);
3000 if (rc != X86EMUL_CONTINUE)
2909 return X86EMUL_CONTINUE; 3001 return X86EMUL_CONTINUE;
2910 3002
2911 ctxt->_eip = 0; 3003 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
2912 memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); 3004 if (rc != X86EMUL_CONTINUE)
3005 goto fail;
2913 3006
2914 ctxt->src.val = old_cs; 3007 ctxt->src.val = old_cs;
2915 rc = em_push(ctxt); 3008 rc = em_push(ctxt);
2916 if (rc != X86EMUL_CONTINUE) 3009 if (rc != X86EMUL_CONTINUE)
2917 return rc; 3010 goto fail;
2918 3011
2919 ctxt->src.val = old_eip; 3012 ctxt->src.val = old_eip;
2920 return em_push(ctxt); 3013 rc = em_push(ctxt);
3014 /* If we failed, we tainted the memory, but the very least we should
3015 restore cs */
3016 if (rc != X86EMUL_CONTINUE)
3017 goto fail;
3018 return rc;
3019fail:
3020 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3021 return rc;
3022
2921} 3023}
2922 3024
2923static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) 3025static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2924{ 3026{
2925 int rc; 3027 int rc;
3028 unsigned long eip;
2926 3029
2927 ctxt->dst.type = OP_REG; 3030 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2928 ctxt->dst.addr.reg = &ctxt->_eip; 3031 if (rc != X86EMUL_CONTINUE)
2929 ctxt->dst.bytes = ctxt->op_bytes; 3032 return rc;
2930 rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); 3033 rc = assign_eip_near(ctxt, eip);
2931 if (rc != X86EMUL_CONTINUE) 3034 if (rc != X86EMUL_CONTINUE)
2932 return rc; 3035 return rc;
2933 rsp_increment(ctxt, ctxt->src.val); 3036 rsp_increment(ctxt, ctxt->src.val);
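
em_call_far() above, like em_jmp_far() and em_ret_far() earlier, now snapshots the old CS descriptor before loading the new one and writes it back if the subsequent RIP assignment or stack push fails, so the vCPU is never left with a new CS paired with the old RIP. The shape of that save-then-restore, reduced to a compilable sketch with hypothetical names:

        typedef int seg_t;

        static seg_t cur_cs;

        static int load_cs(seg_t s) { cur_cs = s; return 0; }

        static int assign_rip(unsigned long rip)
        {
                return rip > 0xffffffffUL;      /* stand-in validity check */
        }

        static int far_transfer(seg_t new_cs, unsigned long rip)
        {
                seg_t old_cs = cur_cs;

                if (load_cs(new_cs))
                        return -1;
                if (assign_rip(rip)) {
                        load_cs(old_cs);        /* roll back the half-applied state */
                        return -1;
                }
                return 0;
        }
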
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3254 3357
3255static int em_loop(struct x86_emulate_ctxt *ctxt) 3358static int em_loop(struct x86_emulate_ctxt *ctxt)
3256{ 3359{
3360 int rc = X86EMUL_CONTINUE;
3361
3257 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); 3362 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
3258 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && 3363 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3259 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) 3364 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3260 jmp_rel(ctxt, ctxt->src.val); 3365 rc = jmp_rel(ctxt, ctxt->src.val);
3261 3366
3262 return X86EMUL_CONTINUE; 3367 return rc;
3263} 3368}
3264 3369
3265static int em_jcxz(struct x86_emulate_ctxt *ctxt) 3370static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3266{ 3371{
3372 int rc = X86EMUL_CONTINUE;
3373
3267 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) 3374 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3268 jmp_rel(ctxt, ctxt->src.val); 3375 rc = jmp_rel(ctxt, ctxt->src.val);
3269 3376
3270 return X86EMUL_CONTINUE; 3377 return rc;
3271} 3378}
3272 3379
3273static int em_in(struct x86_emulate_ctxt *ctxt) 3380static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
3355 return X86EMUL_CONTINUE; 3462 return X86EMUL_CONTINUE;
3356} 3463}
3357 3464
3465static int em_clflush(struct x86_emulate_ctxt *ctxt)
3466{
3467 /* emulating clflush regardless of cpuid */
3468 return X86EMUL_CONTINUE;
3469}
3470
3358static bool valid_cr(int nr) 3471static bool valid_cr(int nr)
3359{ 3472{
3360 switch (nr) { 3473 switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
3693 X7(D(Undefined)), 3806 X7(D(Undefined)),
3694}; 3807};
3695 3808
3809static const struct gprefix pfx_0f_ae_7 = {
3810 I(SrcMem | ByteOp, em_clflush), N, N, N,
3811};
3812
3813static const struct group_dual group15 = { {
3814 N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
3815}, {
3816 N, N, N, N, N, N, N, N,
3817} };
3818
3696static const struct gprefix pfx_0f_6f_0f_7f = { 3819static const struct gprefix pfx_0f_6f_0f_7f = {
3697 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), 3820 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3698}; 3821};
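
With these tables, opcode 0f ae decodes in three steps: the two-byte table entry GD(0, &group15) splits on ModRM.mod, ModRM.reg == 7 selects the GP() slot, and the mandatory-prefix table resolves the no-prefix form to em_clflush. A hypothetical walk, assuming the emulator's gdual/gprefix union members; this is illustrative, not the actual decode loop:

        struct opcode op = twobyte_table[0xae];       /* GD(0, &group15) */
        op = op.u.gdual->mod012[(modrm >> 3) & 7];    /* reg 7 -> GP(0, &pfx_0f_ae_7) */
        op = op.u.gprefix->pfx_no;                    /* -> I(SrcMem | ByteOp, em_clflush) */
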
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
3901 N, I(ImplicitOps | EmulateOnUD, em_syscall), 4024 N, I(ImplicitOps | EmulateOnUD, em_syscall),
3902 II(ImplicitOps | Priv, em_clts, clts), N, 4025 II(ImplicitOps | Priv, em_clts, clts), N,
3903 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 4026 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
3904 N, D(ImplicitOps | ModRM), N, N, 4027 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
3905 /* 0x10 - 0x1F */ 4028 /* 0x10 - 0x1F */
3906 N, N, N, N, N, N, N, N, 4029 N, N, N, N, N, N, N, N,
3907 D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), 4030 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4031 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
3908 /* 0x20 - 0x2F */ 4032 /* 0x20 - 0x2F */
3909 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), 4033 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
3910 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), 4034 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
3956 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4080 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
3957 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), 4081 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
3958 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), 4082 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
3959 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), 4083 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
3960 /* 0xB0 - 0xB7 */ 4084 /* 0xB0 - 0xB7 */
3961 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), 4085 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
3962 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4086 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
4473 /* Decode and fetch the destination operand: register or memory. */ 4597 /* Decode and fetch the destination operand: register or memory. */
4474 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); 4598 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
4475 4599
4476done:
4477 if (ctxt->rip_relative) 4600 if (ctxt->rip_relative)
4478 ctxt->memopp->addr.mem.ea += ctxt->_eip; 4601 ctxt->memopp->addr.mem.ea += ctxt->_eip;
4479 4602
4603done:
4480 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; 4604 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
4481} 4605}
4482 4606
@@ -4726,7 +4850,7 @@ special_insn:
4726 break; 4850 break;
4727 case 0x70 ... 0x7f: /* jcc (short) */ 4851 case 0x70 ... 0x7f: /* jcc (short) */
4728 if (test_cc(ctxt->b, ctxt->eflags)) 4852 if (test_cc(ctxt->b, ctxt->eflags))
4729 jmp_rel(ctxt, ctxt->src.val); 4853 rc = jmp_rel(ctxt, ctxt->src.val);
4730 break; 4854 break;
4731 case 0x8d: /* lea r16/r32, m */ 4855 case 0x8d: /* lea r16/r32, m */
4732 ctxt->dst.val = ctxt->src.addr.mem.ea; 4856 ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
4756 break; 4880 break;
4757 case 0xe9: /* jmp rel */ 4881 case 0xe9: /* jmp rel */
4758 case 0xeb: /* jmp rel short */ 4882 case 0xeb: /* jmp rel short */
4759 jmp_rel(ctxt, ctxt->src.val); 4883 rc = jmp_rel(ctxt, ctxt->src.val);
4760 ctxt->dst.type = OP_NONE; /* Disable writeback. */ 4884 ctxt->dst.type = OP_NONE; /* Disable writeback. */
4761 break; 4885 break;
4762 case 0xf4: /* hlt */ 4886 case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
4881 break; 5005 break;
4882 case 0x80 ... 0x8f: /* jnz rel, etc*/ 5006 case 0x80 ... 0x8f: /* jnz rel, etc*/
4883 if (test_cc(ctxt->b, ctxt->eflags)) 5007 if (test_cc(ctxt->b, ctxt->eflags))
4884 jmp_rel(ctxt, ctxt->src.val); 5008 rc = jmp_rel(ctxt, ctxt->src.val);
4885 break; 5009 break;
4886 case 0x90 ... 0x9f: /* setcc r/m8 */ 5010 case 0x90 ... 0x9f: /* setcc r/m8 */
4887 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); 5011 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
4888 break; 5012 break;
4889 case 0xae: /* clflush */
4890 break;
4891 case 0xb6 ... 0xb7: /* movzx */ 5013 case 0xb6 ... 0xb7: /* movzx */
4892 ctxt->dst.bytes = ctxt->op_bytes; 5014 ctxt->dst.bytes = ctxt->op_bytes;
4893 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val 5015 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
262 return; 262 return;
263 263
264 timer = &pit->pit_state.timer; 264 timer = &pit->pit_state.timer;
265 mutex_lock(&pit->pit_state.lock);
265 if (hrtimer_cancel(timer)) 266 if (hrtimer_cancel(timer))
266 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 267 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
268 mutex_unlock(&pit->pit_state.lock);
267} 269}
268 270
269static void destroy_pit_timer(struct kvm_pit *pit) 271static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
298 } 298 }
299#endif 299#endif
300 walker->max_level = walker->level; 300 walker->max_level = walker->level;
301 ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); 301 ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
302 302
303 accessed_dirty = PT_GUEST_ACCESSED_MASK; 303 accessed_dirty = PT_GUEST_ACCESSED_MASK;
304 pt_access = pte_access = ACC_ALL; 304 pt_access = pte_access = ACC_ALL;
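
The paging_tmpl.h fix above is pure boolean logic: the old ASSERT demanded "not long mode AND PAE", which fails both for 64-bit guests and for plain 32-bit guests without PAE; the invariant actually wanted is the implication "long mode implies PAE". Both spellings of that implication in a small sketch:

        #include <assert.h>
        #include <stdbool.h>

        static void check_invariant(bool long_mode, bool pae)
        {
                assert(!(long_mode && !pae));   /* as written in the fix */
                assert(!long_mode || pae);      /* De Morgan equivalent */
        }
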
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
3251 msr.host_initiated = false; 3251 msr.host_initiated = false;
3252 3252
3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3254 if (svm_set_msr(&svm->vcpu, &msr)) { 3254 if (kvm_set_msr(&svm->vcpu, &msr)) {
3255 trace_kvm_msr_write_ex(ecx, data); 3255 trace_kvm_msr_write_ex(ecx, data);
3256 kvm_inject_gp(&svm->vcpu, 0); 3256 kvm_inject_gp(&svm->vcpu, 0);
3257 } else { 3257 } else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
3551 3551
3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3553 || !svm_exit_handlers[exit_code]) { 3553 || !svm_exit_handlers[exit_code]) {
3554 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3554 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
3555 kvm_run->hw.hardware_exit_reason = exit_code; 3555 kvm_queue_exception(vcpu, UD_VECTOR);
3556 return 0; 3556 return 1;
3557 } 3557 }
3558 3558
3559 return svm_exit_handlers[exit_code](svm); 3559 return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2659 default: 2659 default:
2660 msr = find_msr_entry(vmx, msr_index); 2660 msr = find_msr_entry(vmx, msr_index);
2661 if (msr) { 2661 if (msr) {
2662 u64 old_msr_data = msr->data;
2662 msr->data = data; 2663 msr->data = data;
2663 if (msr - vmx->guest_msrs < vmx->save_nmsrs) { 2664 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2664 preempt_disable(); 2665 preempt_disable();
2665 kvm_set_shared_msr(msr->index, msr->data, 2666 ret = kvm_set_shared_msr(msr->index, msr->data,
2666 msr->mask); 2667 msr->mask);
2667 preempt_enable(); 2668 preempt_enable();
2669 if (ret)
2670 msr->data = old_msr_data;
2668 } 2671 }
2669 break; 2672 break;
2670 } 2673 }
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
5291 msr.data = data; 5294 msr.data = data;
5292 msr.index = ecx; 5295 msr.index = ecx;
5293 msr.host_initiated = false; 5296 msr.host_initiated = false;
5294 if (vmx_set_msr(vcpu, &msr) != 0) { 5297 if (kvm_set_msr(vcpu, &msr) != 0) {
5295 trace_kvm_msr_write_ex(ecx, data); 5298 trace_kvm_msr_write_ex(ecx, data);
5296 kvm_inject_gp(vcpu, 0); 5299 kvm_inject_gp(vcpu, 0);
5297 return 1; 5300 return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6743 return 1; 6746 return 1;
6744} 6747}
6745 6748
6749static int handle_invvpid(struct kvm_vcpu *vcpu)
6750{
6751 kvm_queue_exception(vcpu, UD_VECTOR);
6752 return 1;
6753}
6754
6746/* 6755/*
6747 * The exit handlers return 1 if the exit was handled fully and guest execution 6756 * The exit handlers return 1 if the exit was handled fully and guest execution
6748 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 6757 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6788 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, 6797 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6789 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, 6798 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6790 [EXIT_REASON_INVEPT] = handle_invept, 6799 [EXIT_REASON_INVEPT] = handle_invept,
6800 [EXIT_REASON_INVVPID] = handle_invvpid,
6791}; 6801};
6792 6802
6793static const int kvm_vmx_max_exit_handlers = 6803static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7023 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: 7033 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
7024 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: 7034 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
7025 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: 7035 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
7026 case EXIT_REASON_INVEPT: 7036 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
7027 /* 7037 /*
7028 * VMX instructions trap unconditionally. This allows L1 to 7038 * VMX instructions trap unconditionally. This allows L1 to
7029 * emulate them for its L2 guest, i.e., allows 3-level nesting! 7039 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7164 && kvm_vmx_exit_handlers[exit_reason]) 7174 && kvm_vmx_exit_handlers[exit_reason])
7165 return kvm_vmx_exit_handlers[exit_reason](vcpu); 7175 return kvm_vmx_exit_handlers[exit_reason](vcpu);
7166 else { 7176 else {
7167 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 7177 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
7168 vcpu->run->hw.hardware_exit_reason = exit_reason; 7178 kvm_queue_exception(vcpu, UD_VECTOR);
7179 return 1;
7169 } 7180 }
7170 return 0;
7171} 7181}
7172 7182
7173static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 7183static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
229 shared_msr_update(i, shared_msrs_global.msrs[i]); 229 shared_msr_update(i, shared_msrs_global.msrs[i]);
230} 230}
231 231
232void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 232int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
233{ 233{
234 unsigned int cpu = smp_processor_id(); 234 unsigned int cpu = smp_processor_id();
235 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); 235 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
236 int err;
236 237
237 if (((value ^ smsr->values[slot].curr) & mask) == 0) 238 if (((value ^ smsr->values[slot].curr) & mask) == 0)
238 return; 239 return 0;
239 smsr->values[slot].curr = value; 240 smsr->values[slot].curr = value;
240 wrmsrl(shared_msrs_global.msrs[slot], value); 241 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
242 if (err)
243 return 1;
244
241 if (!smsr->registered) { 245 if (!smsr->registered) {
242 smsr->urn.on_user_return = kvm_on_user_return; 246 smsr->urn.on_user_return = kvm_on_user_return;
243 user_return_notifier_register(&smsr->urn); 247 user_return_notifier_register(&smsr->urn);
244 smsr->registered = true; 248 smsr->registered = true;
245 } 249 }
250 return 0;
246} 251}
247EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 252EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
248 253
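kvm_set_shared_msr() now returns int: wrmsrl_safe() turns the #GP that a bad value would raise into an error return instead of a host oops, and the failure is surfaced before the user-return notifier is registered. The minimal shape, assuming only that wrmsrl_safe() returns 0 on success:

	int err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
	if (err)
		return 1;	/* let the caller roll back its cached value */
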
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
987} 992}
988EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); 993EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
989 994
990
991/* 995/*
992 * Writes msr value into the appropriate "register". 996 * Writes msr value into the appropriate "register".
993 * Returns 0 on success, non-0 otherwise. 997 * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
995 */ 999 */
996int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) 1000int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
997{ 1001{
1002 switch (msr->index) {
1003 case MSR_FS_BASE:
1004 case MSR_GS_BASE:
1005 case MSR_KERNEL_GS_BASE:
1006 case MSR_CSTAR:
1007 case MSR_LSTAR:
1008 if (is_noncanonical_address(msr->data))
1009 return 1;
1010 break;
1011 case MSR_IA32_SYSENTER_EIP:
1012 case MSR_IA32_SYSENTER_ESP:
1013 /*
1014 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
1015 * non-canonical address is written on Intel but not on
1016 * AMD (which ignores the top 32-bits, because it does
1017 * not implement 64-bit SYSENTER).
1018 *
1019 * 64-bit code should hence be able to write a non-canonical
1020 * value on AMD. Making the address canonical ensures that
1021 * vmentry does not fail on Intel after writing a non-canonical
1022 * value, and that something deterministic happens if the guest
1023 * invokes 64-bit SYSENTER.
1024 */
1025 msr->data = get_canonical(msr->data);
1026 }
998 return kvm_x86_ops->set_msr(vcpu, msr); 1027 return kvm_x86_ops->set_msr(vcpu, msr);
999} 1028}
1029EXPORT_SYMBOL_GPL(kvm_set_msr);
1000 1030
1001/* 1031/*
1002 * Adapt set_msr() to msr_io()'s calling convention 1032 * Adapt set_msr() to msr_io()'s calling convention
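kvm_set_msr() now rejects non-canonical targets for the MSRs that hold code or stack addresses, closing a hole where a VM entry with, say, a non-canonical MSR_GS_BASE could fail in ways the host did not anticipate. On x86-64 of this era a canonical address sign-extends bit 47 through bits 63:48. A sketch of the two helpers, assuming 48-bit virtual addresses (the in-tree definitions live in KVM's x86 headers):

	/* Sign-extend bit 47 into the upper 16 bits. */
	static inline u64 get_canonical(u64 la)
	{
		return ((s64)la << 16) >> 16;
	}

	static inline bool is_noncanonical_address(u64 la)
	{
		return get_canonical(la) != la;
	}

For example, 0x0000800000000000 (bit 47 set, upper bits clear) is non-canonical: it would be rejected for MSR_LSTAR but silently canonicalized to 0xffff800000000000 for MSR_IA32_SYSENTER_EIP, matching the AMD behaviour described in the comment above.
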
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f15103dff4b4..d143d216d52b 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -40,20 +40,40 @@ void __init efi_bgrt_init(void)
40 if (ACPI_FAILURE(status)) 40 if (ACPI_FAILURE(status))
41 return; 41 return;
42 42
43 if (bgrt_tab->header.length < sizeof(*bgrt_tab)) 43 if (bgrt_tab->header.length < sizeof(*bgrt_tab)) {
44 pr_err("Ignoring BGRT: invalid length %u (expected %zu)\n",
45 bgrt_tab->header.length, sizeof(*bgrt_tab));
44 return; 46 return;
45 if (bgrt_tab->version != 1 || bgrt_tab->status != 1) 47 }
48 if (bgrt_tab->version != 1) {
49 pr_err("Ignoring BGRT: invalid version %u (expected 1)\n",
50 bgrt_tab->version);
51 return;
52 }
53 if (bgrt_tab->status != 1) {
54 pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
55 bgrt_tab->status);
56 return;
57 }
58 if (bgrt_tab->image_type != 0) {
59 pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
60 bgrt_tab->image_type);
46 return; 61 return;
47 if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) 62 }
63 if (!bgrt_tab->image_address) {
64 pr_err("Ignoring BGRT: null image address\n");
48 return; 65 return;
66 }
49 67
50 image = efi_lookup_mapped_addr(bgrt_tab->image_address); 68 image = efi_lookup_mapped_addr(bgrt_tab->image_address);
51 if (!image) { 69 if (!image) {
52 image = early_memremap(bgrt_tab->image_address, 70 image = early_memremap(bgrt_tab->image_address,
53 sizeof(bmp_header)); 71 sizeof(bmp_header));
54 ioremapped = true; 72 ioremapped = true;
55 if (!image) 73 if (!image) {
74 pr_err("Ignoring BGRT: failed to map image header memory\n");
56 return; 75 return;
76 }
57 } 77 }
58 78
59 memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); 79 memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
@@ -61,14 +81,18 @@ void __init efi_bgrt_init(void)
61 early_iounmap(image, sizeof(bmp_header)); 81 early_iounmap(image, sizeof(bmp_header));
62 bgrt_image_size = bmp_header.size; 82 bgrt_image_size = bmp_header.size;
63 83
64 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); 84 bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
65 if (!bgrt_image) 85 if (!bgrt_image) {
86 pr_err("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
87 bgrt_image_size);
66 return; 88 return;
89 }
67 90
68 if (ioremapped) { 91 if (ioremapped) {
69 image = early_memremap(bgrt_tab->image_address, 92 image = early_memremap(bgrt_tab->image_address,
70 bmp_header.size); 93 bmp_header.size);
71 if (!image) { 94 if (!image) {
95 pr_err("Ignoring BGRT: failed to map image memory\n");
72 kfree(bgrt_image); 96 kfree(bgrt_image);
73 bgrt_image = NULL; 97 bgrt_image = NULL;
74 return; 98 return;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 850da94fef30..dbc8627a5cdf 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,17 +70,7 @@ static efi_config_table_type_t arch_tables[] __initdata = {
70 70
71u64 efi_setup; /* efi setup_data physical address */ 71u64 efi_setup; /* efi setup_data physical address */
72 72
73static bool disable_runtime __initdata = false; 73static int add_efi_memmap __initdata;
74static int __init setup_noefi(char *arg)
75{
76 disable_runtime = true;
77 return 0;
78}
79early_param("noefi", setup_noefi);
80
81int add_efi_memmap;
82EXPORT_SYMBOL(add_efi_memmap);
83
84static int __init setup_add_efi_memmap(char *arg) 74static int __init setup_add_efi_memmap(char *arg)
85{ 75{
86 add_efi_memmap = 1; 76 add_efi_memmap = 1;
@@ -96,7 +86,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
96{ 86{
97 efi_status_t status; 87 efi_status_t status;
98 88
99 efi_call_phys_prelog(); 89 efi_call_phys_prolog();
100 status = efi_call_phys(efi_phys.set_virtual_address_map, 90 status = efi_call_phys(efi_phys.set_virtual_address_map,
101 memory_map_size, descriptor_size, 91 memory_map_size, descriptor_size,
102 descriptor_version, virtual_map); 92 descriptor_version, virtual_map);
@@ -210,9 +200,12 @@ static void __init print_efi_memmap(void)
210 for (p = memmap.map, i = 0; 200 for (p = memmap.map, i = 0;
211 p < memmap.map_end; 201 p < memmap.map_end;
212 p += memmap.desc_size, i++) { 202 p += memmap.desc_size, i++) {
203 char buf[64];
204
213 md = p; 205 md = p;
214 pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", 206 pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
215 i, md->type, md->attribute, md->phys_addr, 207 i, efi_md_typeattr_format(buf, sizeof(buf), md),
208 md->phys_addr,
216 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), 209 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
217 (md->num_pages >> (20 - EFI_PAGE_SHIFT))); 210 (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
218 } 211 }
@@ -344,9 +337,9 @@ static int __init efi_runtime_init32(void)
344 } 337 }
345 338
346 /* 339 /*
347 * We will only need *early* access to the following two 340 * We will only need *early* access to the SetVirtualAddressMap
348 * EFI runtime services before set_virtual_address_map 341 * EFI runtime service. All other runtime services will be called
349 * is invoked. 342 * via the virtual mapping.
350 */ 343 */
351 efi_phys.set_virtual_address_map = 344 efi_phys.set_virtual_address_map =
352 (efi_set_virtual_address_map_t *) 345 (efi_set_virtual_address_map_t *)
@@ -368,9 +361,9 @@ static int __init efi_runtime_init64(void)
368 } 361 }
369 362
370 /* 363 /*
371 * We will only need *early* access to the following two 364 * We will only need *early* access to the SetVirtualAddressMap
372 * EFI runtime services before set_virtual_address_map 365 * EFI runtime service. All other runtime services will be called
373 * is invoked. 366 * via the virtual mapping.
374 */ 367 */
375 efi_phys.set_virtual_address_map = 368 efi_phys.set_virtual_address_map =
376 (efi_set_virtual_address_map_t *) 369 (efi_set_virtual_address_map_t *)
@@ -492,7 +485,7 @@ void __init efi_init(void)
492 if (!efi_runtime_supported()) 485 if (!efi_runtime_supported())
493 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); 486 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
494 else { 487 else {
495 if (disable_runtime || efi_runtime_init()) 488 if (efi_runtime_disabled() || efi_runtime_init())
496 return; 489 return;
497 } 490 }
498 if (efi_memmap_init()) 491 if (efi_memmap_init())
@@ -537,7 +530,7 @@ void __init runtime_code_page_mkexec(void)
537 } 530 }
538} 531}
539 532
540void efi_memory_uc(u64 addr, unsigned long size) 533void __init efi_memory_uc(u64 addr, unsigned long size)
541{ 534{
542 unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; 535 unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
543 u64 npages; 536 u64 npages;
@@ -732,6 +725,7 @@ static void __init kexec_enter_virtual_mode(void)
732 */ 725 */
733 if (!efi_is_native()) { 726 if (!efi_is_native()) {
734 efi_unmap_memmap(); 727 efi_unmap_memmap();
728 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
735 return; 729 return;
736 } 730 }
737 731
@@ -805,6 +799,7 @@ static void __init __efi_enter_virtual_mode(void)
805 new_memmap = efi_map_regions(&count, &pg_shift); 799 new_memmap = efi_map_regions(&count, &pg_shift);
806 if (!new_memmap) { 800 if (!new_memmap) {
807 pr_err("Error reallocating memory, EFI runtime non-functional!\n"); 801 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
802 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
808 return; 803 return;
809 } 804 }
810 805
@@ -812,8 +807,10 @@ static void __init __efi_enter_virtual_mode(void)
812 807
813 BUG_ON(!efi.systab); 808 BUG_ON(!efi.systab);
814 809
815 if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) 810 if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
811 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
816 return; 812 return;
813 }
817 814
818 efi_sync_low_kernel_mappings(); 815 efi_sync_low_kernel_mappings();
819 efi_dump_pagetable(); 816 efi_dump_pagetable();
@@ -938,14 +935,11 @@ u64 efi_mem_attributes(unsigned long phys_addr)
938 return 0; 935 return 0;
939} 936}
940 937
941static int __init parse_efi_cmdline(char *str) 938static int __init arch_parse_efi_cmdline(char *str)
942{ 939{
943 if (*str == '=') 940 if (parse_option_str(str, "old_map"))
944 str++;
945
946 if (!strncmp(str, "old_map", 7))
947 set_bit(EFI_OLD_MEMMAP, &efi.flags); 941 set_bit(EFI_OLD_MEMMAP, &efi.flags);
948 942
949 return 0; 943 return 0;
950} 944}
951early_param("efi", parse_efi_cmdline); 945early_param("efi", arch_parse_efi_cmdline);
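arch_parse_efi_cmdline() also swaps the hand-rolled '=' skipping and strncmp() for parse_option_str(), which treats the early-param value as a comma-separated token list. A usage sketch, assuming the lib/cmdline.c semantics introduced alongside this series (returns true when the token is present):

	bool parse_option_str(const char *str, const char *option);

	parse_option_str("old_map,debug", "old_map");	/* true  */
	parse_option_str("old_map,debug", "noruntime");	/* false */

This is also what lets "noefi" move out of arch code: the x86 handler only has to recognize the arch-specific "old_map" token, while the runtime-disable decision is queried through efi_runtime_disabled() from generic EFI code.
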
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 9ee3491e31fb..40e7cda52936 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,7 +33,7 @@
33 33
34/* 34/*
35 * To make EFI call EFI runtime service in physical addressing mode we need 35 * To make EFI call EFI runtime service in physical addressing mode we need
36 * prelog/epilog before/after the invocation to disable interrupt, to 36 * prolog/epilog before/after the invocation to disable interrupt, to
37 * claim EFI runtime service handler exclusively and to duplicate a memory in 37 * claim EFI runtime service handler exclusively and to duplicate a memory in
38 * low memory space say 0 - 3G. 38 * low memory space say 0 - 3G.
39 */ 39 */
@@ -41,11 +41,13 @@ static unsigned long efi_rt_eflags;
41 41
42void efi_sync_low_kernel_mappings(void) {} 42void efi_sync_low_kernel_mappings(void) {}
43void __init efi_dump_pagetable(void) {} 43void __init efi_dump_pagetable(void) {}
44int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 44int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
45{ 45{
46 return 0; 46 return 0;
47} 47}
48void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} 48void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
49{
50}
49 51
50void __init efi_map_region(efi_memory_desc_t *md) 52void __init efi_map_region(efi_memory_desc_t *md)
51{ 53{
@@ -55,7 +57,7 @@ void __init efi_map_region(efi_memory_desc_t *md)
55void __init efi_map_region_fixed(efi_memory_desc_t *md) {} 57void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
56void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} 58void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
57 59
58void efi_call_phys_prelog(void) 60void __init efi_call_phys_prolog(void)
59{ 61{
60 struct desc_ptr gdt_descr; 62 struct desc_ptr gdt_descr;
61 63
@@ -69,7 +71,7 @@ void efi_call_phys_prelog(void)
69 load_gdt(&gdt_descr); 71 load_gdt(&gdt_descr);
70} 72}
71 73
72void efi_call_phys_epilog(void) 74void __init efi_call_phys_epilog(void)
73{ 75{
74 struct desc_ptr gdt_descr; 76 struct desc_ptr gdt_descr;
75 77
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 290d397e1dd9..35aecb6042fb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -79,7 +79,7 @@ static void __init early_code_mapping_set_exec(int executable)
79 } 79 }
80} 80}
81 81
82void __init efi_call_phys_prelog(void) 82void __init efi_call_phys_prolog(void)
83{ 83{
84 unsigned long vaddress; 84 unsigned long vaddress;
85 int pgd; 85 int pgd;
@@ -139,7 +139,7 @@ void efi_sync_low_kernel_mappings(void)
139 sizeof(pgd_t) * num_pgds); 139 sizeof(pgd_t) * num_pgds);
140} 140}
141 141
142int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) 142int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
143{ 143{
144 unsigned long text; 144 unsigned long text;
145 struct page *page; 145 struct page *page;
@@ -192,7 +192,7 @@ int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
192 return 0; 192 return 0;
193} 193}
194 194
195void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) 195void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
196{ 196{
197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); 197 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
198 198
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index fbe66e626c09..040192b50d02 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -27,13 +27,13 @@ ENTRY(efi_call_phys)
27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found 27 * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
28 * the values of these registers are the same. And, the corresponding 28 * the values of these registers are the same. And, the corresponding
29 * GDT entries are identical. So I will do nothing about segment reg 29 * GDT entries are identical. So I will do nothing about segment reg
30 * and GDT, but change GDT base register in prelog and epilog. 30 * and GDT, but change GDT base register in prolog and epilog.
31 */ 31 */
32 32
33 /* 33 /*
34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. 34 * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET.
35 * But to make it smoothly switch from virtual mode to flat mode. 35 * But to make it smoothly switch from virtual mode to flat mode.
36 * The mapping of lower virtual memory has been created in prelog and 36 * The mapping of lower virtual memory has been created in prolog and
37 * epilog. 37 * epilog.
38 */ 38 */
39 movl $1f, %edx 39 movl $1f, %edx
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
index 46aa25c8ce06..3c1c3866d82b 100644
--- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
+++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h
@@ -10,10 +10,9 @@
10 */ 10 */
11 11
12 12
13/* __attribute__((weak)) makes these declarations overridable */
14/* For every CPU addition a new get_<cpuname>_ops interface needs 13/* For every CPU addition a new get_<cpuname>_ops interface needs
15 * to be added. 14 * to be added.
16 */ 15 */
17extern void *get_penwell_ops(void) __attribute__((weak)); 16extern void *get_penwell_ops(void);
18extern void *get_cloverview_ops(void) __attribute__((weak)); 17extern void *get_cloverview_ops(void);
19extern void *get_tangier_ops(void) __attribute__((weak)); 18extern void *get_tangier_ops(void);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1a3f0445432a..fac5e4f9607c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1636,9 +1636,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
1636 xen_raw_console_write("mapping kernel into physical memory\n"); 1636 xen_raw_console_write("mapping kernel into physical memory\n");
1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); 1637 xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
1638 1638
1639 /* Allocate and initialize top and mid mfn levels for p2m structure */
1640 xen_build_mfn_list_list();
1641
1642 /* keep using Xen gdt for now; no urgent need to change it */ 1639 /* keep using Xen gdt for now; no urgent need to change it */
1643 1640
1644#ifdef CONFIG_X86_32 1641#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f62af7647ec9..a8a1a3d08d4d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1217,10 +1217,13 @@ static void __init xen_pagetable_p2m_copy(void)
1217static void __init xen_pagetable_init(void) 1217static void __init xen_pagetable_init(void)
1218{ 1218{
1219 paging_init(); 1219 paging_init();
1220 xen_setup_shared_info();
1221#ifdef CONFIG_X86_64 1220#ifdef CONFIG_X86_64
1222 xen_pagetable_p2m_copy(); 1221 xen_pagetable_p2m_copy();
1223#endif 1222#endif
1223 /* Allocate and initialize top and mid mfn levels for p2m structure */
1224 xen_build_mfn_list_list();
1225
1226 xen_setup_shared_info();
1224 xen_post_allocator_init(); 1227 xen_post_allocator_init();
1225} 1228}
1226static void xen_write_cr2(unsigned long cr2) 1229static void xen_write_cr2(unsigned long cr2)
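The reordering above is the point of the enlighten.c/mmu.c pair: xen_build_mfn_list_list() moves out of xen_start_kernel() into xen_pagetable_init() so the mid/top mfn pages can come from alloc_bootmem_align() instead of the small brk reservation (see the p2m.c hunks below, which drop the RESERVE_BRK arrays). xen_setup_shared_info() then runs after it, since the shared-info setup also registers the mfn list with the hypervisor. The resulting order, annotated:

	static void __init xen_pagetable_init(void)
	{
		paging_init();
	#ifdef CONFIG_X86_64
		xen_pagetable_p2m_copy();
	#endif
		/* now allocated via alloc_bootmem_align(), not extend_brk() */
		xen_build_mfn_list_list();
		xen_setup_shared_info();	/* publishes the list to Xen */
		xen_post_allocator_init();
	}
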
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 9f5983b01ed9..b456b048eca9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -163,6 +163,7 @@
163#include <linux/hash.h> 163#include <linux/hash.h>
164#include <linux/sched.h> 164#include <linux/sched.h>
165#include <linux/seq_file.h> 165#include <linux/seq_file.h>
166#include <linux/bootmem.h>
166 167
167#include <asm/cache.h> 168#include <asm/cache.h>
168#include <asm/setup.h> 169#include <asm/setup.h>
@@ -181,21 +182,20 @@ static void __init m2p_override_init(void);
181 182
182unsigned long xen_max_p2m_pfn __read_mostly; 183unsigned long xen_max_p2m_pfn __read_mostly;
183 184
185static unsigned long *p2m_mid_missing_mfn;
186static unsigned long *p2m_top_mfn;
187static unsigned long **p2m_top_mfn_p;
188
184/* Placeholders for holes in the address space */ 189/* Placeholders for holes in the address space */
185static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); 190static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
186static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); 191static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
187static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE);
188 192
189static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); 193static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
190static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
191static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
192 194
193static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); 195static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
194static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); 196static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE);
195static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
196 197
197RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 198RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
198RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
199 199
200/* For each I/O range remapped we may lose up to two leaf pages for the boundary 200/* For each I/O range remapped we may lose up to two leaf pages for the boundary
201 * violations and three mid pages to cover up to 3GB. With 201 * violations and three mid pages to cover up to 3GB. With
@@ -272,11 +272,11 @@ static void p2m_init(unsigned long *p2m)
272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures 272 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
273 * 273 *
274 * This is called both at boot time, and after resuming from suspend: 274 * This is called both at boot time, and after resuming from suspend:
275 * - At boot time we're called very early, and must use extend_brk() 275 * - At boot time we're called rather early, and must use alloc_bootmem*()
276 * to allocate memory. 276 * to allocate memory.
277 * 277 *
278 * - After resume we're called from within stop_machine, but the mfn 278 * - After resume we're called from within stop_machine, but the mfn
279 * tree should alreay be completely allocated. 279 * tree should already be completely allocated.
280 */ 280 */
281void __ref xen_build_mfn_list_list(void) 281void __ref xen_build_mfn_list_list(void)
282{ 282{
@@ -287,20 +287,17 @@ void __ref xen_build_mfn_list_list(void)
287 287
288 /* Pre-initialize p2m_top_mfn to be completely missing */ 288 /* Pre-initialize p2m_top_mfn to be completely missing */
289 if (p2m_top_mfn == NULL) { 289 if (p2m_top_mfn == NULL) {
290 p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 290 p2m_mid_missing_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 291 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
292 p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE);
293 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
294 292
295 p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 293 p2m_top_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
296 p2m_top_mfn_p_init(p2m_top_mfn_p); 294 p2m_top_mfn_p_init(p2m_top_mfn_p);
297 295
298 p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); 296 p2m_top_mfn = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
299 p2m_top_mfn_init(p2m_top_mfn); 297 p2m_top_mfn_init(p2m_top_mfn);
300 } else { 298 } else {
301 /* Reinitialise, mfn's all change after migration */ 299 /* Reinitialise, mfn's all change after migration */
302 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); 300 p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
303 p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity);
304 } 301 }
305 302
306 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { 303 for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) {
@@ -328,10 +325,9 @@ void __ref xen_build_mfn_list_list(void)
328 /* 325 /*
329 * XXX boot-time only! We should never find 326 * XXX boot-time only! We should never find
330 * missing parts of the mfn tree after 327 * missing parts of the mfn tree after
331 * runtime. extend_brk() will BUG if we call 328 * runtime.
332 * it too late.
333 */ 329 */
334 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); 330 mid_mfn_p = alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
335 p2m_mid_mfn_init(mid_mfn_p, p2m_missing); 331 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
336 332
337 p2m_top_mfn_p[topidx] = mid_mfn_p; 333 p2m_top_mfn_p[topidx] = mid_mfn_p;
@@ -415,7 +411,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
415 m2p_override_init(); 411 m2p_override_init();
416} 412}
417#ifdef CONFIG_X86_64 413#ifdef CONFIG_X86_64
418#include <linux/bootmem.h>
419unsigned long __init xen_revector_p2m_tree(void) 414unsigned long __init xen_revector_p2m_tree(void)
420{ 415{
421 unsigned long va_start; 416 unsigned long va_start;
@@ -477,7 +472,6 @@ unsigned long __init xen_revector_p2m_tree(void)
477 472
478 copy_page(new, mid_p); 473 copy_page(new, mid_p);
479 p2m_top[topidx][mididx] = &mfn_list[pfn_free]; 474 p2m_top[topidx][mididx] = &mfn_list[pfn_free];
480 p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
481 475
482 pfn_free += P2M_PER_PAGE; 476 pfn_free += P2M_PER_PAGE;
483 477
@@ -538,12 +532,13 @@ static bool alloc_p2m(unsigned long pfn)
538 unsigned topidx, mididx; 532 unsigned topidx, mididx;
539 unsigned long ***top_p, **mid; 533 unsigned long ***top_p, **mid;
540 unsigned long *top_mfn_p, *mid_mfn; 534 unsigned long *top_mfn_p, *mid_mfn;
535 unsigned long *p2m_orig;
541 536
542 topidx = p2m_top_index(pfn); 537 topidx = p2m_top_index(pfn);
543 mididx = p2m_mid_index(pfn); 538 mididx = p2m_mid_index(pfn);
544 539
545 top_p = &p2m_top[topidx]; 540 top_p = &p2m_top[topidx];
546 mid = *top_p; 541 mid = ACCESS_ONCE(*top_p);
547 542
548 if (mid == p2m_mid_missing) { 543 if (mid == p2m_mid_missing) {
549 /* Mid level is missing, allocate a new one */ 544 /* Mid level is missing, allocate a new one */
@@ -558,7 +553,7 @@ static bool alloc_p2m(unsigned long pfn)
558 } 553 }
559 554
560 top_mfn_p = &p2m_top_mfn[topidx]; 555 top_mfn_p = &p2m_top_mfn[topidx];
561 mid_mfn = p2m_top_mfn_p[topidx]; 556 mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]);
562 557
563 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); 558 BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);
564 559
@@ -566,6 +561,7 @@ static bool alloc_p2m(unsigned long pfn)
566 /* Separately check the mid mfn level */ 561 /* Separately check the mid mfn level */
567 unsigned long missing_mfn; 562 unsigned long missing_mfn;
568 unsigned long mid_mfn_mfn; 563 unsigned long mid_mfn_mfn;
564 unsigned long old_mfn;
569 565
570 mid_mfn = alloc_p2m_page(); 566 mid_mfn = alloc_p2m_page();
571 if (!mid_mfn) 567 if (!mid_mfn)
@@ -575,17 +571,19 @@ static bool alloc_p2m(unsigned long pfn)
575 571
576 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); 572 missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
577 mid_mfn_mfn = virt_to_mfn(mid_mfn); 573 mid_mfn_mfn = virt_to_mfn(mid_mfn);
578 if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) 574 old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
575 if (old_mfn != missing_mfn) {
579 free_p2m_page(mid_mfn); 576 free_p2m_page(mid_mfn);
580 else 577 mid_mfn = mfn_to_virt(old_mfn);
578 } else {
581 p2m_top_mfn_p[topidx] = mid_mfn; 579 p2m_top_mfn_p[topidx] = mid_mfn;
580 }
582 } 581 }
583 582
584 if (p2m_top[topidx][mididx] == p2m_identity || 583 p2m_orig = ACCESS_ONCE(p2m_top[topidx][mididx]);
585 p2m_top[topidx][mididx] == p2m_missing) { 584 if (p2m_orig == p2m_identity || p2m_orig == p2m_missing) {
586 /* p2m leaf page is missing */ 585 /* p2m leaf page is missing */
587 unsigned long *p2m; 586 unsigned long *p2m;
588 unsigned long *p2m_orig = p2m_top[topidx][mididx];
589 587
590 p2m = alloc_p2m_page(); 588 p2m = alloc_p2m_page();
591 if (!p2m) 589 if (!p2m)
@@ -606,7 +604,6 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
606{ 604{
607 unsigned topidx, mididx, idx; 605 unsigned topidx, mididx, idx;
608 unsigned long *p2m; 606 unsigned long *p2m;
609 unsigned long *mid_mfn_p;
610 607
611 topidx = p2m_top_index(pfn); 608 topidx = p2m_top_index(pfn);
612 mididx = p2m_mid_index(pfn); 609 mididx = p2m_mid_index(pfn);
@@ -633,43 +630,21 @@ static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary)
633 630
634 p2m_top[topidx][mididx] = p2m; 631 p2m_top[topidx][mididx] = p2m;
635 632
636 /* For save/restore we need to MFN of the P2M saved */
637
638 mid_mfn_p = p2m_top_mfn_p[topidx];
639 WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
640 "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
641 topidx, mididx);
642 mid_mfn_p[mididx] = virt_to_mfn(p2m);
643
644 return true; 633 return true;
645} 634}
646 635
647static bool __init early_alloc_p2m_middle(unsigned long pfn) 636static bool __init early_alloc_p2m_middle(unsigned long pfn)
648{ 637{
649 unsigned topidx = p2m_top_index(pfn); 638 unsigned topidx = p2m_top_index(pfn);
650 unsigned long *mid_mfn_p;
651 unsigned long **mid; 639 unsigned long **mid;
652 640
653 mid = p2m_top[topidx]; 641 mid = p2m_top[topidx];
654 mid_mfn_p = p2m_top_mfn_p[topidx];
655 if (mid == p2m_mid_missing) { 642 if (mid == p2m_mid_missing) {
656 mid = extend_brk(PAGE_SIZE, PAGE_SIZE); 643 mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
657 644
658 p2m_mid_init(mid, p2m_missing); 645 p2m_mid_init(mid, p2m_missing);
659 646
660 p2m_top[topidx] = mid; 647 p2m_top[topidx] = mid;
661
662 BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
663 }
664 /* And the save/restore P2M tables.. */
665 if (mid_mfn_p == p2m_mid_missing_mfn) {
666 mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
667 p2m_mid_mfn_init(mid_mfn_p, p2m_missing);
668
669 p2m_top_mfn_p[topidx] = mid_mfn_p;
670 p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
671 /* Note: we don't set mid_mfn_p[midix] here,
672 * look in early_alloc_p2m() */
673 } 648 }
674 return true; 649 return true;
675} 650}
@@ -680,14 +655,13 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn)
680 * replace the P2M leaf with a p2m_missing or p2m_identity. 655 * replace the P2M leaf with a p2m_missing or p2m_identity.
681 * Stick the old page in the new P2M tree location. 656 * Stick the old page in the new P2M tree location.
682 */ 657 */
683bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) 658static bool __init early_can_reuse_p2m_middle(unsigned long set_pfn)
684{ 659{
685 unsigned topidx; 660 unsigned topidx;
686 unsigned mididx; 661 unsigned mididx;
687 unsigned ident_pfns; 662 unsigned ident_pfns;
688 unsigned inv_pfns; 663 unsigned inv_pfns;
689 unsigned long *p2m; 664 unsigned long *p2m;
690 unsigned long *mid_mfn_p;
691 unsigned idx; 665 unsigned idx;
692 unsigned long pfn; 666 unsigned long pfn;
693 667
@@ -733,11 +707,6 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
733found: 707found:
734 /* Found one, replace old with p2m_identity or p2m_missing */ 708 /* Found one, replace old with p2m_identity or p2m_missing */
735 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); 709 p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing);
736 /* And the other for save/restore.. */
737 mid_mfn_p = p2m_top_mfn_p[topidx];
738 /* NOTE: Even if it is a p2m_identity it should still be point to
739 * a page filled with INVALID_P2M_ENTRY entries. */
740 mid_mfn_p[mididx] = virt_to_mfn(p2m_missing);
741 710
742 /* Reset where we want to stick the old page in. */ 711 /* Reset where we want to stick the old page in. */
743 topidx = p2m_top_index(set_pfn); 712 topidx = p2m_top_index(set_pfn);
@@ -752,8 +721,6 @@ found:
752 721
753 p2m_init(p2m); 722 p2m_init(p2m);
754 p2m_top[topidx][mididx] = p2m; 723 p2m_top[topidx][mididx] = p2m;
755 mid_mfn_p = p2m_top_mfn_p[topidx];
756 mid_mfn_p[mididx] = virt_to_mfn(p2m);
757 724
758 return true; 725 return true;
759} 726}
@@ -763,7 +730,7 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn)
763 if (!early_alloc_p2m_middle(pfn)) 730 if (!early_alloc_p2m_middle(pfn))
764 return false; 731 return false;
765 732
766 if (early_can_reuse_p2m_middle(pfn, mfn)) 733 if (early_can_reuse_p2m_middle(pfn))
767 return __set_phys_to_machine(pfn, mfn); 734 return __set_phys_to_machine(pfn, mfn);
768 735
769 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) 736 if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/))
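The alloc_p2m() rework above addresses a CPU-vs-CPU race: two CPUs can try to populate the same missing mid level at once. Shared pointers are now read once with ACCESS_ONCE() so a concurrent update cannot be re-read mid-function, and the loser of the cmpxchg() adopts the winner's page instead of keeping a stale local pointer. The lose-the-race path, condensed from the hunk with comments:

	old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
	if (old_mfn != missing_mfn) {
		/* Another CPU installed its mid page first: use theirs. */
		free_p2m_page(mid_mfn);
		mid_mfn = mfn_to_virt(old_mfn);
	} else {
		p2m_top_mfn_p[topidx] = mid_mfn;
	}
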
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index af7216128d93..29834b3fd87f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -595,6 +595,7 @@ char * __init xen_memory_setup(void)
595 rc = 0; 595 rc = 0;
596 } 596 }
597 BUG_ON(rc); 597 BUG_ON(rc);
598 BUG_ON(memmap.nr_entries == 0);
598 599
599 /* 600 /*
600 * Xen won't allow a 1:1 mapping to be created to UNUSABLE 601 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1d430b112b3..f473d268d387 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void)
158 cycle_t ret; 158 cycle_t ret;
159 159
160 preempt_disable_notrace(); 160 preempt_disable_notrace();
161 src = this_cpu_ptr(&xen_vcpu->time); 161 src = &__this_cpu_read(xen_vcpu)->time;
162 ret = pvclock_clocksource_read(src); 162 ret = pvclock_clocksource_read(src);
163 preempt_enable_notrace(); 163 preempt_enable_notrace();
164 return ret; 164 return ret;
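The one-liner in xen_clocksource_read() fixes a per-CPU access pattern: xen_vcpu is a per-CPU *pointer*, and this_cpu_ptr(&xen_vcpu->time) dereferenced the raw variable before applying the per-CPU offset. The fix reads this CPU's pointer first, then takes the member address. A sketch of the distinction, with xen_vcpu declared as in the Xen code:

	DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);

	/* wrong: offsets into a dereference of the raw per-CPU symbol */
	src = this_cpu_ptr(&xen_vcpu->time);

	/* right: fetch this CPU's pointer, then point at the member */
	src = &__this_cpu_read(xen_vcpu)->time;
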
diff --git a/crypto/cts.c b/crypto/cts.c
index 042223f8e733..133f0874c95e 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -202,7 +202,8 @@ static int cts_cbc_decrypt(struct crypto_cts_ctx *ctx,
202 /* 5. Append the tail (BB - Ln) bytes of Xn (tmp) to Cn to create En */ 202 /* 5. Append the tail (BB - Ln) bytes of Xn (tmp) to Cn to create En */
203 memcpy(s + bsize + lastn, tmp + lastn, bsize - lastn); 203 memcpy(s + bsize + lastn, tmp + lastn, bsize - lastn);
204 /* 6. Decrypt En to create Pn-1 */ 204 /* 6. Decrypt En to create Pn-1 */
205 memset(iv, 0, sizeof(iv)); 205 memzero_explicit(iv, sizeof(iv));
206
206 sg_set_buf(&sgsrc[0], s + bsize, bsize); 207 sg_set_buf(&sgsrc[0], s + bsize, bsize);
207 sg_set_buf(&sgdst[0], d, bsize); 208 sg_set_buf(&sgdst[0], d, bsize);
208 err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize); 209 err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize);
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index 42794803c480..7bb047432782 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -64,7 +64,7 @@ int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
64 src = data + done; 64 src = data + done;
65 } while (done + SHA1_BLOCK_SIZE <= len); 65 } while (done + SHA1_BLOCK_SIZE <= len);
66 66
67 memset(temp, 0, sizeof(temp)); 67 memzero_explicit(temp, sizeof(temp));
68 partial = 0; 68 partial = 0;
69 } 69 }
70 memcpy(sctx->buffer + partial, src, len - done); 70 memcpy(sctx->buffer + partial, src, len - done);
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index 0bb558344699..65e7b76b057f 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -211,10 +211,9 @@ static void sha256_transform(u32 *state, const u8 *input)
211 211
212 /* clear any sensitive info... */ 212 /* clear any sensitive info... */
213 a = b = c = d = e = f = g = h = t1 = t2 = 0; 213 a = b = c = d = e = f = g = h = t1 = t2 = 0;
214 memset(W, 0, 64 * sizeof(u32)); 214 memzero_explicit(W, 64 * sizeof(u32));
215} 215}
216 216
217
218static int sha224_init(struct shash_desc *desc) 217static int sha224_init(struct shash_desc *desc)
219{ 218{
220 struct sha256_state *sctx = shash_desc_ctx(desc); 219 struct sha256_state *sctx = shash_desc_ctx(desc);
@@ -317,7 +316,7 @@ static int sha224_final(struct shash_desc *desc, u8 *hash)
317 sha256_final(desc, D); 316 sha256_final(desc, D);
318 317
319 memcpy(hash, D, SHA224_DIGEST_SIZE); 318 memcpy(hash, D, SHA224_DIGEST_SIZE);
320 memset(D, 0, SHA256_DIGEST_SIZE); 319 memzero_explicit(D, SHA256_DIGEST_SIZE);
321 320
322 return 0; 321 return 0;
323} 322}
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 6dde57dc511b..95db67197cd9 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -239,7 +239,7 @@ static int sha384_final(struct shash_desc *desc, u8 *hash)
239 sha512_final(desc, D); 239 sha512_final(desc, D);
240 240
241 memcpy(hash, D, 48); 241 memcpy(hash, D, 48);
242 memset(D, 0, 64); 242 memzero_explicit(D, 64);
243 243
244 return 0; 244 return 0;
245} 245}
diff --git a/crypto/tgr192.c b/crypto/tgr192.c
index 87403556fd0b..3c7af0d1ff7a 100644
--- a/crypto/tgr192.c
+++ b/crypto/tgr192.c
@@ -612,7 +612,7 @@ static int tgr160_final(struct shash_desc *desc, u8 * out)
612 612
613 tgr192_final(desc, D); 613 tgr192_final(desc, D);
614 memcpy(out, D, TGR160_DIGEST_SIZE); 614 memcpy(out, D, TGR160_DIGEST_SIZE);
615 memset(D, 0, TGR192_DIGEST_SIZE); 615 memzero_explicit(D, TGR192_DIGEST_SIZE);
616 616
617 return 0; 617 return 0;
618} 618}
@@ -623,7 +623,7 @@ static int tgr128_final(struct shash_desc *desc, u8 * out)
623 623
624 tgr192_final(desc, D); 624 tgr192_final(desc, D);
625 memcpy(out, D, TGR128_DIGEST_SIZE); 625 memcpy(out, D, TGR128_DIGEST_SIZE);
626 memset(D, 0, TGR192_DIGEST_SIZE); 626 memzero_explicit(D, TGR192_DIGEST_SIZE);
627 627
628 return 0; 628 return 0;
629} 629}
diff --git a/crypto/vmac.c b/crypto/vmac.c
index 2eb11a30c29c..d84c24bd7ff7 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -613,7 +613,7 @@ static int vmac_final(struct shash_desc *pdesc, u8 *out)
613 } 613 }
614 mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx); 614 mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx);
615 memcpy(out, &mac, sizeof(vmac_t)); 615 memcpy(out, &mac, sizeof(vmac_t));
616 memset(&mac, 0, sizeof(vmac_t)); 616 memzero_explicit(&mac, sizeof(vmac_t));
617 memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); 617 memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
618 ctx->partial_size = 0; 618 ctx->partial_size = 0;
619 return 0; 619 return 0;
diff --git a/crypto/wp512.c b/crypto/wp512.c
index 180f1d6e03f4..ec64e7762fbb 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1102,8 +1102,8 @@ static int wp384_final(struct shash_desc *desc, u8 *out)
1102 u8 D[64]; 1102 u8 D[64];
1103 1103
1104 wp512_final(desc, D); 1104 wp512_final(desc, D);
1105 memcpy (out, D, WP384_DIGEST_SIZE); 1105 memcpy(out, D, WP384_DIGEST_SIZE);
1106 memset (D, 0, WP512_DIGEST_SIZE); 1106 memzero_explicit(D, WP512_DIGEST_SIZE);
1107 1107
1108 return 0; 1108 return 0;
1109} 1109}
@@ -1113,8 +1113,8 @@ static int wp256_final(struct shash_desc *desc, u8 *out)
1113 u8 D[64]; 1113 u8 D[64];
1114 1114
1115 wp512_final(desc, D); 1115 wp512_final(desc, D);
1116 memcpy (out, D, WP256_DIGEST_SIZE); 1116 memcpy(out, D, WP256_DIGEST_SIZE);
1117 memset (D, 0, WP512_DIGEST_SIZE); 1117 memzero_explicit(D, WP512_DIGEST_SIZE);
1118 1118
1119 return 0; 1119 return 0;
1120} 1120}
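All of the crypto conversions above share one rationale: a memset() of a stack buffer just before it goes out of scope is a dead store the compiler is entitled to delete, which can leave digest state or key material live on the stack. memzero_explicit() pins the store with a compiler barrier. Roughly the lib/string.c shape of this era (a sketch; the exact barrier mechanism has varied over time):

	void memzero_explicit(void *s, size_t count)
	{
		memset(s, 0, count);
		OPTIMIZER_HIDE_VAR(s);	/* forbid dead-store elimination */
	}

Note the wp512.c hunks also fold in a style cleanup, dropping the space in "memcpy (".
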
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index d0f3265fb85d..b23fe37f67c0 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -144,7 +144,7 @@ config ACPI_VIDEO
144 144
145config ACPI_FAN 145config ACPI_FAN
146 tristate "Fan" 146 tristate "Fan"
147 select THERMAL 147 depends on THERMAL
148 default y 148 default y
149 help 149 help
150 This driver supports ACPI fan devices, allowing user-mode 150 This driver supports ACPI fan devices, allowing user-mode
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 505d4d79fe3e..c3b2fcb729f3 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -43,6 +43,7 @@ acpi-y += pci_root.o pci_link.o pci_irq.o
43acpi-y += acpi_lpss.o 43acpi-y += acpi_lpss.o
44acpi-y += acpi_platform.o 44acpi-y += acpi_platform.o
45acpi-y += acpi_pnp.o 45acpi-y += acpi_pnp.o
46acpi-y += int340x_thermal.o
46acpi-y += power.o 47acpi-y += power.o
47acpi-y += event.o 48acpi-y += event.o
48acpi-y += sysfs.o 49acpi-y += sysfs.o
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 2bf9082f7523..6ba8beb6b9d2 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -16,6 +16,7 @@
16#include <linux/err.h> 16#include <linux/err.h>
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/dma-mapping.h>
19#include <linux/platform_device.h> 20#include <linux/platform_device.h>
20 21
21#include "internal.h" 22#include "internal.h"
@@ -102,6 +103,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
102 pdevinfo.res = resources; 103 pdevinfo.res = resources;
103 pdevinfo.num_res = count; 104 pdevinfo.num_res = count;
104 pdevinfo.acpi_node.companion = adev; 105 pdevinfo.acpi_node.companion = adev;
106 pdevinfo.dma_mask = DMA_BIT_MASK(32);
105 pdev = platform_device_register_full(&pdevinfo); 107 pdev = platform_device_register_full(&pdevinfo);
106 if (IS_ERR(pdev)) 108 if (IS_ERR(pdev))
107 dev_err(&adev->dev, "platform device creation failed: %ld\n", 109 dev_err(&adev->dev, "platform device creation failed: %ld\n",
@@ -113,3 +115,4 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
113 kfree(resources); 115 kfree(resources);
114 return pdev; 116 return pdev;
115} 117}
118EXPORT_SYMBOL_GPL(acpi_create_platform_device);
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index 2ad2351a9833..c318d3e27893 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -127,7 +127,7 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
127 127
128acpi_status 128acpi_status
129acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, 129acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
130 acpi_event_status * event_status); 130 acpi_event_status *event_status);
131 131
132acpi_status acpi_hw_disable_all_gpes(void); 132acpi_status acpi_hw_disable_all_gpes(void);
133 133
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 2747279fbe3c..c00e7e41ad75 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -413,8 +413,8 @@ struct acpi_gpe_handler_info {
413 acpi_gpe_handler address; /* Address of handler, if any */ 413 acpi_gpe_handler address; /* Address of handler, if any */
414 void *context; /* Context to be passed to handler */ 414 void *context; /* Context to be passed to handler */
415 struct acpi_namespace_node *method_node; /* Method node for this GPE level (saved) */ 415 struct acpi_namespace_node *method_node; /* Method node for this GPE level (saved) */
416 u8 original_flags; /* Original (pre-handler) GPE info */ 416 u8 original_flags; /* Original (pre-handler) GPE info */
417 u8 originally_enabled; /* True if GPE was originally enabled */ 417 u8 originally_enabled; /* True if GPE was originally enabled */
418}; 418};
419 419
420/* Notify info for implicit notify, multiple device objects */ 420/* Notify info for implicit notify, multiple device objects */
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index f14882788eee..1afe46e44dac 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -49,6 +49,8 @@ acpi_status acpi_allocate_root_table(u32 initial_table_count);
49/* 49/*
50 * tbxfroot - Root pointer utilities 50 * tbxfroot - Root pointer utilities
51 */ 51 */
52u32 acpi_tb_get_rsdp_length(struct acpi_table_rsdp *rsdp);
53
52acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp); 54acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp);
53 55
54u8 *acpi_tb_scan_memory_for_rsdp(u8 *start_address, u32 length); 56u8 *acpi_tb_scan_memory_for_rsdp(u8 *start_address, u32 length);
diff --git a/drivers/acpi/acpica/amlresrc.h b/drivers/acpi/acpica/amlresrc.h
index f3f834408441..3a0beeb86ba5 100644
--- a/drivers/acpi/acpica/amlresrc.h
+++ b/drivers/acpi/acpica/amlresrc.h
@@ -117,6 +117,12 @@ struct asl_resource_node {
117 struct asl_resource_node *next; 117 struct asl_resource_node *next;
118}; 118};
119 119
120struct asl_resource_info {
121 union acpi_parse_object *descriptor_type_op; /* Resource descriptor parse node */
122 union acpi_parse_object *mapping_op; /* Used for mapfile support */
123 u32 current_byte_offset; /* Offset in resource template */
124};
125
120/* Macros used to generate AML resource length fields */ 126/* Macros used to generate AML resource length fields */
121 127
122#define ACPI_AML_SIZE_LARGE(r) (sizeof (r) - sizeof (struct aml_resource_large_header)) 128#define ACPI_AML_SIZE_LARGE(r) (sizeof (r) - sizeof (struct aml_resource_large_header))
@@ -449,4 +455,32 @@ union aml_resource {
449 u8 byte_item; 455 u8 byte_item;
450}; 456};
451 457
458/* Interfaces used by both the disassembler and compiler */
459
460void
461mp_save_gpio_info(union acpi_parse_object *op,
462 union aml_resource *resource,
463 u32 pin_count, u16 *pin_list, char *device_name);
464
465void
466mp_save_serial_info(union acpi_parse_object *op,
467 union aml_resource *resource, char *device_name);
468
469char *mp_get_hid_from_parse_tree(struct acpi_namespace_node *hid_node);
470
471char *mp_get_hid_via_namestring(char *device_name);
472
473char *mp_get_connection_info(union acpi_parse_object *op,
474 u32 pin_index,
475 struct acpi_namespace_node **target_node,
476 char **target_name);
477
478char *mp_get_parent_device_hid(union acpi_parse_object *op,
479 struct acpi_namespace_node **target_node,
480 char **parent_device_name);
481
482char *mp_get_ddn_value(char *device_name);
483
484char *mp_get_hid_value(struct acpi_namespace_node *device_node);
485
452#endif 486#endif
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index e4ba4dec86af..2095dfb72bcb 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -100,13 +100,14 @@ acpi_ev_update_gpe_enable_mask(struct acpi_gpe_event_info *gpe_event_info)
100 * 100 *
101 * FUNCTION: acpi_ev_enable_gpe 101 * FUNCTION: acpi_ev_enable_gpe
102 * 102 *
103 * PARAMETERS: gpe_event_info - GPE to enable 103 * PARAMETERS: gpe_event_info - GPE to enable
104 * 104 *
105 * RETURN: Status 105 * RETURN: Status
106 * 106 *
107 * DESCRIPTION: Clear a GPE of stale events and enable it. 107 * DESCRIPTION: Clear a GPE of stale events and enable it.
108 * 108 *
109 ******************************************************************************/ 109 ******************************************************************************/
110
110acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info) 111acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info)
111{ 112{
112 acpi_status status; 113 acpi_status status;
@@ -125,6 +126,7 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info)
125 } 126 }
126 127
127 /* Clear the GPE (of stale events) */ 128 /* Clear the GPE (of stale events) */
129
128 status = acpi_hw_clear_gpe(gpe_event_info); 130 status = acpi_hw_clear_gpe(gpe_event_info);
129 if (ACPI_FAILURE(status)) { 131 if (ACPI_FAILURE(status)) {
130 return_ACPI_STATUS(status); 132 return_ACPI_STATUS(status);
@@ -136,7 +138,6 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info)
136 return_ACPI_STATUS(status); 138 return_ACPI_STATUS(status);
137} 139}
138 140
139
140/******************************************************************************* 141/*******************************************************************************
141 * 142 *
142 * FUNCTION: acpi_ev_add_gpe_reference 143 * FUNCTION: acpi_ev_add_gpe_reference
@@ -212,7 +213,7 @@ acpi_ev_remove_gpe_reference(struct acpi_gpe_event_info *gpe_event_info)
212 if (ACPI_SUCCESS(status)) { 213 if (ACPI_SUCCESS(status)) {
213 status = 214 status =
214 acpi_hw_low_set_gpe(gpe_event_info, 215 acpi_hw_low_set_gpe(gpe_event_info,
215 ACPI_GPE_DISABLE); 216 ACPI_GPE_DISABLE);
216 } 217 }
217 218
218 if (ACPI_FAILURE(status)) { 219 if (ACPI_FAILURE(status)) {
@@ -334,7 +335,7 @@ struct acpi_gpe_event_info *acpi_ev_get_gpe_event_info(acpi_handle gpe_device,
334 * 335 *
335 ******************************************************************************/ 336 ******************************************************************************/
336 337
337u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) 338u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info *gpe_xrupt_list)
338{ 339{
339 acpi_status status; 340 acpi_status status;
340 struct acpi_gpe_block_info *gpe_block; 341 struct acpi_gpe_block_info *gpe_block;
@@ -427,7 +428,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
427 428
428 /* Check if there is anything active at all in this register */ 429 /* Check if there is anything active at all in this register */
429 430
430 enabled_status_byte = (u8) (status_reg & enable_reg); 431 enabled_status_byte = (u8)(status_reg & enable_reg);
431 if (!enabled_status_byte) { 432 if (!enabled_status_byte) {
432 433
433 /* No active GPEs in this register, move on */ 434 /* No active GPEs in this register, move on */
@@ -450,7 +451,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list)
450 acpi_ev_gpe_dispatch(gpe_block-> 451 acpi_ev_gpe_dispatch(gpe_block->
451 node, 452 node,
452 &gpe_block-> 453 &gpe_block->
453 event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number); 454 event_info[((acpi_size) i * ACPI_GPE_REGISTER_WIDTH) + j], j + gpe_register_info->base_gpe_number);
454 } 455 }
455 } 456 }
456 } 457 }
@@ -636,7 +637,7 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_enable_gpe(void *context)
636 * 637 *
637 ******************************************************************************/ 638 ******************************************************************************/
638 639
639acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info *gpe_event_info) 640acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info * gpe_event_info)
640{ 641{
641 acpi_status status; 642 acpi_status status;
642 643
@@ -666,9 +667,9 @@ acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info *gpe_event_info)
666 * 667 *
667 * FUNCTION: acpi_ev_gpe_dispatch 668 * FUNCTION: acpi_ev_gpe_dispatch
668 * 669 *
669 * PARAMETERS: gpe_device - Device node. NULL for GPE0/GPE1 670 * PARAMETERS: gpe_device - Device node. NULL for GPE0/GPE1
670 * gpe_event_info - Info for this GPE 671 * gpe_event_info - Info for this GPE
671 * gpe_number - Number relative to the parent GPE block 672 * gpe_number - Number relative to the parent GPE block
672 * 673 *
673 * RETURN: INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED 674 * RETURN: INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED
674 * 675 *
@@ -681,7 +682,7 @@ acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info *gpe_event_info)
681 682
682u32 683u32
683acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device, 684acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device,
684 struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number) 685 struct acpi_gpe_event_info *gpe_event_info, u32 gpe_number)
685{ 686{
686 acpi_status status; 687 acpi_status status;
687 u32 return_value; 688 u32 return_value;
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 49fc7effd961..7be928379879 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -424,6 +424,7 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
424 } 424 }
425 425
426 /* Disable the GPE in case it's been enabled already. */ 426 /* Disable the GPE in case it's been enabled already. */
427
427 (void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE); 428 (void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
428 429
429 /* 430 /*
diff --git a/drivers/acpi/acpica/evxface.c b/drivers/acpi/acpica/evxface.c
index 11e5803b8b41..55a58f3ec8df 100644
--- a/drivers/acpi/acpica/evxface.c
+++ b/drivers/acpi/acpica/evxface.c
@@ -786,18 +786,26 @@ acpi_install_gpe_handler(acpi_handle gpe_device,
786 handler->method_node = gpe_event_info->dispatch.method_node; 786 handler->method_node = gpe_event_info->dispatch.method_node;
787 handler->original_flags = (u8)(gpe_event_info->flags & 787 handler->original_flags = (u8)(gpe_event_info->flags &
788 (ACPI_GPE_XRUPT_TYPE_MASK | 788 (ACPI_GPE_XRUPT_TYPE_MASK |
789 ACPI_GPE_DISPATCH_MASK)); 789 ACPI_GPE_DISPATCH_MASK));
790 790
791 /* 791 /*
792 * If the GPE is associated with a method, it may have been enabled 792 * If the GPE is associated with a method, it may have been enabled
793 * automatically during initialization, in which case it has to be 793 * automatically during initialization, in which case it has to be
794 * disabled now to avoid spurious execution of the handler. 794 * disabled now to avoid spurious execution of the handler.
795 */ 795 */
796 796 if (((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) ||
797 if ((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) 797 (handler->original_flags & ACPI_GPE_DISPATCH_NOTIFY)) &&
798 && gpe_event_info->runtime_count) { 798 gpe_event_info->runtime_count) {
799 handler->originally_enabled = 1; 799 handler->originally_enabled = TRUE;
800 (void)acpi_ev_remove_gpe_reference(gpe_event_info); 800 (void)acpi_ev_remove_gpe_reference(gpe_event_info);
801
802 /* Sanity check of original type against new type */
803
804 if (type !=
805 (u32)(gpe_event_info->flags & ACPI_GPE_XRUPT_TYPE_MASK)) {
806 ACPI_WARNING((AE_INFO,
807 "GPE type mismatch (level/edge)"));
808 }
801 } 809 }
802 810
803 /* Install the handler */ 811 /* Install the handler */
@@ -808,7 +816,7 @@ acpi_install_gpe_handler(acpi_handle gpe_device,
808 816
809 gpe_event_info->flags &= 817 gpe_event_info->flags &=
810 ~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK); 818 ~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK);
811 gpe_event_info->flags |= (u8) (type | ACPI_GPE_DISPATCH_HANDLER); 819 gpe_event_info->flags |= (u8)(type | ACPI_GPE_DISPATCH_HANDLER);
812 820
813 acpi_os_release_lock(acpi_gbl_gpe_lock, flags); 821 acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
814 822
@@ -893,7 +901,7 @@ acpi_remove_gpe_handler(acpi_handle gpe_device,
893 901
894 gpe_event_info->dispatch.method_node = handler->method_node; 902 gpe_event_info->dispatch.method_node = handler->method_node;
895 gpe_event_info->flags &= 903 gpe_event_info->flags &=
896 ~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK); 904 ~(ACPI_GPE_XRUPT_TYPE_MASK | ACPI_GPE_DISPATCH_MASK);
897 gpe_event_info->flags |= handler->original_flags; 905 gpe_event_info->flags |= handler->original_flags;
898 906
899 /* 907 /*
@@ -901,7 +909,8 @@ acpi_remove_gpe_handler(acpi_handle gpe_device,
901 * enabled, it should be enabled at this point to restore the 909 * enabled, it should be enabled at this point to restore the
902 * post-initialization configuration. 910 * post-initialization configuration.
903 */ 911 */
904 if ((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) && 912 if (((handler->original_flags & ACPI_GPE_DISPATCH_METHOD) ||
913 (handler->original_flags & ACPI_GPE_DISPATCH_NOTIFY)) &&
905 handler->originally_enabled) { 914 handler->originally_enabled) {
906 (void)acpi_ev_add_gpe_reference(gpe_event_info); 915 (void)acpi_ev_add_gpe_reference(gpe_event_info);
907 } 916 }
@@ -946,7 +955,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_gpe_handler)
946 * handle is returned. 955 * handle is returned.
947 * 956 *
948 ******************************************************************************/ 957 ******************************************************************************/
949acpi_status acpi_acquire_global_lock(u16 timeout, u32 * handle) 958acpi_status acpi_acquire_global_lock(u16 timeout, u32 *handle)
950{ 959{
951 acpi_status status; 960 acpi_status status;
952 961
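The acpi_install_gpe_handler()/acpi_remove_gpe_handler() hunks above widen the save/restore bookkeeping to cover notify-style GPEs as well as method-style ones: on install, the interrupt-type and dispatch bits are stashed in the handler object; on removal they are written back. A minimal standalone model of that dance, with illustrative mask values rather than ACPICA's definitions:

#include <stdint.h>

#define XRUPT_TYPE_MASK  0x03   /* illustrative: level vs. edge bits */
#define DISPATCH_MASK    0x1c   /* illustrative: none/method/handler/notify */
#define DISPATCH_HANDLER 0x08   /* illustrative */

struct gpe     { uint8_t flags; };
struct handler { uint8_t original_flags; };

static void install(struct gpe *g, struct handler *h, uint8_t type)
{
        /* Remember only the bits the handler path is about to overwrite. */
        h->original_flags = g->flags & (XRUPT_TYPE_MASK | DISPATCH_MASK);
        g->flags &= ~(XRUPT_TYPE_MASK | DISPATCH_MASK);
        g->flags |= (uint8_t)(type | DISPATCH_HANDLER);
}

static void removal(struct gpe *g, const struct handler *h)
{
        /* Restore exactly what install() saved; other flags are untouched. */
        g->flags &= ~(XRUPT_TYPE_MASK | DISPATCH_MASK);
        g->flags |= h->original_flags;
}
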
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index e286640ad4ff..bb8cbf5961bf 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -324,8 +324,9 @@ ACPI_EXPORT_SYMBOL(acpi_clear_event)
324 ******************************************************************************/ 324 ******************************************************************************/
325acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status) 325acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status)
326{ 326{
327 acpi_status status = AE_OK; 327 acpi_status status;
328 u32 value; 328 acpi_event_status local_event_status = 0;
329 u32 in_byte;
329 330
330 ACPI_FUNCTION_TRACE(acpi_get_event_status); 331 ACPI_FUNCTION_TRACE(acpi_get_event_status);
331 332
@@ -339,29 +340,40 @@ acpi_status acpi_get_event_status(u32 event, acpi_event_status * event_status)
339 return_ACPI_STATUS(AE_BAD_PARAMETER); 340 return_ACPI_STATUS(AE_BAD_PARAMETER);
340 } 341 }
341 342
342 /* Get the status of the requested fixed event */ 343 /* Fixed event currently can be dispatched? */
344
345 if (acpi_gbl_fixed_event_handlers[event].handler) {
346 local_event_status |= ACPI_EVENT_FLAG_HAS_HANDLER;
347 }
348
349 /* Fixed event currently enabled? */
343 350
344 status = 351 status =
345 acpi_read_bit_register(acpi_gbl_fixed_event_info[event]. 352 acpi_read_bit_register(acpi_gbl_fixed_event_info[event].
346 enable_register_id, &value); 353 enable_register_id, &in_byte);
347 if (ACPI_FAILURE(status)) 354 if (ACPI_FAILURE(status)) {
348 return_ACPI_STATUS(status); 355 return_ACPI_STATUS(status);
356 }
349 357
350 *event_status = value; 358 if (in_byte) {
359 local_event_status |= ACPI_EVENT_FLAG_ENABLED;
360 }
361
362 /* Fixed event currently active? */
351 363
352 status = 364 status =
353 acpi_read_bit_register(acpi_gbl_fixed_event_info[event]. 365 acpi_read_bit_register(acpi_gbl_fixed_event_info[event].
354 status_register_id, &value); 366 status_register_id, &in_byte);
355 if (ACPI_FAILURE(status)) 367 if (ACPI_FAILURE(status)) {
356 return_ACPI_STATUS(status); 368 return_ACPI_STATUS(status);
369 }
357 370
358 if (value) 371 if (in_byte) {
359 *event_status |= ACPI_EVENT_FLAG_SET; 372 local_event_status |= ACPI_EVENT_FLAG_SET;
360 373 }
361 if (acpi_gbl_fixed_event_handlers[event].handler)
362 *event_status |= ACPI_EVENT_FLAG_HANDLE;
363 374
364 return_ACPI_STATUS(status); 375 (*event_status) = local_event_status;
376 return_ACPI_STATUS(AE_OK);
365} 377}
366 378
367ACPI_EXPORT_SYMBOL(acpi_get_event_status) 379ACPI_EXPORT_SYMBOL(acpi_get_event_status)
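The reworked acpi_get_event_status() above composes the handler, enable and status information into a local word and stores it into the caller's buffer only after every register read has succeeded, so a failing second read can no longer leave *event_status half-written as the old code could. A sketch of the pattern with made-up flag values:

#include <stdint.h>

#define EVT_HAS_HANDLER (1u << 0)
#define EVT_ENABLED     (1u << 1)
#define EVT_SET         (1u << 2)

static int get_event_status_model(int has_handler, int enable_bit,
                                  int status_bit, uint32_t *out)
{
        uint32_t local = 0;

        if (has_handler)
                local |= EVT_HAS_HANDLER;
        if (enable_bit)
                local |= EVT_ENABLED;
        if (status_bit)
                local |= EVT_SET;

        *out = local;   /* caller's buffer is written exactly once */
        return 0;
}
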
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 56710a03c9b0..e889a5304abd 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -106,8 +106,8 @@ ACPI_EXPORT_SYMBOL(acpi_update_all_gpes)
106 * 106 *
107 * FUNCTION: acpi_enable_gpe 107 * FUNCTION: acpi_enable_gpe
108 * 108 *
109 * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1 109 * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
110 * gpe_number - GPE level within the GPE block 110 * gpe_number - GPE level within the GPE block
111 * 111 *
112 * RETURN: Status 112 * RETURN: Status
113 * 113 *
@@ -115,7 +115,6 @@ ACPI_EXPORT_SYMBOL(acpi_update_all_gpes)
115 * hardware-enabled. 115 * hardware-enabled.
116 * 116 *
117 ******************************************************************************/ 117 ******************************************************************************/
118
119acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number) 118acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number)
120{ 119{
121 acpi_status status = AE_BAD_PARAMETER; 120 acpi_status status = AE_BAD_PARAMETER;
@@ -490,8 +489,8 @@ ACPI_EXPORT_SYMBOL(acpi_clear_gpe)
490 * 489 *
491 * FUNCTION: acpi_get_gpe_status 490 * FUNCTION: acpi_get_gpe_status
492 * 491 *
493 * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1 492 * PARAMETERS: gpe_device - Parent GPE Device. NULL for GPE0/GPE1
494 * gpe_number - GPE level within the GPE block 493 * gpe_number - GPE level within the GPE block
495 * event_status - Where the current status of the event 494 * event_status - Where the current status of the event
496 * will be returned 495 * will be returned
497 * 496 *
@@ -524,9 +523,6 @@ acpi_get_gpe_status(acpi_handle gpe_device,
524 523
525 status = acpi_hw_get_gpe_status(gpe_event_info, event_status); 524 status = acpi_hw_get_gpe_status(gpe_event_info, event_status);
526 525
527 if (gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK)
528 *event_status |= ACPI_EVENT_FLAG_HANDLE;
529
530unlock_and_exit: 526unlock_and_exit:
531 acpi_os_release_lock(acpi_gbl_gpe_lock, flags); 527 acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
532 return_ACPI_STATUS(status); 528 return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index ea62d40fd161..48ac7b7b59cd 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -202,7 +202,7 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info)
202 202
203acpi_status 203acpi_status
204acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, 204acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
205 acpi_event_status * event_status) 205 acpi_event_status *event_status)
206{ 206{
207 u32 in_byte; 207 u32 in_byte;
208 u32 register_bit; 208 u32 register_bit;
@@ -216,6 +216,13 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
216 return (AE_BAD_PARAMETER); 216 return (AE_BAD_PARAMETER);
217 } 217 }
218 218
219 /* GPE currently handled? */
220
221 if ((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) !=
222 ACPI_GPE_DISPATCH_NONE) {
223 local_event_status |= ACPI_EVENT_FLAG_HAS_HANDLER;
224 }
225
219 /* Get the info block for the entire GPE register */ 226 /* Get the info block for the entire GPE register */
220 227
221 gpe_register_info = gpe_event_info->register_info; 228 gpe_register_info = gpe_event_info->register_info;
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index 65ab8fed3d5e..43a54af2b548 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -50,6 +50,36 @@ ACPI_MODULE_NAME("tbxfroot")
50 50
51/******************************************************************************* 51/*******************************************************************************
52 * 52 *
53 * FUNCTION: acpi_tb_get_rsdp_length
54 *
55 * PARAMETERS: rsdp - Pointer to RSDP
56 *
57 * RETURN: Table length
58 *
59 * DESCRIPTION: Get the length of the RSDP
60 *
61 ******************************************************************************/
62u32 acpi_tb_get_rsdp_length(struct acpi_table_rsdp *rsdp)
63{
64
65 if (!ACPI_VALIDATE_RSDP_SIG(rsdp->signature)) {
66
67 /* BAD Signature */
68
69 return (0);
70 }
71
72 /* "Length" field is available if table version >= 2 */
73
74 if (rsdp->revision >= 2) {
75 return (rsdp->length);
76 } else {
77 return (ACPI_RSDP_CHECKSUM_LENGTH);
78 }
79}
80
81/*******************************************************************************
82 *
53 * FUNCTION: acpi_tb_validate_rsdp 83 * FUNCTION: acpi_tb_validate_rsdp
54 * 84 *
55 * PARAMETERS: rsdp - Pointer to unvalidated RSDP 85 * PARAMETERS: rsdp - Pointer to unvalidated RSDP
@@ -59,7 +89,8 @@ ACPI_MODULE_NAME("tbxfroot")
59 * DESCRIPTION: Validate the RSDP (ptr) 89 * DESCRIPTION: Validate the RSDP (ptr)
60 * 90 *
61 ******************************************************************************/ 91 ******************************************************************************/
62acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp) 92
93acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp * rsdp)
63{ 94{
64 95
65 /* 96 /*
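The new acpi_tb_get_rsdp_length() leans on the RSDP layout: the ACPI 1.0 structure is a fixed 20 bytes (which is what ACPI_RSDP_CHECKSUM_LENGTH covers), while revision 2 and later append a Length field describing the full extended table. A self-contained model of the same check, with the struct laid out per the ACPI spec rather than using the kernel's types:

#include <stdint.h>
#include <string.h>

#define RSDP_V1_LENGTH 20       /* ACPI 1.0 RSDP is always 20 bytes */

struct rsdp {
        char     signature[8];  /* "RSD PTR " */
        uint8_t  checksum;
        char     oem_id[6];
        uint8_t  revision;      /* 0 = ACPI 1.0, 2 = ACPI 2.0+ */
        uint32_t rsdt_address;
        /* Fields below are present only when revision >= 2. */
        uint32_t length;
        uint64_t xsdt_address;
        uint8_t  extended_checksum;
        uint8_t  reserved[3];
} __attribute__((packed));

static uint32_t rsdp_length(const struct rsdp *r)
{
        if (memcmp(r->signature, "RSD PTR ", 8) != 0)
                return 0;       /* bad signature */
        return r->revision >= 2 ? r->length : RSDP_V1_LENGTH;
}
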
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index bea6896be122..143ec6ea1468 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -343,6 +343,7 @@ int acpi_device_update_power(struct acpi_device *device, int *state_p)
343 343
344 return 0; 344 return 0;
345} 345}
346EXPORT_SYMBOL_GPL(acpi_device_update_power);
346 347
347int acpi_bus_update_power(acpi_handle handle, int *state_p) 348int acpi_bus_update_power(acpi_handle handle, int *state_p)
348{ 349{
@@ -710,7 +711,7 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
710 return -ENODEV; 711 return -ENODEV;
711 } 712 }
712 713
713 return acpi_device_wakeup(adev, enable, ACPI_STATE_S0); 714 return acpi_device_wakeup(adev, ACPI_STATE_S0, enable);
714} 715}
715EXPORT_SYMBOL(acpi_pm_device_run_wake); 716EXPORT_SYMBOL(acpi_pm_device_run_wake);
716#endif /* CONFIG_PM_RUNTIME */ 717#endif /* CONFIG_PM_RUNTIME */
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index cb6066c809ea..3d304ff7f095 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -128,12 +128,13 @@ static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */
128static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ 128static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
129 129
130/* -------------------------------------------------------------------------- 130/* --------------------------------------------------------------------------
131 Transaction Management 131 * Transaction Management
132 -------------------------------------------------------------------------- */ 132 * -------------------------------------------------------------------------- */
133 133
134static inline u8 acpi_ec_read_status(struct acpi_ec *ec) 134static inline u8 acpi_ec_read_status(struct acpi_ec *ec)
135{ 135{
136 u8 x = inb(ec->command_addr); 136 u8 x = inb(ec->command_addr);
137
137 pr_debug("EC_SC(R) = 0x%2.2x " 138 pr_debug("EC_SC(R) = 0x%2.2x "
138 "SCI_EVT=%d BURST=%d CMD=%d IBF=%d OBF=%d\n", 139 "SCI_EVT=%d BURST=%d CMD=%d IBF=%d OBF=%d\n",
139 x, 140 x,
@@ -148,6 +149,7 @@ static inline u8 acpi_ec_read_status(struct acpi_ec *ec)
148static inline u8 acpi_ec_read_data(struct acpi_ec *ec) 149static inline u8 acpi_ec_read_data(struct acpi_ec *ec)
149{ 150{
150 u8 x = inb(ec->data_addr); 151 u8 x = inb(ec->data_addr);
152
151 pr_debug("EC_DATA(R) = 0x%2.2x\n", x); 153 pr_debug("EC_DATA(R) = 0x%2.2x\n", x);
152 return x; 154 return x;
153} 155}
@@ -164,10 +166,32 @@ static inline void acpi_ec_write_data(struct acpi_ec *ec, u8 data)
164 outb(data, ec->data_addr); 166 outb(data, ec->data_addr);
165} 167}
166 168
169#ifdef DEBUG
170static const char *acpi_ec_cmd_string(u8 cmd)
171{
172 switch (cmd) {
173 case 0x80:
174 return "RD_EC";
175 case 0x81:
176 return "WR_EC";
177 case 0x82:
178 return "BE_EC";
179 case 0x83:
180 return "BD_EC";
181 case 0x84:
182 return "QR_EC";
183 }
184 return "UNKNOWN";
185}
186#else
187#define acpi_ec_cmd_string(cmd) "UNDEF"
188#endif
189
167static int ec_transaction_completed(struct acpi_ec *ec) 190static int ec_transaction_completed(struct acpi_ec *ec)
168{ 191{
169 unsigned long flags; 192 unsigned long flags;
170 int ret = 0; 193 int ret = 0;
194
171 spin_lock_irqsave(&ec->lock, flags); 195 spin_lock_irqsave(&ec->lock, flags);
172 if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE)) 196 if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE))
173 ret = 1; 197 ret = 1;
@@ -181,7 +205,8 @@ static bool advance_transaction(struct acpi_ec *ec)
181 u8 status; 205 u8 status;
182 bool wakeup = false; 206 bool wakeup = false;
183 207
184 pr_debug("===== %s =====\n", in_interrupt() ? "IRQ" : "TASK"); 208 pr_debug("===== %s (%d) =====\n",
209 in_interrupt() ? "IRQ" : "TASK", smp_processor_id());
185 status = acpi_ec_read_status(ec); 210 status = acpi_ec_read_status(ec);
186 t = ec->curr; 211 t = ec->curr;
187 if (!t) 212 if (!t)
@@ -198,7 +223,8 @@ static bool advance_transaction(struct acpi_ec *ec)
198 if (t->rlen == t->ri) { 223 if (t->rlen == t->ri) {
199 t->flags |= ACPI_EC_COMMAND_COMPLETE; 224 t->flags |= ACPI_EC_COMMAND_COMPLETE;
200 if (t->command == ACPI_EC_COMMAND_QUERY) 225 if (t->command == ACPI_EC_COMMAND_QUERY)
201 pr_debug("hardware QR_EC completion\n"); 226 pr_debug("***** Command(%s) hardware completion *****\n",
227 acpi_ec_cmd_string(t->command));
202 wakeup = true; 228 wakeup = true;
203 } 229 }
204 } else 230 } else
@@ -221,7 +247,8 @@ static bool advance_transaction(struct acpi_ec *ec)
221 t->flags |= ACPI_EC_COMMAND_POLL; 247 t->flags |= ACPI_EC_COMMAND_POLL;
222 t->rdata[t->ri++] = 0x00; 248 t->rdata[t->ri++] = 0x00;
223 t->flags |= ACPI_EC_COMMAND_COMPLETE; 249 t->flags |= ACPI_EC_COMMAND_COMPLETE;
224 pr_debug("software QR_EC completion\n"); 250 pr_debug("***** Command(%s) software completion *****\n",
251 acpi_ec_cmd_string(t->command));
225 wakeup = true; 252 wakeup = true;
226 } else if ((status & ACPI_EC_FLAG_IBF) == 0) { 253 } else if ((status & ACPI_EC_FLAG_IBF) == 0) {
227 acpi_ec_write_cmd(ec, t->command); 254 acpi_ec_write_cmd(ec, t->command);
@@ -264,6 +291,7 @@ static int ec_poll(struct acpi_ec *ec)
264{ 291{
265 unsigned long flags; 292 unsigned long flags;
266 int repeat = 5; /* number of command restarts */ 293 int repeat = 5; /* number of command restarts */
294
267 while (repeat--) { 295 while (repeat--) {
268 unsigned long delay = jiffies + 296 unsigned long delay = jiffies +
269 msecs_to_jiffies(ec_delay); 297 msecs_to_jiffies(ec_delay);
@@ -296,18 +324,25 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec,
296{ 324{
297 unsigned long tmp; 325 unsigned long tmp;
298 int ret = 0; 326 int ret = 0;
327
299 if (EC_FLAGS_MSI) 328 if (EC_FLAGS_MSI)
300 udelay(ACPI_EC_MSI_UDELAY); 329 udelay(ACPI_EC_MSI_UDELAY);
301 /* start transaction */ 330 /* start transaction */
302 spin_lock_irqsave(&ec->lock, tmp); 331 spin_lock_irqsave(&ec->lock, tmp);
303 /* following two actions should be kept atomic */ 332 /* following two actions should be kept atomic */
304 ec->curr = t; 333 ec->curr = t;
334 pr_debug("***** Command(%s) started *****\n",
335 acpi_ec_cmd_string(t->command));
305 start_transaction(ec); 336 start_transaction(ec);
306 spin_unlock_irqrestore(&ec->lock, tmp); 337 spin_unlock_irqrestore(&ec->lock, tmp);
307 ret = ec_poll(ec); 338 ret = ec_poll(ec);
308 spin_lock_irqsave(&ec->lock, tmp); 339 spin_lock_irqsave(&ec->lock, tmp);
309 if (ec->curr->command == ACPI_EC_COMMAND_QUERY) 340 if (ec->curr->command == ACPI_EC_COMMAND_QUERY) {
310 clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags); 341 clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
342 pr_debug("***** Event stopped *****\n");
343 }
344 pr_debug("***** Command(%s) stopped *****\n",
345 acpi_ec_cmd_string(t->command));
311 ec->curr = NULL; 346 ec->curr = NULL;
312 spin_unlock_irqrestore(&ec->lock, tmp); 347 spin_unlock_irqrestore(&ec->lock, tmp);
313 return ret; 348 return ret;
@@ -317,6 +352,7 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t)
317{ 352{
318 int status; 353 int status;
319 u32 glk; 354 u32 glk;
355
320 if (!ec || (!t) || (t->wlen && !t->wdata) || (t->rlen && !t->rdata)) 356 if (!ec || (!t) || (t->wlen && !t->wdata) || (t->rlen && !t->rdata))
321 return -EINVAL; 357 return -EINVAL;
322 if (t->rdata) 358 if (t->rdata)
@@ -333,8 +369,6 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t)
333 goto unlock; 369 goto unlock;
334 } 370 }
335 } 371 }
336 pr_debug("transaction start (cmd=0x%02x, addr=0x%02x)\n",
337 t->command, t->wdata ? t->wdata[0] : 0);
338 /* disable GPE during transaction if storm is detected */ 372 /* disable GPE during transaction if storm is detected */
339 if (test_bit(EC_FLAGS_GPE_STORM, &ec->flags)) { 373 if (test_bit(EC_FLAGS_GPE_STORM, &ec->flags)) {
340 /* It has to be disabled, so that it doesn't trigger. */ 374 /* It has to be disabled, so that it doesn't trigger. */
@@ -355,7 +389,6 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t)
355 t->irq_count); 389 t->irq_count);
356 set_bit(EC_FLAGS_GPE_STORM, &ec->flags); 390 set_bit(EC_FLAGS_GPE_STORM, &ec->flags);
357 } 391 }
358 pr_debug("transaction end\n");
359 if (ec->global_lock) 392 if (ec->global_lock)
360 acpi_release_global_lock(glk); 393 acpi_release_global_lock(glk);
361unlock: 394unlock:
@@ -383,7 +416,7 @@ static int acpi_ec_burst_disable(struct acpi_ec *ec)
383 acpi_ec_transaction(ec, &t) : 0; 416 acpi_ec_transaction(ec, &t) : 0;
384} 417}
385 418
386static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 * data) 419static int acpi_ec_read(struct acpi_ec *ec, u8 address, u8 *data)
387{ 420{
388 int result; 421 int result;
389 u8 d; 422 u8 d;
@@ -419,10 +452,9 @@ int ec_read(u8 addr, u8 *val)
419 if (!err) { 452 if (!err) {
420 *val = temp_data; 453 *val = temp_data;
421 return 0; 454 return 0;
422 } else 455 }
423 return err; 456 return err;
424} 457}
425
426EXPORT_SYMBOL(ec_read); 458EXPORT_SYMBOL(ec_read);
427 459
428int ec_write(u8 addr, u8 val) 460int ec_write(u8 addr, u8 val)
@@ -436,22 +468,21 @@ int ec_write(u8 addr, u8 val)
436 468
437 return err; 469 return err;
438} 470}
439
440EXPORT_SYMBOL(ec_write); 471EXPORT_SYMBOL(ec_write);
441 472
442int ec_transaction(u8 command, 473int ec_transaction(u8 command,
443 const u8 * wdata, unsigned wdata_len, 474 const u8 *wdata, unsigned wdata_len,
444 u8 * rdata, unsigned rdata_len) 475 u8 *rdata, unsigned rdata_len)
445{ 476{
446 struct transaction t = {.command = command, 477 struct transaction t = {.command = command,
447 .wdata = wdata, .rdata = rdata, 478 .wdata = wdata, .rdata = rdata,
448 .wlen = wdata_len, .rlen = rdata_len}; 479 .wlen = wdata_len, .rlen = rdata_len};
480
449 if (!first_ec) 481 if (!first_ec)
450 return -ENODEV; 482 return -ENODEV;
451 483
452 return acpi_ec_transaction(first_ec, &t); 484 return acpi_ec_transaction(first_ec, &t);
453} 485}
454
455EXPORT_SYMBOL(ec_transaction); 486EXPORT_SYMBOL(ec_transaction);
456 487
457/* Get the handle to the EC device */ 488/* Get the handle to the EC device */
@@ -461,7 +492,6 @@ acpi_handle ec_get_handle(void)
461 return NULL; 492 return NULL;
462 return first_ec->handle; 493 return first_ec->handle;
463} 494}
464
465EXPORT_SYMBOL(ec_get_handle); 495EXPORT_SYMBOL(ec_get_handle);
466 496
467/* 497/*
@@ -525,13 +555,14 @@ void acpi_ec_unblock_transactions_early(void)
525 clear_bit(EC_FLAGS_BLOCKED, &first_ec->flags); 555 clear_bit(EC_FLAGS_BLOCKED, &first_ec->flags);
526} 556}
527 557
528static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 * data) 558static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 *data)
529{ 559{
530 int result; 560 int result;
531 u8 d; 561 u8 d;
532 struct transaction t = {.command = ACPI_EC_COMMAND_QUERY, 562 struct transaction t = {.command = ACPI_EC_COMMAND_QUERY,
533 .wdata = NULL, .rdata = &d, 563 .wdata = NULL, .rdata = &d,
534 .wlen = 0, .rlen = 1}; 564 .wlen = 0, .rlen = 1};
565
535 if (!ec || !data) 566 if (!ec || !data)
536 return -EINVAL; 567 return -EINVAL;
537 /* 568 /*
@@ -557,6 +588,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
557{ 588{
558 struct acpi_ec_query_handler *handler = 589 struct acpi_ec_query_handler *handler =
559 kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL); 590 kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL);
591
560 if (!handler) 592 if (!handler)
561 return -ENOMEM; 593 return -ENOMEM;
562 594
@@ -569,12 +601,12 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
569 mutex_unlock(&ec->mutex); 601 mutex_unlock(&ec->mutex);
570 return 0; 602 return 0;
571} 603}
572
573EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler); 604EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler);
574 605
575void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit) 606void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
576{ 607{
577 struct acpi_ec_query_handler *handler, *tmp; 608 struct acpi_ec_query_handler *handler, *tmp;
609
578 mutex_lock(&ec->mutex); 610 mutex_lock(&ec->mutex);
579 list_for_each_entry_safe(handler, tmp, &ec->list, node) { 611 list_for_each_entry_safe(handler, tmp, &ec->list, node) {
580 if (query_bit == handler->query_bit) { 612 if (query_bit == handler->query_bit) {
@@ -584,20 +616,20 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
584 } 616 }
585 mutex_unlock(&ec->mutex); 617 mutex_unlock(&ec->mutex);
586} 618}
587
588EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler); 619EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
589 620
590static void acpi_ec_run(void *cxt) 621static void acpi_ec_run(void *cxt)
591{ 622{
592 struct acpi_ec_query_handler *handler = cxt; 623 struct acpi_ec_query_handler *handler = cxt;
624
593 if (!handler) 625 if (!handler)
594 return; 626 return;
595 pr_debug("start query execution\n"); 627 pr_debug("##### Query(0x%02x) started #####\n", handler->query_bit);
596 if (handler->func) 628 if (handler->func)
597 handler->func(handler->data); 629 handler->func(handler->data);
598 else if (handler->handle) 630 else if (handler->handle)
599 acpi_evaluate_object(handler->handle, NULL, NULL, NULL); 631 acpi_evaluate_object(handler->handle, NULL, NULL, NULL);
600 pr_debug("stop query execution\n"); 632 pr_debug("##### Query(0x%02x) stopped #####\n", handler->query_bit);
601 kfree(handler); 633 kfree(handler);
602} 634}
603 635
@@ -620,8 +652,8 @@ static int acpi_ec_sync_query(struct acpi_ec *ec, u8 *data)
620 if (!copy) 652 if (!copy)
621 return -ENOMEM; 653 return -ENOMEM;
622 memcpy(copy, handler, sizeof(*copy)); 654 memcpy(copy, handler, sizeof(*copy));
623 pr_debug("push query execution (0x%2x) on queue\n", 655 pr_debug("##### Query(0x%02x) scheduled #####\n",
624 value); 656 handler->query_bit);
625 return acpi_os_execute((copy->func) ? 657 return acpi_os_execute((copy->func) ?
626 OSL_NOTIFY_HANDLER : OSL_GPE_HANDLER, 658 OSL_NOTIFY_HANDLER : OSL_GPE_HANDLER,
627 acpi_ec_run, copy); 659 acpi_ec_run, copy);
@@ -633,6 +665,7 @@ static int acpi_ec_sync_query(struct acpi_ec *ec, u8 *data)
633static void acpi_ec_gpe_query(void *ec_cxt) 665static void acpi_ec_gpe_query(void *ec_cxt)
634{ 666{
635 struct acpi_ec *ec = ec_cxt; 667 struct acpi_ec *ec = ec_cxt;
668
636 if (!ec) 669 if (!ec)
637 return; 670 return;
638 mutex_lock(&ec->mutex); 671 mutex_lock(&ec->mutex);
@@ -644,7 +677,7 @@ static int ec_check_sci(struct acpi_ec *ec, u8 state)
644{ 677{
645 if (state & ACPI_EC_FLAG_SCI) { 678 if (state & ACPI_EC_FLAG_SCI) {
646 if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) { 679 if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
647 pr_debug("push gpe query to the queue\n"); 680 pr_debug("***** Event started *****\n");
648 return acpi_os_execute(OSL_NOTIFY_HANDLER, 681 return acpi_os_execute(OSL_NOTIFY_HANDLER,
649 acpi_ec_gpe_query, ec); 682 acpi_ec_gpe_query, ec);
650 } 683 }
@@ -667,8 +700,8 @@ static u32 acpi_ec_gpe_handler(acpi_handle gpe_device,
667} 700}
668 701
669/* -------------------------------------------------------------------------- 702/* --------------------------------------------------------------------------
670 Address Space Management 703 * Address Space Management
671 -------------------------------------------------------------------------- */ 704 * -------------------------------------------------------------------------- */
672 705
673static acpi_status 706static acpi_status
674acpi_ec_space_handler(u32 function, acpi_physical_address address, 707acpi_ec_space_handler(u32 function, acpi_physical_address address,
@@ -699,27 +732,26 @@ acpi_ec_space_handler(u32 function, acpi_physical_address address,
699 switch (result) { 732 switch (result) {
700 case -EINVAL: 733 case -EINVAL:
701 return AE_BAD_PARAMETER; 734 return AE_BAD_PARAMETER;
702 break;
703 case -ENODEV: 735 case -ENODEV:
704 return AE_NOT_FOUND; 736 return AE_NOT_FOUND;
705 break;
706 case -ETIME: 737 case -ETIME:
707 return AE_TIME; 738 return AE_TIME;
708 break;
709 default: 739 default:
710 return AE_OK; 740 return AE_OK;
711 } 741 }
712} 742}
713 743
714/* -------------------------------------------------------------------------- 744/* --------------------------------------------------------------------------
715 Driver Interface 745 * Driver Interface
716 -------------------------------------------------------------------------- */ 746 * -------------------------------------------------------------------------- */
747
717static acpi_status 748static acpi_status
718ec_parse_io_ports(struct acpi_resource *resource, void *context); 749ec_parse_io_ports(struct acpi_resource *resource, void *context);
719 750
720static struct acpi_ec *make_acpi_ec(void) 751static struct acpi_ec *make_acpi_ec(void)
721{ 752{
722 struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL); 753 struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL);
754
723 if (!ec) 755 if (!ec)
724 return NULL; 756 return NULL;
725 ec->flags = 1 << EC_FLAGS_QUERY_PENDING; 757 ec->flags = 1 << EC_FLAGS_QUERY_PENDING;
@@ -742,9 +774,8 @@ acpi_ec_register_query_methods(acpi_handle handle, u32 level,
742 774
743 status = acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer); 775 status = acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer);
744 776
745 if (ACPI_SUCCESS(status) && sscanf(node_name, "_Q%x", &value) == 1) { 777 if (ACPI_SUCCESS(status) && sscanf(node_name, "_Q%x", &value) == 1)
746 acpi_ec_add_query_handler(ec, value, handle, NULL, NULL); 778 acpi_ec_add_query_handler(ec, value, handle, NULL, NULL);
747 }
748 return AE_OK; 779 return AE_OK;
749} 780}
750 781
@@ -753,7 +784,6 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
753{ 784{
754 acpi_status status; 785 acpi_status status;
755 unsigned long long tmp = 0; 786 unsigned long long tmp = 0;
756
757 struct acpi_ec *ec = context; 787 struct acpi_ec *ec = context;
758 788
759 /* clear addr values, ec_parse_io_ports depend on it */ 789 /* clear addr values, ec_parse_io_ports depend on it */
@@ -781,6 +811,7 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
781static int ec_install_handlers(struct acpi_ec *ec) 811static int ec_install_handlers(struct acpi_ec *ec)
782{ 812{
783 acpi_status status; 813 acpi_status status;
814
784 if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags)) 815 if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
785 return 0; 816 return 0;
786 status = acpi_install_gpe_handler(NULL, ec->gpe, 817 status = acpi_install_gpe_handler(NULL, ec->gpe,
@@ -1078,7 +1109,8 @@ int __init acpi_ec_ecdt_probe(void)
1078 boot_ec->data_addr = ecdt_ptr->data.address; 1109 boot_ec->data_addr = ecdt_ptr->data.address;
1079 boot_ec->gpe = ecdt_ptr->gpe; 1110 boot_ec->gpe = ecdt_ptr->gpe;
1080 boot_ec->handle = ACPI_ROOT_OBJECT; 1111 boot_ec->handle = ACPI_ROOT_OBJECT;
1081 acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id, &boot_ec->handle); 1112 acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id,
1113 &boot_ec->handle);
1082 /* Don't trust ECDT, which comes from ASUSTek */ 1114 /* Don't trust ECDT, which comes from ASUSTek */
1083 if (!EC_FLAGS_VALIDATE_ECDT) 1115 if (!EC_FLAGS_VALIDATE_ECDT)
1084 goto install; 1116 goto install;
@@ -1162,6 +1194,5 @@ static void __exit acpi_ec_exit(void)
1162{ 1194{
1163 1195
1164 acpi_bus_unregister_driver(&acpi_ec_driver); 1196 acpi_bus_unregister_driver(&acpi_ec_driver);
1165 return;
1166} 1197}
1167#endif /* 0 */ 1198#endif /* 0 */
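The ec.c changes are mostly debugging polish plus coding-style cleanups, and the exported helpers keep their contracts: ec_read()/ec_write() run a full transaction against the first enumerated EC and return 0 or a negative errno. A hypothetical caller, assuming the prototypes exported via <linux/acpi.h>; the 0x40 offset is made up purely for illustration:

#include <linux/acpi.h>         /* ec_read()/ec_write() prototypes */

static int sample_ec_probe_byte(void)
{
        u8 val;
        int err = ec_read(0x40, &val);  /* 0x40: illustrative EC offset */

        if (err)
                return err;     /* e.g. -ENODEV when no EC is registered */
        return val;
}
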
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 5328b1090e08..caf9b76b7ef8 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -30,22 +30,19 @@
30#include <linux/uaccess.h> 30#include <linux/uaccess.h>
31#include <linux/thermal.h> 31#include <linux/thermal.h>
32#include <linux/acpi.h> 32#include <linux/acpi.h>
33 33#include <linux/platform_device.h>
34#define ACPI_FAN_CLASS "fan" 34#include <linux/sort.h>
35#define ACPI_FAN_FILE_STATE "state"
36
37#define _COMPONENT ACPI_FAN_COMPONENT
38ACPI_MODULE_NAME("fan");
39 35
40MODULE_AUTHOR("Paul Diefenbaugh"); 36MODULE_AUTHOR("Paul Diefenbaugh");
41MODULE_DESCRIPTION("ACPI Fan Driver"); 37MODULE_DESCRIPTION("ACPI Fan Driver");
42MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
43 39
44static int acpi_fan_add(struct acpi_device *device); 40static int acpi_fan_probe(struct platform_device *pdev);
45static int acpi_fan_remove(struct acpi_device *device); 41static int acpi_fan_remove(struct platform_device *pdev);
46 42
47static const struct acpi_device_id fan_device_ids[] = { 43static const struct acpi_device_id fan_device_ids[] = {
48 {"PNP0C0B", 0}, 44 {"PNP0C0B", 0},
45 {"INT3404", 0},
49 {"", 0}, 46 {"", 0},
50}; 47};
51MODULE_DEVICE_TABLE(acpi, fan_device_ids); 48MODULE_DEVICE_TABLE(acpi, fan_device_ids);
@@ -64,37 +61,100 @@ static struct dev_pm_ops acpi_fan_pm = {
64#define FAN_PM_OPS_PTR NULL 61#define FAN_PM_OPS_PTR NULL
65#endif 62#endif
66 63
67static struct acpi_driver acpi_fan_driver = { 64struct acpi_fan_fps {
68 .name = "fan", 65 u64 control;
69 .class = ACPI_FAN_CLASS, 66 u64 trip_point;
70 .ids = fan_device_ids, 67 u64 speed;
71 .ops = { 68 u64 noise_level;
72 .add = acpi_fan_add, 69 u64 power;
73 .remove = acpi_fan_remove, 70};
74 }, 71
75 .drv.pm = FAN_PM_OPS_PTR, 72struct acpi_fan_fif {
73 u64 revision;
74 u64 fine_grain_ctrl;
75 u64 step_size;
76 u64 low_speed_notification;
77};
78
79struct acpi_fan {
80 bool acpi4;
81 struct acpi_fan_fif fif;
82 struct acpi_fan_fps *fps;
83 int fps_count;
84 struct thermal_cooling_device *cdev;
85};
86
87static struct platform_driver acpi_fan_driver = {
88 .probe = acpi_fan_probe,
89 .remove = acpi_fan_remove,
90 .driver = {
91 .name = "acpi-fan",
92 .acpi_match_table = fan_device_ids,
93 .pm = FAN_PM_OPS_PTR,
94 },
76}; 95};
77 96
78/* thermal cooling device callbacks */ 97/* thermal cooling device callbacks */
79static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long 98static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long
80 *state) 99 *state)
81{ 100{
82 /* ACPI fan device only support two states: ON/OFF */ 101 struct acpi_device *device = cdev->devdata;
83 *state = 1; 102 struct acpi_fan *fan = acpi_driver_data(device);
103
104 if (fan->acpi4)
105 *state = fan->fps_count - 1;
106 else
107 *state = 1;
84 return 0; 108 return 0;
85} 109}
86 110
87static int fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long 111static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
88 *state) 112{
113 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
114 struct acpi_fan *fan = acpi_driver_data(device);
115 union acpi_object *obj;
116 acpi_status status;
117 int control, i;
118
119 status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer);
120 if (ACPI_FAILURE(status)) {
121 dev_err(&device->dev, "Get fan state failed\n");
122 return status;
123 }
124
125 obj = buffer.pointer;
126 if (!obj || obj->type != ACPI_TYPE_PACKAGE ||
127 obj->package.count != 3 ||
128 obj->package.elements[1].type != ACPI_TYPE_INTEGER) {
129 dev_err(&device->dev, "Invalid _FST data\n");
130 status = -EINVAL;
131 goto err;
132 }
133
134 control = obj->package.elements[1].integer.value;
135 for (i = 0; i < fan->fps_count; i++) {
136 if (control == fan->fps[i].control)
137 break;
138 }
139 if (i == fan->fps_count) {
140 dev_dbg(&device->dev, "Invalid control value returned\n");
141 status = -EINVAL;
142 goto err;
143 }
144
145 *state = i;
146
147err:
148 kfree(obj);
149 return status;
150}
151
152static int fan_get_state(struct acpi_device *device, unsigned long *state)
89{ 153{
90 struct acpi_device *device = cdev->devdata;
91 int result; 154 int result;
92 int acpi_state = ACPI_STATE_D0; 155 int acpi_state = ACPI_STATE_D0;
93 156
94 if (!device) 157 result = acpi_device_update_power(device, &acpi_state);
95 return -EINVAL;
96
97 result = acpi_bus_update_power(device->handle, &acpi_state);
98 if (result) 158 if (result)
99 return result; 159 return result;
100 160
@@ -103,21 +163,57 @@ static int fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long
103 return 0; 163 return 0;
104} 164}
105 165
106static int 166static int fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long
107fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) 167 *state)
108{ 168{
109 struct acpi_device *device = cdev->devdata; 169 struct acpi_device *device = cdev->devdata;
110 int result; 170 struct acpi_fan *fan = acpi_driver_data(device);
111 171
112 if (!device || (state != 0 && state != 1)) 172 if (fan->acpi4)
173 return fan_get_state_acpi4(device, state);
174 else
175 return fan_get_state(device, state);
176}
177
178static int fan_set_state(struct acpi_device *device, unsigned long state)
179{
180 if (state != 0 && state != 1)
113 return -EINVAL; 181 return -EINVAL;
114 182
115 result = acpi_bus_set_power(device->handle, 183 return acpi_device_set_power(device,
116 state ? ACPI_STATE_D0 : ACPI_STATE_D3_COLD); 184 state ? ACPI_STATE_D0 : ACPI_STATE_D3_COLD);
185}
117 186
118 return result; 187static int fan_set_state_acpi4(struct acpi_device *device, unsigned long state)
188{
189 struct acpi_fan *fan = acpi_driver_data(device);
190 acpi_status status;
191
192 if (state >= fan->fps_count)
193 return -EINVAL;
194
195 status = acpi_execute_simple_method(device->handle, "_FSL",
196 fan->fps[state].control);
197 if (ACPI_FAILURE(status)) {
198 dev_dbg(&device->dev, "Failed to set state by _FSL\n");
199 return status;
200 }
201
202 return 0;
119} 203}
120 204
205static int
206fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
207{
208 struct acpi_device *device = cdev->devdata;
209 struct acpi_fan *fan = acpi_driver_data(device);
210
211 if (fan->acpi4)
212 return fan_set_state_acpi4(device, state);
213 else
214 return fan_set_state(device, state);
215 }
216
121static const struct thermal_cooling_device_ops fan_cooling_ops = { 217static const struct thermal_cooling_device_ops fan_cooling_ops = {
122 .get_max_state = fan_get_max_state, 218 .get_max_state = fan_get_max_state,
123 .get_cur_state = fan_get_cur_state, 219 .get_cur_state = fan_get_cur_state,
@@ -129,21 +225,125 @@ static const struct thermal_cooling_device_ops fan_cooling_ops = {
129 * -------------------------------------------------------------------------- 225 * --------------------------------------------------------------------------
130*/ 226*/
131 227
132static int acpi_fan_add(struct acpi_device *device) 228static bool acpi_fan_is_acpi4(struct acpi_device *device)
133{ 229{
134 int result = 0; 230 return acpi_has_method(device->handle, "_FIF") &&
135 struct thermal_cooling_device *cdev; 231 acpi_has_method(device->handle, "_FPS") &&
232 acpi_has_method(device->handle, "_FSL") &&
233 acpi_has_method(device->handle, "_FST");
234}
136 235
137 if (!device) 236static int acpi_fan_get_fif(struct acpi_device *device)
138 return -EINVAL; 237{
238 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
239 struct acpi_fan *fan = acpi_driver_data(device);
240 struct acpi_buffer format = { sizeof("NNNN"), "NNNN" };
241 struct acpi_buffer fif = { sizeof(fan->fif), &fan->fif };
242 union acpi_object *obj;
243 acpi_status status;
244
245 status = acpi_evaluate_object(device->handle, "_FIF", NULL, &buffer);
246 if (ACPI_FAILURE(status))
247 return status;
248
249 obj = buffer.pointer;
250 if (!obj || obj->type != ACPI_TYPE_PACKAGE) {
251 dev_err(&device->dev, "Invalid _FIF data\n");
252 status = -EINVAL;
253 goto err;
254 }
139 255
140 strcpy(acpi_device_name(device), "Fan"); 256 status = acpi_extract_package(obj, &format, &fif);
141 strcpy(acpi_device_class(device), ACPI_FAN_CLASS); 257 if (ACPI_FAILURE(status)) {
258 dev_err(&device->dev, "Invalid _FIF element\n");
259 status = -EINVAL;
260 }
142 261
143 result = acpi_bus_update_power(device->handle, NULL); 262err:
144 if (result) { 263 kfree(obj);
145 dev_err(&device->dev, "Setting initial power state\n"); 264 return status;
146 goto end; 265}
266
267static int acpi_fan_speed_cmp(const void *a, const void *b)
268{
269 const struct acpi_fan_fps *fps1 = a;
270 const struct acpi_fan_fps *fps2 = b;
271 return fps1->speed - fps2->speed;
272}
273
274static int acpi_fan_get_fps(struct acpi_device *device)
275{
276 struct acpi_fan *fan = acpi_driver_data(device);
277 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
278 union acpi_object *obj;
279 acpi_status status;
280 int i;
281
282 status = acpi_evaluate_object(device->handle, "_FPS", NULL, &buffer);
283 if (ACPI_FAILURE(status))
284 return status;
285
286 obj = buffer.pointer;
287 if (!obj || obj->type != ACPI_TYPE_PACKAGE || obj->package.count < 2) {
288 dev_err(&device->dev, "Invalid _FPS data\n");
289 status = -EINVAL;
290 goto err;
291 }
292
293 fan->fps_count = obj->package.count - 1; /* minus revision field */
294 fan->fps = devm_kzalloc(&device->dev,
295 fan->fps_count * sizeof(struct acpi_fan_fps),
296 GFP_KERNEL);
297 if (!fan->fps) {
298 dev_err(&device->dev, "Not enough memory\n");
299 status = -ENOMEM;
300 goto err;
301 }
302 for (i = 0; i < fan->fps_count; i++) {
303 struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
304 struct acpi_buffer fps = { sizeof(fan->fps[i]), &fan->fps[i] };
305 status = acpi_extract_package(&obj->package.elements[i + 1],
306 &format, &fps);
307 if (ACPI_FAILURE(status)) {
308 dev_err(&device->dev, "Invalid _FPS element\n");
309 break;
310 }
311 }
312
313 /* sort the state array according to fan speed in increase order */
314 sort(fan->fps, fan->fps_count, sizeof(*fan->fps),
315 acpi_fan_speed_cmp, NULL);
316
317err:
318 kfree(obj);
319 return status;
320}
321
322static int acpi_fan_probe(struct platform_device *pdev)
323{
324 int result = 0;
325 struct thermal_cooling_device *cdev;
326 struct acpi_fan *fan;
327 struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
328
329 fan = devm_kzalloc(&pdev->dev, sizeof(*fan), GFP_KERNEL);
330 if (!fan) {
331 dev_err(&device->dev, "No memory for fan\n");
332 return -ENOMEM;
333 }
334 device->driver_data = fan;
335 platform_set_drvdata(pdev, fan);
336
337 if (acpi_fan_is_acpi4(device)) {
338 if (acpi_fan_get_fif(device) || acpi_fan_get_fps(device))
339 goto end;
340 fan->acpi4 = true;
341 } else {
342 result = acpi_device_update_power(device, NULL);
343 if (result) {
344 dev_err(&device->dev, "Setting initial power state\n");
345 goto end;
346 }
147 } 347 }
148 348
149 cdev = thermal_cooling_device_register("Fan", device, 349 cdev = thermal_cooling_device_register("Fan", device,
@@ -153,44 +353,32 @@ static int acpi_fan_add(struct acpi_device *device)
153 goto end; 353 goto end;
154 } 354 }
155 355
156 dev_dbg(&device->dev, "registered as cooling_device%d\n", cdev->id); 356 dev_dbg(&pdev->dev, "registered as cooling_device%d\n", cdev->id);
157 357
158 device->driver_data = cdev; 358 fan->cdev = cdev;
159 result = sysfs_create_link(&device->dev.kobj, 359 result = sysfs_create_link(&pdev->dev.kobj,
160 &cdev->device.kobj, 360 &cdev->device.kobj,
161 "thermal_cooling"); 361 "thermal_cooling");
162 if (result) 362 if (result)
163 dev_err(&device->dev, "Failed to create sysfs link " 363 dev_err(&pdev->dev, "Failed to create sysfs link 'thermal_cooling'\n");
164 "'thermal_cooling'\n");
165 364
166 result = sysfs_create_link(&cdev->device.kobj, 365 result = sysfs_create_link(&cdev->device.kobj,
167 &device->dev.kobj, 366 &pdev->dev.kobj,
168 "device"); 367 "device");
169 if (result) 368 if (result)
170 dev_err(&device->dev, "Failed to create sysfs link 'device'\n"); 369 dev_err(&pdev->dev, "Failed to create sysfs link 'device'\n");
171
172 dev_info(&device->dev, "ACPI: %s [%s] (%s)\n",
173 acpi_device_name(device), acpi_device_bid(device),
174 !device->power.state ? "on" : "off");
175 370
176end: 371end:
177 return result; 372 return result;
178} 373}
179 374
180static int acpi_fan_remove(struct acpi_device *device) 375static int acpi_fan_remove(struct platform_device *pdev)
181{ 376{
182 struct thermal_cooling_device *cdev; 377 struct acpi_fan *fan = platform_get_drvdata(pdev);
183
184 if (!device)
185 return -EINVAL;
186
187 cdev = acpi_driver_data(device);
188 if (!cdev)
189 return -EINVAL;
190 378
191 sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); 379 sysfs_remove_link(&pdev->dev.kobj, "thermal_cooling");
192 sysfs_remove_link(&cdev->device.kobj, "device"); 380 sysfs_remove_link(&fan->cdev->device.kobj, "device");
193 thermal_cooling_device_unregister(cdev); 381 thermal_cooling_device_unregister(fan->cdev);
194 382
195 return 0; 383 return 0;
196} 384}
@@ -198,10 +386,11 @@ static int acpi_fan_remove(struct acpi_device *device)
198#ifdef CONFIG_PM_SLEEP 386#ifdef CONFIG_PM_SLEEP
199static int acpi_fan_suspend(struct device *dev) 387static int acpi_fan_suspend(struct device *dev)
200{ 388{
201 if (!dev) 389 struct acpi_fan *fan = dev_get_drvdata(dev);
202 return -EINVAL; 390 if (fan->acpi4)
391 return 0;
203 392
204 acpi_bus_set_power(to_acpi_device(dev)->handle, ACPI_STATE_D0); 393 acpi_device_set_power(ACPI_COMPANION(dev), ACPI_STATE_D0);
205 394
206 return AE_OK; 395 return AE_OK;
207} 396}
@@ -209,11 +398,12 @@ static int acpi_fan_suspend(struct device *dev)
209static int acpi_fan_resume(struct device *dev) 398static int acpi_fan_resume(struct device *dev)
210{ 399{
211 int result; 400 int result;
401 struct acpi_fan *fan = dev_get_drvdata(dev);
212 402
213 if (!dev) 403 if (fan->acpi4)
214 return -EINVAL; 404 return 0;
215 405
216 result = acpi_bus_update_power(to_acpi_device(dev)->handle, NULL); 406 result = acpi_device_update_power(ACPI_COMPANION(dev), NULL);
217 if (result) 407 if (result)
218 dev_err(dev, "Error updating fan power state\n"); 408 dev_err(dev, "Error updating fan power state\n");
219 409
@@ -221,4 +411,4 @@ static int acpi_fan_resume(struct device *dev)
221} 411}
222#endif 412#endif
223 413
224module_acpi_driver(acpi_fan_driver); 414module_platform_driver(acpi_fan_driver);
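With the ACPI 4.0 fan path added above, cooling states index the _FPS-derived table (sorted by increasing speed), and _FST reports the currently active control value, which fan_get_state_acpi4() must map back to an index. The reverse lookup in isolation, with illustrative types:

#include <stdint.h>

struct fps_entry {
        uint64_t control, trip_point, speed, noise_level, power;
};

/* Return the cooling-state index whose control value matches what _FST
 * reported, or -1 when the firmware hands back an unknown value. */
static int control_to_state(const struct fps_entry *fps, int count,
                            uint64_t control)
{
        for (int i = 0; i < count; i++)
                if (fps[i].control == control)
                        return i;
        return -1;
}
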
diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c
new file mode 100644
index 000000000000..a27d31d1ba24
--- /dev/null
+++ b/drivers/acpi/int340x_thermal.c
@@ -0,0 +1,51 @@
1/*
2 * ACPI support for int340x thermal drivers
3 *
4 * Copyright (C) 2014, Intel Corporation
5 * Authors: Zhang Rui <rui.zhang@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/acpi.h>
13#include <linux/module.h>
14
15#include "internal.h"
16
17#define DO_ENUMERATION 0x01
18static const struct acpi_device_id int340x_thermal_device_ids[] = {
19 {"INT3400", DO_ENUMERATION },
20 {"INT3401"},
21 {"INT3402"},
22 {"INT3403"},
23 {"INT3404"},
24 {"INT3406"},
25 {"INT3407"},
26 {"INT3408"},
27 {"INT3409"},
28 {"INT340A"},
29 {"INT340B"},
30 {""},
31};
32
33static int int340x_thermal_handler_attach(struct acpi_device *adev,
34 const struct acpi_device_id *id)
35{
36#if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE)
37 if (id->driver_data == DO_ENUMERATION)
38 acpi_create_platform_device(adev);
39#endif
40 return 1;
41}
42
43static struct acpi_scan_handler int340x_thermal_handler = {
44 .ids = int340x_thermal_device_ids,
45 .attach = int340x_thermal_handler_attach,
46};
47
48void __init acpi_int340x_thermal_init(void)
49{
50 acpi_scan_add_handler(&int340x_thermal_handler);
51}
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 4c5cf77e7576..447f6d679b29 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -31,6 +31,7 @@ void acpi_pci_link_init(void);
31void acpi_processor_init(void); 31void acpi_processor_init(void);
32void acpi_platform_init(void); 32void acpi_platform_init(void);
33void acpi_pnp_init(void); 33void acpi_pnp_init(void);
34void acpi_int340x_thermal_init(void);
34int acpi_sysfs_init(void); 35int acpi_sysfs_init(void);
35void acpi_container_init(void); 36void acpi_container_init(void);
36void acpi_memory_hotplug_init(void); 37void acpi_memory_hotplug_init(void);
@@ -103,8 +104,6 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
103int acpi_power_on_resources(struct acpi_device *device, int state); 104int acpi_power_on_resources(struct acpi_device *device, int state);
104int acpi_power_transition(struct acpi_device *device, int state); 105int acpi_power_transition(struct acpi_device *device, int state);
105 106
106int acpi_device_update_power(struct acpi_device *device, int *state_p);
107
108int acpi_wakeup_device_init(void); 107int acpi_wakeup_device_init(void);
109 108
110#ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC 109#ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
@@ -168,13 +167,6 @@ static inline void suspend_nvs_restore(void) {}
168#endif 167#endif
169 168
170/*-------------------------------------------------------------------------- 169/*--------------------------------------------------------------------------
171 Platform bus support
172 -------------------------------------------------------------------------- */
173struct platform_device;
174
175struct platform_device *acpi_create_platform_device(struct acpi_device *adev);
176
177/*--------------------------------------------------------------------------
178 Video 170 Video
179 -------------------------------------------------------------------------- */ 171 -------------------------------------------------------------------------- */
180#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) 172#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index ae44d8654c82..d670158a26c5 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1470,7 +1470,7 @@ static void acpi_wakeup_gpe_init(struct acpi_device *device)
1470 if (ACPI_FAILURE(status)) 1470 if (ACPI_FAILURE(status))
1471 return; 1471 return;
1472 1472
1473 wakeup->flags.run_wake = !!(event_status & ACPI_EVENT_FLAG_HANDLE); 1473 wakeup->flags.run_wake = !!(event_status & ACPI_EVENT_FLAG_HAS_HANDLER);
1474} 1474}
1475 1475
1476static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device) 1476static void acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
@@ -2315,6 +2315,7 @@ int __init acpi_scan_init(void)
2315 acpi_container_init(); 2315 acpi_container_init();
2316 acpi_memory_hotplug_init(); 2316 acpi_memory_hotplug_init();
2317 acpi_pnp_init(); 2317 acpi_pnp_init();
2318 acpi_int340x_thermal_init();
2318 2319
2319 mutex_lock(&acpi_scan_lock); 2320 mutex_lock(&acpi_scan_lock);
2320 /* 2321 /*
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 38cb9782d4b8..13e577c80201 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -537,7 +537,7 @@ static ssize_t counter_show(struct kobject *kobj,
537 if (result) 537 if (result)
538 goto end; 538 goto end;
539 539
540 if (!(status & ACPI_EVENT_FLAG_HANDLE)) 540 if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER))
541 size += sprintf(buf + size, " invalid"); 541 size += sprintf(buf + size, " invalid");
542 else if (status & ACPI_EVENT_FLAG_ENABLED) 542 else if (status & ACPI_EVENT_FLAG_ENABLED)
543 size += sprintf(buf + size, " enabled"); 543 size += sprintf(buf + size, " enabled");
@@ -581,7 +581,7 @@ static ssize_t counter_set(struct kobject *kobj,
581 if (result) 581 if (result)
582 goto end; 582 goto end;
583 583
584 if (!(status & ACPI_EVENT_FLAG_HANDLE)) { 584 if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER)) {
585 printk(KERN_WARNING PREFIX 585 printk(KERN_WARNING PREFIX
586 "Can not change Invalid GPE/Fixed Event status\n"); 586 "Can not change Invalid GPE/Fixed Event status\n");
587 return -EINVAL; 587 return -EINVAL;
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 112817e963e0..d24fa1964eb8 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -528,7 +528,6 @@ static void acpi_thermal_check(void *data)
528} 528}
529 529
530/* sys I/F for generic thermal sysfs support */ 530/* sys I/F for generic thermal sysfs support */
531#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)
532 531
533static int thermal_get_temp(struct thermal_zone_device *thermal, 532static int thermal_get_temp(struct thermal_zone_device *thermal,
534 unsigned long *temp) 533 unsigned long *temp)
@@ -543,7 +542,8 @@ static int thermal_get_temp(struct thermal_zone_device *thermal,
543 if (result) 542 if (result)
544 return result; 543 return result;
545 544
546 *temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset); 545 *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(tz->temperature,
546 tz->kelvin_offset);
547 return 0; 547 return 0;
548} 548}
549 549
@@ -647,7 +647,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
647 647
648 if (tz->trips.critical.flags.valid) { 648 if (tz->trips.critical.flags.valid) {
649 if (!trip) { 649 if (!trip) {
650 *temp = KELVIN_TO_MILLICELSIUS( 650 *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
651 tz->trips.critical.temperature, 651 tz->trips.critical.temperature,
652 tz->kelvin_offset); 652 tz->kelvin_offset);
653 return 0; 653 return 0;
@@ -657,7 +657,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
657 657
658 if (tz->trips.hot.flags.valid) { 658 if (tz->trips.hot.flags.valid) {
659 if (!trip) { 659 if (!trip) {
660 *temp = KELVIN_TO_MILLICELSIUS( 660 *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
661 tz->trips.hot.temperature, 661 tz->trips.hot.temperature,
662 tz->kelvin_offset); 662 tz->kelvin_offset);
663 return 0; 663 return 0;
@@ -667,7 +667,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
667 667
668 if (tz->trips.passive.flags.valid) { 668 if (tz->trips.passive.flags.valid) {
669 if (!trip) { 669 if (!trip) {
670 *temp = KELVIN_TO_MILLICELSIUS( 670 *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
671 tz->trips.passive.temperature, 671 tz->trips.passive.temperature,
672 tz->kelvin_offset); 672 tz->kelvin_offset);
673 return 0; 673 return 0;
@@ -678,7 +678,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
678 for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && 678 for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
679 tz->trips.active[i].flags.valid; i++) { 679 tz->trips.active[i].flags.valid; i++) {
680 if (!trip) { 680 if (!trip) {
681 *temp = KELVIN_TO_MILLICELSIUS( 681 *temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
682 tz->trips.active[i].temperature, 682 tz->trips.active[i].temperature,
683 tz->kelvin_offset); 683 tz->kelvin_offset);
684 return 0; 684 return 0;
@@ -694,7 +694,7 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
694 struct acpi_thermal *tz = thermal->devdata; 694 struct acpi_thermal *tz = thermal->devdata;
695 695
696 if (tz->trips.critical.flags.valid) { 696 if (tz->trips.critical.flags.valid) {
697 *temperature = KELVIN_TO_MILLICELSIUS( 697 *temperature = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
698 tz->trips.critical.temperature, 698 tz->trips.critical.temperature,
699 tz->kelvin_offset); 699 tz->kelvin_offset);
700 return 0; 700 return 0;
@@ -714,8 +714,8 @@ static int thermal_get_trend(struct thermal_zone_device *thermal,
714 714
715 if (type == THERMAL_TRIP_ACTIVE) { 715 if (type == THERMAL_TRIP_ACTIVE) {
716 unsigned long trip_temp; 716 unsigned long trip_temp;
717 unsigned long temp = KELVIN_TO_MILLICELSIUS(tz->temperature, 717 unsigned long temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(
718 tz->kelvin_offset); 718 tz->temperature, tz->kelvin_offset);
719 if (thermal_get_trip_temp(thermal, trip, &trip_temp)) 719 if (thermal_get_trip_temp(thermal, trip, &trip_temp))
720 return -EINVAL; 720 return -EINVAL;
721 721
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 834f35c4bf8d..371ac12d25b1 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -149,6 +149,21 @@ acpi_extract_package(union acpi_object *package,
149 break; 149 break;
150 } 150 }
151 break; 151 break;
152 case ACPI_TYPE_LOCAL_REFERENCE:
153 switch (format_string[i]) {
154 case 'R':
155 size_required += sizeof(void *);
156 tail_offset += sizeof(void *);
157 break;
158 default:
159 printk(KERN_WARNING PREFIX "Invalid package element"
160 " [%d] got reference,"
161 " expecting [%c]\n",
162 i, format_string[i]);
163 return AE_BAD_DATA;
164 break;
165 }
166 break;
152 167
153 case ACPI_TYPE_PACKAGE: 168 case ACPI_TYPE_PACKAGE:
154 default: 169 default:
@@ -247,7 +262,18 @@ acpi_extract_package(union acpi_object *package,
247 break; 262 break;
248 } 263 }
249 break; 264 break;
250 265 case ACPI_TYPE_LOCAL_REFERENCE:
266 switch (format_string[i]) {
267 case 'R':
268 *(void **)head =
269 (void *)element->reference.handle;
270 head += sizeof(void *);
271 break;
272 default:
273 /* Should never get here */
274 break;
275 }
276 break;
251 case ACPI_TYPE_PACKAGE: 277 case ACPI_TYPE_PACKAGE:
252 /* TBD: handle nested packages... */ 278 /* TBD: handle nested packages... */
253 default: 279 default:
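The utils.c hunks teach acpi_extract_package() a new format character: 'R' copies an ACPI_TYPE_LOCAL_REFERENCE element out as a void * handle (any other element type against 'R' fails with AE_BAD_DATA). A hedged usage sketch inside a driver, assuming pkg already points at an evaluated two-element package of one integer and one reference; note that extraction packs fields head-to-tail, so the receiving struct must match that layout (u64 then pointer, both 8 bytes on 64-bit):

struct {
        u64 value;              /* filled from the 'N' element */
        void *ref;              /* filled from the 'R' element */
} out;
struct acpi_buffer format = { sizeof("NR"), "NR" };
struct acpi_buffer result = { sizeof(out), &out };
acpi_status status;

status = acpi_extract_package(pkg, &format, &result);
if (ACPI_FAILURE(status))
        return -EINVAL;
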
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 82759cef9043..04645c09fe5e 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1106,7 +1106,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
1106 __mix_pool_bytes(r, hash.w, sizeof(hash.w)); 1106 __mix_pool_bytes(r, hash.w, sizeof(hash.w));
1107 spin_unlock_irqrestore(&r->lock, flags); 1107 spin_unlock_irqrestore(&r->lock, flags);
1108 1108
1109 memset(workspace, 0, sizeof(workspace)); 1109 memzero_explicit(workspace, sizeof(workspace));
1110 1110
1111 /* 1111 /*
1112 * In case the hash function has some recognizable output 1112 * In case the hash function has some recognizable output
@@ -1118,7 +1118,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
1118 hash.w[2] ^= rol32(hash.w[2], 16); 1118 hash.w[2] ^= rol32(hash.w[2], 16);
1119 1119
1120 memcpy(out, &hash, EXTRACT_SIZE); 1120 memcpy(out, &hash, EXTRACT_SIZE);
1121 memset(&hash, 0, sizeof(hash)); 1121 memzero_explicit(&hash, sizeof(hash));
1122} 1122}
1123 1123
1124/* 1124/*
@@ -1175,7 +1175,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
1175 } 1175 }
1176 1176
1177 /* Wipe data just returned from memory */ 1177 /* Wipe data just returned from memory */
1178 memset(tmp, 0, sizeof(tmp)); 1178 memzero_explicit(tmp, sizeof(tmp));
1179 1179
1180 return ret; 1180 return ret;
1181} 1181}
@@ -1218,7 +1218,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
1218 } 1218 }
1219 1219
1220 /* Wipe data just returned from memory */ 1220 /* Wipe data just returned from memory */
1221 memset(tmp, 0, sizeof(tmp)); 1221 memzero_explicit(tmp, sizeof(tmp));
1222 1222
1223 return ret; 1223 return ret;
1224} 1224}
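The random.c conversions matter because a plain memset() on stack data that is about to go out of scope is a dead store the compiler may legally delete, leaving key material in memory. memzero_explicit() defeats that optimization by forcing the store to be observed. A minimal model of the idea, not the kernel's exact implementation:

#include <string.h>

static void explicit_bzero_model(void *s, size_t n)
{
        memset(s, 0, n);
        /* An empty asm that claims to read memory keeps the memset alive. */
        __asm__ __volatile__("" : : "g"(s) : "memory");
}
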
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 6bbb8b913446..92c162af5045 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -18,6 +18,7 @@
18#include <linux/cpu.h> 18#include <linux/cpu.h>
19#include <linux/cpu_cooling.h> 19#include <linux/cpu_cooling.h>
20#include <linux/cpufreq.h> 20#include <linux/cpufreq.h>
21#include <linux/cpufreq-dt.h>
21#include <linux/cpumask.h> 22#include <linux/cpumask.h>
22#include <linux/err.h> 23#include <linux/err.h>
23#include <linux/module.h> 24#include <linux/module.h>
@@ -146,8 +147,8 @@ try_again:
146 goto try_again; 147 goto try_again;
147 } 148 }
148 149
149 dev_warn(cpu_dev, "failed to get cpu%d regulator: %ld\n", 150 dev_dbg(cpu_dev, "no regulator for cpu%d: %ld\n",
150 cpu, PTR_ERR(cpu_reg)); 151 cpu, PTR_ERR(cpu_reg));
151 } 152 }
152 153
153 cpu_clk = clk_get(cpu_dev, NULL); 154 cpu_clk = clk_get(cpu_dev, NULL);
@@ -178,6 +179,7 @@ try_again:
178 179
179static int cpufreq_init(struct cpufreq_policy *policy) 180static int cpufreq_init(struct cpufreq_policy *policy)
180{ 181{
182 struct cpufreq_dt_platform_data *pd;
181 struct cpufreq_frequency_table *freq_table; 183 struct cpufreq_frequency_table *freq_table;
182 struct thermal_cooling_device *cdev; 184 struct thermal_cooling_device *cdev;
183 struct device_node *np; 185 struct device_node *np;
@@ -265,9 +267,18 @@ static int cpufreq_init(struct cpufreq_policy *policy)
265 policy->driver_data = priv; 267 policy->driver_data = priv;
266 268
267 policy->clk = cpu_clk; 269 policy->clk = cpu_clk;
268 ret = cpufreq_generic_init(policy, freq_table, transition_latency); 270 ret = cpufreq_table_validate_and_show(policy, freq_table);
269 if (ret) 271 if (ret) {
272 dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
273 ret);
270 goto out_cooling_unregister; 274 goto out_cooling_unregister;
275 }
276
277 policy->cpuinfo.transition_latency = transition_latency;
278
279 pd = cpufreq_get_driver_data();
280 if (pd && !pd->independent_clocks)
281 cpumask_setall(policy->cpus);
271 282
272 of_node_put(np); 283 of_node_put(np);
273 284
@@ -335,6 +346,8 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
335 if (!IS_ERR(cpu_reg)) 346 if (!IS_ERR(cpu_reg))
336 regulator_put(cpu_reg); 347 regulator_put(cpu_reg);
337 348
349 dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
350
338 ret = cpufreq_register_driver(&dt_cpufreq_driver); 351 ret = cpufreq_register_driver(&dt_cpufreq_driver);
339 if (ret) 352 if (ret)
340 dev_err(cpu_dev, "failed register driver: %d\n", ret); 353 dev_err(cpu_dev, "failed register driver: %d\n", ret);
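The two cpufreq-dt changes above work together: the probe path stashes any platform data on the driver (dt_cpufreq_driver.driver_data), and cpufreq_init() fetches it back through the new cpufreq_get_driver_data() to decide whether one policy should cover every CPU (cpumask_setall() when the clocks are not independent). A hedged sketch of how a platform might supply that data; the struct and field come from the <linux/cpufreq-dt.h> header introduced here, while the board function name is illustrative:

#include <linux/cpufreq-dt.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/platform_device.h>

/* Sketch: a board file advertising that all CPUs share one clock. */
static struct cpufreq_dt_platform_data board_cpufreq_pd = {
	.independent_clocks = false,	/* one policy covers every CPU */
};

static int __init board_register_cpufreq(void)
{
	struct platform_device *pdev;

	pdev = platform_device_register_data(NULL, "cpufreq-dt", -1,
					     &board_cpufreq_pd,
					     sizeof(board_cpufreq_pd));
	return PTR_ERR_OR_ZERO(pdev);
}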
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 24bf76fba141..644b54e1e7d1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -512,7 +512,18 @@ show_one(cpuinfo_max_freq, cpuinfo.max_freq);
512show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); 512show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
513show_one(scaling_min_freq, min); 513show_one(scaling_min_freq, min);
514show_one(scaling_max_freq, max); 514show_one(scaling_max_freq, max);
515show_one(scaling_cur_freq, cur); 515
516static ssize_t show_scaling_cur_freq(
517 struct cpufreq_policy *policy, char *buf)
518{
519 ssize_t ret;
520
521 if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
522 ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu));
523 else
524 ret = sprintf(buf, "%u\n", policy->cur);
525 return ret;
526}
516 527
517static int cpufreq_set_policy(struct cpufreq_policy *policy, 528static int cpufreq_set_policy(struct cpufreq_policy *policy,
518 struct cpufreq_policy *new_policy); 529 struct cpufreq_policy *new_policy);
@@ -906,11 +917,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
906 if (ret) 917 if (ret)
907 goto err_out_kobj_put; 918 goto err_out_kobj_put;
908 } 919 }
909 if (has_target()) { 920
910 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); 921 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
911 if (ret) 922 if (ret)
912 goto err_out_kobj_put; 923 goto err_out_kobj_put;
913 } 924
914 if (cpufreq_driver->bios_limit) { 925 if (cpufreq_driver->bios_limit) {
915 ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); 926 ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
916 if (ret) 927 if (ret)
@@ -1731,6 +1742,21 @@ const char *cpufreq_get_current_driver(void)
1731} 1742}
1732EXPORT_SYMBOL_GPL(cpufreq_get_current_driver); 1743EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
1733 1744
1745/**
1746 * cpufreq_get_driver_data - return current driver data
1747 *
1748 * Return the private data of the currently loaded cpufreq
1749 * driver, or NULL if no cpufreq driver is loaded.
1750 */
1751void *cpufreq_get_driver_data(void)
1752{
1753 if (cpufreq_driver)
1754 return cpufreq_driver->driver_data;
1755
1756 return NULL;
1757}
1758EXPORT_SYMBOL_GPL(cpufreq_get_driver_data);
1759
1734/********************************************************************* 1760/*********************************************************************
1735 * NOTIFIER LISTS INTERFACE * 1761 * NOTIFIER LISTS INTERFACE *
1736 *********************************************************************/ 1762 *********************************************************************/
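Two cpufreq core changes sit in this hunk set. First, scaling_cur_freq is now created unconditionally, and for setpolicy drivers that implement ->get() (intel_pstate is the user) it reports the frequency read back from hardware instead of a possibly stale policy->cur. Second, cpufreq_get_driver_data() exposes the ->driver_data set at registration time. A minimal consumer sketch mirroring the cpufreq-dt use above; the helper name is illustrative:

#include <linux/cpufreq.h>
#include <linux/cpufreq-dt.h>

/* cpufreq_get_driver_data() returns NULL before
 * cpufreq_register_driver() has run, so handle a missing driver. */
static bool cpus_share_clock(void)
{
	struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();

	return pd && !pd->independent_clocks;
}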
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index ec399ad2f059..1608f7105c9f 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -19,7 +19,7 @@
19#include <linux/cpu.h> 19#include <linux/cpu.h>
20#include <linux/err.h> 20#include <linux/err.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/mailbox.h> 22#include <linux/pl320-ipc.h>
23#include <linux/platform_device.h> 23#include <linux/platform_device.h>
24 24
25#define HB_CPUFREQ_CHANGE_NOTE 0x80000001 25#define HB_CPUFREQ_CHANGE_NOTE 0x80000001
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0668b389c516..27bb6d3877ed 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -52,6 +52,17 @@ static inline int32_t div_fp(int32_t x, int32_t y)
52 return div_s64((int64_t)x << FRAC_BITS, y); 52 return div_s64((int64_t)x << FRAC_BITS, y);
53} 53}
54 54
55static inline int ceiling_fp(int32_t x)
56{
57 int mask, ret;
58
59 ret = fp_toint(x);
60 mask = (1 << FRAC_BITS) - 1;
61 if (x & mask)
62 ret += 1;
63 return ret;
64}
65
55struct sample { 66struct sample {
56 int32_t core_pct_busy; 67 int32_t core_pct_busy;
57 u64 aperf; 68 u64 aperf;
@@ -64,6 +75,7 @@ struct pstate_data {
64 int current_pstate; 75 int current_pstate;
65 int min_pstate; 76 int min_pstate;
66 int max_pstate; 77 int max_pstate;
78 int scaling;
67 int turbo_pstate; 79 int turbo_pstate;
68}; 80};
69 81
@@ -113,6 +125,7 @@ struct pstate_funcs {
113 int (*get_max)(void); 125 int (*get_max)(void);
114 int (*get_min)(void); 126 int (*get_min)(void);
115 int (*get_turbo)(void); 127 int (*get_turbo)(void);
128 int (*get_scaling)(void);
116 void (*set)(struct cpudata*, int pstate); 129 void (*set)(struct cpudata*, int pstate);
117 void (*get_vid)(struct cpudata *); 130 void (*get_vid)(struct cpudata *);
118}; 131};
@@ -138,6 +151,7 @@ struct perf_limits {
138 151
139static struct perf_limits limits = { 152static struct perf_limits limits = {
140 .no_turbo = 0, 153 .no_turbo = 0,
154 .turbo_disabled = 0,
141 .max_perf_pct = 100, 155 .max_perf_pct = 100,
142 .max_perf = int_tofp(1), 156 .max_perf = int_tofp(1),
143 .min_perf_pct = 0, 157 .min_perf_pct = 0,
@@ -218,6 +232,18 @@ static inline void intel_pstate_reset_all_pid(void)
218 } 232 }
219} 233}
220 234
235static inline void update_turbo_state(void)
236{
237 u64 misc_en;
238 struct cpudata *cpu;
239
240 cpu = all_cpu_data[0];
241 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
242 limits.turbo_disabled =
243 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
244 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
245}
246
221/************************** debugfs begin ************************/ 247/************************** debugfs begin ************************/
222static int pid_param_set(void *data, u64 val) 248static int pid_param_set(void *data, u64 val)
223{ 249{
@@ -274,6 +300,20 @@ static void __init intel_pstate_debug_expose_params(void)
274 return sprintf(buf, "%u\n", limits.object); \ 300 return sprintf(buf, "%u\n", limits.object); \
275 } 301 }
276 302
303static ssize_t show_no_turbo(struct kobject *kobj,
304 struct attribute *attr, char *buf)
305{
306 ssize_t ret;
307
308 update_turbo_state();
309 if (limits.turbo_disabled)
310 ret = sprintf(buf, "%u\n", limits.turbo_disabled);
311 else
312 ret = sprintf(buf, "%u\n", limits.no_turbo);
313
314 return ret;
315}
316
277static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, 317static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
278 const char *buf, size_t count) 318 const char *buf, size_t count)
279{ 319{
@@ -283,11 +323,14 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
283 ret = sscanf(buf, "%u", &input); 323 ret = sscanf(buf, "%u", &input);
284 if (ret != 1) 324 if (ret != 1)
285 return -EINVAL; 325 return -EINVAL;
286 limits.no_turbo = clamp_t(int, input, 0 , 1); 326
327 update_turbo_state();
287 if (limits.turbo_disabled) { 328 if (limits.turbo_disabled) {
288 pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); 329 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
289 limits.no_turbo = limits.turbo_disabled; 330 return -EPERM;
290 } 331 }
332 limits.no_turbo = clamp_t(int, input, 0, 1);
333
291 return count; 334 return count;
292} 335}
293 336
@@ -323,7 +366,6 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
323 return count; 366 return count;
324} 367}
325 368
326show_one(no_turbo, no_turbo);
327show_one(max_perf_pct, max_perf_pct); 369show_one(max_perf_pct, max_perf_pct);
328show_one(min_perf_pct, min_perf_pct); 370show_one(min_perf_pct, min_perf_pct);
329 371
@@ -394,7 +436,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate)
394 cpudata->vid.ratio); 436 cpudata->vid.ratio);
395 437
396 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max); 438 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
397 vid = fp_toint(vid_fp); 439 vid = ceiling_fp(vid_fp);
398 440
399 if (pstate > cpudata->pstate.max_pstate) 441 if (pstate > cpudata->pstate.max_pstate)
400 vid = cpudata->vid.turbo; 442 vid = cpudata->vid.turbo;
@@ -404,6 +446,22 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate)
404 wrmsrl(MSR_IA32_PERF_CTL, val); 446 wrmsrl(MSR_IA32_PERF_CTL, val);
405} 447}
406 448
449#define BYT_BCLK_FREQS 5
450static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
451
452static int byt_get_scaling(void)
453{
454 u64 value;
455 int i;
456
457 rdmsrl(MSR_FSB_FREQ, value);
458 i = value & 0x3;
459
460 BUG_ON(i > BYT_BCLK_FREQS);
461
462 return byt_freq_table[i] * 100;
463}
464
407static void byt_get_vid(struct cpudata *cpudata) 465static void byt_get_vid(struct cpudata *cpudata)
408{ 466{
409 u64 value; 467 u64 value;
@@ -449,6 +507,11 @@ static int core_get_turbo_pstate(void)
449 return ret; 507 return ret;
450} 508}
451 509
510static inline int core_get_scaling(void)
511{
512 return 100000;
513}
514
452static void core_set_pstate(struct cpudata *cpudata, int pstate) 515static void core_set_pstate(struct cpudata *cpudata, int pstate)
453{ 516{
454 u64 val; 517 u64 val;
@@ -473,6 +536,7 @@ static struct cpu_defaults core_params = {
473 .get_max = core_get_max_pstate, 536 .get_max = core_get_max_pstate,
474 .get_min = core_get_min_pstate, 537 .get_min = core_get_min_pstate,
475 .get_turbo = core_get_turbo_pstate, 538 .get_turbo = core_get_turbo_pstate,
539 .get_scaling = core_get_scaling,
476 .set = core_set_pstate, 540 .set = core_set_pstate,
477 }, 541 },
478}; 542};
@@ -491,6 +555,7 @@ static struct cpu_defaults byt_params = {
491 .get_min = byt_get_min_pstate, 555 .get_min = byt_get_min_pstate,
492 .get_turbo = byt_get_turbo_pstate, 556 .get_turbo = byt_get_turbo_pstate,
493 .set = byt_set_pstate, 557 .set = byt_set_pstate,
558 .get_scaling = byt_get_scaling,
494 .get_vid = byt_get_vid, 559 .get_vid = byt_get_vid,
495 }, 560 },
496}; 561};
@@ -501,7 +566,7 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
501 int max_perf_adj; 566 int max_perf_adj;
502 int min_perf; 567 int min_perf;
503 568
504 if (limits.no_turbo) 569 if (limits.no_turbo || limits.turbo_disabled)
505 max_perf = cpu->pstate.max_pstate; 570 max_perf = cpu->pstate.max_pstate;
506 571
507 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); 572 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
@@ -516,6 +581,8 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
516{ 581{
517 int max_perf, min_perf; 582 int max_perf, min_perf;
518 583
584 update_turbo_state();
585
519 intel_pstate_get_min_max(cpu, &min_perf, &max_perf); 586 intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
520 587
521 pstate = clamp_t(int, pstate, min_perf, max_perf); 588 pstate = clamp_t(int, pstate, min_perf, max_perf);
@@ -523,7 +590,7 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
523 if (pstate == cpu->pstate.current_pstate) 590 if (pstate == cpu->pstate.current_pstate)
524 return; 591 return;
525 592
526 trace_cpu_frequency(pstate * 100000, cpu->cpu); 593 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
527 594
528 cpu->pstate.current_pstate = pstate; 595 cpu->pstate.current_pstate = pstate;
529 596
@@ -535,6 +602,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
535 cpu->pstate.min_pstate = pstate_funcs.get_min(); 602 cpu->pstate.min_pstate = pstate_funcs.get_min();
536 cpu->pstate.max_pstate = pstate_funcs.get_max(); 603 cpu->pstate.max_pstate = pstate_funcs.get_max();
537 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); 604 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
605 cpu->pstate.scaling = pstate_funcs.get_scaling();
538 606
539 if (pstate_funcs.get_vid) 607 if (pstate_funcs.get_vid)
540 pstate_funcs.get_vid(cpu); 608 pstate_funcs.get_vid(cpu);
@@ -550,7 +618,9 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
550 core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); 618 core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
551 619
552 sample->freq = fp_toint( 620 sample->freq = fp_toint(
553 mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); 621 mul_fp(int_tofp(
622 cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
623 core_pct));
554 624
555 sample->core_pct_busy = (int32_t)core_pct; 625 sample->core_pct_busy = (int32_t)core_pct;
556} 626}
@@ -671,7 +741,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
671{ 741{
672 struct cpudata *cpu; 742 struct cpudata *cpu;
673 743
674 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); 744 if (!all_cpu_data[cpunum])
745 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
746 GFP_KERNEL);
675 if (!all_cpu_data[cpunum]) 747 if (!all_cpu_data[cpunum])
676 return -ENOMEM; 748 return -ENOMEM;
677 749
@@ -714,9 +786,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
714 if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { 786 if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
715 limits.min_perf_pct = 100; 787 limits.min_perf_pct = 100;
716 limits.min_perf = int_tofp(1); 788 limits.min_perf = int_tofp(1);
789 limits.max_policy_pct = 100;
717 limits.max_perf_pct = 100; 790 limits.max_perf_pct = 100;
718 limits.max_perf = int_tofp(1); 791 limits.max_perf = int_tofp(1);
719 limits.no_turbo = limits.turbo_disabled; 792 limits.no_turbo = 0;
720 return 0; 793 return 0;
721 } 794 }
722 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; 795 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
@@ -751,15 +824,12 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
751 824
752 del_timer_sync(&all_cpu_data[cpu_num]->timer); 825 del_timer_sync(&all_cpu_data[cpu_num]->timer);
753 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); 826 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
754 kfree(all_cpu_data[cpu_num]);
755 all_cpu_data[cpu_num] = NULL;
756} 827}
757 828
758static int intel_pstate_cpu_init(struct cpufreq_policy *policy) 829static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
759{ 830{
760 struct cpudata *cpu; 831 struct cpudata *cpu;
761 int rc; 832 int rc;
762 u64 misc_en;
763 833
764 rc = intel_pstate_init_cpu(policy->cpu); 834 rc = intel_pstate_init_cpu(policy->cpu);
765 if (rc) 835 if (rc)
@@ -767,23 +837,18 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
767 837
768 cpu = all_cpu_data[policy->cpu]; 838 cpu = all_cpu_data[policy->cpu];
769 839
770 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
771 if (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
772 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate) {
773 limits.turbo_disabled = 1;
774 limits.no_turbo = 1;
775 }
776 if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100) 840 if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
777 policy->policy = CPUFREQ_POLICY_PERFORMANCE; 841 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
778 else 842 else
779 policy->policy = CPUFREQ_POLICY_POWERSAVE; 843 policy->policy = CPUFREQ_POLICY_POWERSAVE;
780 844
781 policy->min = cpu->pstate.min_pstate * 100000; 845 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
782 policy->max = cpu->pstate.turbo_pstate * 100000; 846 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
783 847
784 /* cpuinfo and default policy values */ 848 /* cpuinfo and default policy values */
785 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000; 849 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
786 policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000; 850 policy->cpuinfo.max_freq =
851 cpu->pstate.turbo_pstate * cpu->pstate.scaling;
787 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; 852 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
788 cpumask_set_cpu(policy->cpu, policy->cpus); 853 cpumask_set_cpu(policy->cpu, policy->cpus);
789 854
@@ -841,6 +906,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
841 pstate_funcs.get_max = funcs->get_max; 906 pstate_funcs.get_max = funcs->get_max;
842 pstate_funcs.get_min = funcs->get_min; 907 pstate_funcs.get_min = funcs->get_min;
843 pstate_funcs.get_turbo = funcs->get_turbo; 908 pstate_funcs.get_turbo = funcs->get_turbo;
909 pstate_funcs.get_scaling = funcs->get_scaling;
844 pstate_funcs.set = funcs->set; 910 pstate_funcs.set = funcs->set;
845 pstate_funcs.get_vid = funcs->get_vid; 911 pstate_funcs.get_vid = funcs->get_vid;
846} 912}
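The intel_pstate changes replace the hard-coded 100 MHz-per-ratio assumption with a per-CPU ->get_scaling() hook: Core parts keep 100000 kHz per P-state, while BayTrail derives the step from its bus clock, selected by the low two bits of MSR_FSB_FREQ. Every frequency the driver reports then becomes pstate * scaling. A standalone sketch of the arithmetic using the table from byt_get_scaling() above (user-space C; actually reading the MSR is out of scope):

#include <stdio.h>
#include <stdint.h>

/* BayTrail bus-clock table from the hunk above, in units of 100 kHz. */
static const int byt_freq_table[] = { 833, 1000, 1333, 1167, 800 };

/* Frequency in kHz for a given P-state ratio and FSB_FREQ selector. */
static unsigned int pstate_khz(int pstate, uint64_t msr_fsb_freq)
{
	int scaling = byt_freq_table[msr_fsb_freq & 0x3] * 100;

	return (unsigned int)pstate * scaling;
}

int main(void)
{
	/* Example: ratio 16 on an 83.3 MHz bus -> 1332800 kHz. */
	printf("%u kHz\n", pstate_khz(16, 0));
	return 0;
}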
diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
index 0e70ee28a5ca..4102be01d06a 100644
--- a/drivers/cpuidle/Kconfig.mips
+++ b/drivers/cpuidle/Kconfig.mips
@@ -3,7 +3,7 @@
3# 3#
4config MIPS_CPS_CPUIDLE 4config MIPS_CPS_CPUIDLE
5 bool "CPU Idle driver for MIPS CPS platforms" 5 bool "CPU Idle driver for MIPS CPS platforms"
6 depends on CPU_IDLE 6 depends on CPU_IDLE && MIPS_CPS
7 depends on SYS_SUPPORTS_MIPS_CPS 7 depends on SYS_SUPPORTS_MIPS_CPS
8 select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT 8 select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT
9 select GENERIC_CLOCKEVENTS_BROADCAST if SMP 9 select GENERIC_CLOCKEVENTS_BROADCAST if SMP
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index a64be578dab2..7d3a3497dd4c 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -163,7 +163,8 @@ static int powernv_add_idle_states(void)
163 int nr_idle_states = 1; /* Snooze */ 163 int nr_idle_states = 1; /* Snooze */
164 int dt_idle_states; 164 int dt_idle_states;
165 const __be32 *idle_state_flags; 165 const __be32 *idle_state_flags;
166 u32 len_flags, flags; 166 const __be32 *idle_state_latency;
167 u32 len_flags, flags, latency_ns;
167 int i; 168 int i;
168 169
169 /* Currently we have snooze statically defined */ 170 /* Currently we have snooze statically defined */
@@ -180,18 +181,32 @@ static int powernv_add_idle_states(void)
180 return nr_idle_states; 181 return nr_idle_states;
181 } 182 }
182 183
184 idle_state_latency = of_get_property(power_mgt,
185 "ibm,cpu-idle-state-latencies-ns", NULL);
186 if (!idle_state_latency) {
187 pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-latencies-ns\n");
188 return nr_idle_states;
189 }
190
183 dt_idle_states = len_flags / sizeof(u32); 191 dt_idle_states = len_flags / sizeof(u32);
184 192
185 for (i = 0; i < dt_idle_states; i++) { 193 for (i = 0; i < dt_idle_states; i++) {
186 194
187 flags = be32_to_cpu(idle_state_flags[i]); 195 flags = be32_to_cpu(idle_state_flags[i]);
196
197 /* Cpuidle accepts exit_latency in us and we estimate
198 * target residency to be 10x exit_latency
199 */
200 latency_ns = be32_to_cpu(idle_state_latency[i]);
188 if (flags & IDLE_USE_INST_NAP) { 201 if (flags & IDLE_USE_INST_NAP) {
189 /* Add NAP state */ 202 /* Add NAP state */
190 strcpy(powernv_states[nr_idle_states].name, "Nap"); 203 strcpy(powernv_states[nr_idle_states].name, "Nap");
191 strcpy(powernv_states[nr_idle_states].desc, "Nap"); 204 strcpy(powernv_states[nr_idle_states].desc, "Nap");
192 powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID; 205 powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIME_VALID;
193 powernv_states[nr_idle_states].exit_latency = 10; 206 powernv_states[nr_idle_states].exit_latency =
194 powernv_states[nr_idle_states].target_residency = 100; 207 ((unsigned int)latency_ns) / 1000;
208 powernv_states[nr_idle_states].target_residency =
209 ((unsigned int)latency_ns / 100);
195 powernv_states[nr_idle_states].enter = &nap_loop; 210 powernv_states[nr_idle_states].enter = &nap_loop;
196 nr_idle_states++; 211 nr_idle_states++;
197 } 212 }
@@ -202,8 +217,10 @@ static int powernv_add_idle_states(void)
202 strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); 217 strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
203 powernv_states[nr_idle_states].flags = 218 powernv_states[nr_idle_states].flags =
204 CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; 219 CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP;
205 powernv_states[nr_idle_states].exit_latency = 300; 220 powernv_states[nr_idle_states].exit_latency =
206 powernv_states[nr_idle_states].target_residency = 1000000; 221 ((unsigned int)latency_ns) / 1000;
222 powernv_states[nr_idle_states].target_residency =
223 ((unsigned int)latency_ns / 100);
207 powernv_states[nr_idle_states].enter = &fastsleep_loop; 224 powernv_states[nr_idle_states].enter = &fastsleep_loop;
208 nr_idle_states++; 225 nr_idle_states++;
209 } 226 }
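Rather than hard-coding exit latencies for Nap and FastSleep, the powernv driver now reads ibm,cpu-idle-state-latencies-ns from the device tree. cpuidle wants exit_latency in microseconds, and the 10x estimate for target residency mentioned in the comment is exactly what dividing a nanosecond value by 100 produces. A standalone check of that arithmetic:

#include <stdio.h>

int main(void)
{
	/* Example DT latency for a Nap-like state: 4000 ns. */
	unsigned int latency_ns = 4000;

	unsigned int exit_latency_us = latency_ns / 1000; /* 4 us */
	unsigned int residency_us    = latency_ns / 100;  /* 40 us = 10x */

	printf("exit_latency=%uus target_residency=%uus\n",
	       exit_latency_us, residency_us);
	return 0;
}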
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 64ecbb501c50..8590099ac148 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -41,6 +41,28 @@ struct efi __read_mostly efi = {
41}; 41};
42EXPORT_SYMBOL(efi); 42EXPORT_SYMBOL(efi);
43 43
44static bool disable_runtime;
45static int __init setup_noefi(char *arg)
46{
47 disable_runtime = true;
48 return 0;
49}
50early_param("noefi", setup_noefi);
51
52bool efi_runtime_disabled(void)
53{
54 return disable_runtime;
55}
56
57static int __init parse_efi_cmdline(char *str)
58{
59 if (parse_option_str(str, "noruntime"))
60 disable_runtime = true;
61
62 return 0;
63}
64early_param("efi", parse_efi_cmdline);
65
44static struct kobject *efi_kobj; 66static struct kobject *efi_kobj;
45static struct kobject *efivars_kobj; 67static struct kobject *efivars_kobj;
46 68
@@ -423,3 +445,60 @@ int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose)
423 return ret; 445 return ret;
424} 446}
425#endif /* CONFIG_EFI_PARAMS_FROM_FDT */ 447#endif /* CONFIG_EFI_PARAMS_FROM_FDT */
448
449static __initdata char memory_type_name[][20] = {
450 "Reserved",
451 "Loader Code",
452 "Loader Data",
453 "Boot Code",
454 "Boot Data",
455 "Runtime Code",
456 "Runtime Data",
457 "Conventional Memory",
458 "Unusable Memory",
459 "ACPI Reclaim Memory",
460 "ACPI Memory NVS",
461 "Memory Mapped I/O",
462 "MMIO Port Space",
463 "PAL Code"
464};
465
466char * __init efi_md_typeattr_format(char *buf, size_t size,
467 const efi_memory_desc_t *md)
468{
469 char *pos;
470 int type_len;
471 u64 attr;
472
473 pos = buf;
474 if (md->type >= ARRAY_SIZE(memory_type_name))
475 type_len = snprintf(pos, size, "[type=%u", md->type);
476 else
477 type_len = snprintf(pos, size, "[%-*s",
478 (int)(sizeof(memory_type_name[0]) - 1),
479 memory_type_name[md->type]);
480 if (type_len >= size)
481 return buf;
482
483 pos += type_len;
484 size -= type_len;
485
486 attr = md->attribute;
487 if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
488 EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_WP |
489 EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_RUNTIME))
490 snprintf(pos, size, "|attr=0x%016llx]",
491 (unsigned long long)attr);
492 else
493 snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
494 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
495 attr & EFI_MEMORY_XP ? "XP" : "",
496 attr & EFI_MEMORY_RP ? "RP" : "",
497 attr & EFI_MEMORY_WP ? "WP" : "",
498 attr & EFI_MEMORY_UCE ? "UCE" : "",
499 attr & EFI_MEMORY_WB ? "WB" : "",
500 attr & EFI_MEMORY_WT ? "WT" : "",
501 attr & EFI_MEMORY_WC ? "WC" : "",
502 attr & EFI_MEMORY_UC ? "UC" : "");
503 return buf;
504}
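This file gains two command-line hooks, "noefi" and "efi=noruntime", both of which disable runtime services, plus efi_md_typeattr_format(), which renders a memory descriptor's type and attribute bits for memmap debug output and falls back to raw numbers for unknown types or unexpected attributes. A standalone, deliberately simplified analogue of the command-line side; real early_param() matching is exact-token, so the strstr() calls here are only for illustration:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/* Simplified analogue of setup_noefi()/parse_efi_cmdline() above:
 * scan a command line for "noefi" or an "efi=" option list that
 * contains "noruntime". */
static bool runtime_disabled(const char *cmdline)
{
	const char *str;

	if (strstr(cmdline, "noefi"))
		return true;

	str = strstr(cmdline, "efi=");
	if (!str)
		return false;

	return strstr(str + 4, "noruntime") != NULL;
}

int main(void)
{
	printf("%d\n", runtime_disabled("root=/dev/sda1 efi=noruntime"));
	return 0;
}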
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 480339b6b110..75ee05964cbc 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -226,6 +226,10 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table,
226 goto fail_free_image; 226 goto fail_free_image;
227 } 227 }
228 228
229 status = efi_parse_options(cmdline_ptr);
230 if (status != EFI_SUCCESS)
231 pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
232
229 /* 233 /*
230 * Unauthenticated device tree data is a security hazard, so 234 * Unauthenticated device tree data is a security hazard, so
231 * ignore 'dtb=' unless UEFI Secure Boot is disabled. 235 * ignore 'dtb=' unless UEFI Secure Boot is disabled.
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 32d5cca30f49..a920fec8fe88 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -15,8 +15,23 @@
15 15
16#include "efistub.h" 16#include "efistub.h"
17 17
18/*
19 * Some firmware implementations have problems reading files in one go.
20 * A read chunk size of 1MB seems to work for most platforms.
21 *
22 * Unfortunately, reading files in chunks triggers *other* bugs on some
23 * platforms, so we provide a way to disable this workaround, which can
24 * be done by passing "efi=nochunk" on the EFI boot stub command line.
25 *
 26 * If you experience issues with initrd images being corrupted, it's worth
27 * trying efi=nochunk, but chunking is enabled by default because there
28 * are far more machines that require the workaround than those that
29 * break with it enabled.
30 */
18#define EFI_READ_CHUNK_SIZE (1024 * 1024) 31#define EFI_READ_CHUNK_SIZE (1024 * 1024)
19 32
33static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
34
20struct file_info { 35struct file_info {
21 efi_file_handle_t *handle; 36 efi_file_handle_t *handle;
22 u64 size; 37 u64 size;
@@ -281,6 +296,49 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size,
281 efi_call_early(free_pages, addr, nr_pages); 296 efi_call_early(free_pages, addr, nr_pages);
282} 297}
283 298
299/*
300 * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi=
301 * option, e.g. efi=nochunk.
302 *
303 * It should be noted that efi= is parsed in two very different
304 * environments, first in the early boot environment of the EFI boot
305 * stub, and subsequently during the kernel boot.
306 */
307efi_status_t efi_parse_options(char *cmdline)
308{
309 char *str;
310
311 /*
312 * If no EFI parameters were specified on the cmdline we've got
313 * nothing to do.
314 */
315 str = strstr(cmdline, "efi=");
316 if (!str)
317 return EFI_SUCCESS;
318
319 /* Skip ahead to first argument */
320 str += strlen("efi=");
321
322 /*
323 * Remember, because efi= is also used by the kernel we need to
324 * skip over arguments we don't understand.
325 */
326 while (*str) {
327 if (!strncmp(str, "nochunk", 7)) {
328 str += strlen("nochunk");
329 __chunk_size = -1UL;
330 }
331
332 /* Group words together, delimited by "," */
333 while (*str && *str != ',')
334 str++;
335
336 if (*str == ',')
337 str++;
338 }
339
340 return EFI_SUCCESS;
341}
284 342
285/* 343/*
286 * Check the cmdline for a LILO-style file= arguments. 344 * Check the cmdline for a LILO-style file= arguments.
@@ -423,8 +481,8 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
423 size = files[j].size; 481 size = files[j].size;
424 while (size) { 482 while (size) {
425 unsigned long chunksize; 483 unsigned long chunksize;
426 if (size > EFI_READ_CHUNK_SIZE) 484 if (size > __chunk_size)
427 chunksize = EFI_READ_CHUNK_SIZE; 485 chunksize = __chunk_size;
428 else 486 else
429 chunksize = size; 487 chunksize = size;
430 488
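handle_cmdline_files() keeps its chunked-read loop, but the chunk size is now a variable: efi=nochunk sets __chunk_size to -1UL, so the size > __chunk_size test never fires and the whole file is read in a single call. A standalone sketch of the loop:

#include <stdio.h>

/* Sketch of the chunked-read loop above: with chunk = -1UL
 * ("efi=nochunk"), the first iteration covers the whole file. */
static void read_in_chunks(unsigned long size, unsigned long chunk)
{
	while (size) {
		unsigned long n = size > chunk ? chunk : size;

		/* the firmware read of n bytes would go here */
		printf("read %lu bytes\n", n);
		size -= n;
	}
}

int main(void)
{
	read_in_chunks(3ul * 1024 * 1024 + 5, 1024 * 1024); /* 4 reads */
	read_in_chunks(3ul * 1024 * 1024 + 5, -1UL);        /* 1 read  */
	return 0;
}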
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 10daa4bbb258..228bbf910461 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -14,11 +14,80 @@
14 * This file is released under the GPLv2. 14 * This file is released under the GPLv2.
15 */ 15 */
16 16
17#include <linux/bug.h>
17#include <linux/efi.h> 18#include <linux/efi.h>
18#include <linux/spinlock.h> /* spinlock_t */ 19#include <linux/mutex.h>
20#include <linux/spinlock.h>
19#include <asm/efi.h> 21#include <asm/efi.h>
20 22
21/* 23/*
24 * According to section 7.1 of the UEFI spec, Runtime Services are not fully
25 * reentrant, and there are particular combinations of calls that need to be
26 * serialized. (source: UEFI Specification v2.4A)
27 *
28 * Table 31. Rules for Reentry Into Runtime Services
29 * +------------------------------------+-------------------------------+
30 * | If previous call is busy in | Forbidden to call |
31 * +------------------------------------+-------------------------------+
32 * | Any | SetVirtualAddressMap() |
33 * +------------------------------------+-------------------------------+
34 * | ConvertPointer() | ConvertPointer() |
35 * +------------------------------------+-------------------------------+
36 * | SetVariable() | ResetSystem() |
37 * | UpdateCapsule() | |
38 * | SetTime() | |
39 * | SetWakeupTime() | |
40 * | GetNextHighMonotonicCount() | |
41 * +------------------------------------+-------------------------------+
42 * | GetVariable() | GetVariable() |
43 * | GetNextVariableName() | GetNextVariableName() |
44 * | SetVariable() | SetVariable() |
45 * | QueryVariableInfo() | QueryVariableInfo() |
46 * | UpdateCapsule() | UpdateCapsule() |
47 * | QueryCapsuleCapabilities() | QueryCapsuleCapabilities() |
48 * | GetNextHighMonotonicCount() | GetNextHighMonotonicCount() |
49 * +------------------------------------+-------------------------------+
50 * | GetTime() | GetTime() |
51 * | SetTime() | SetTime() |
52 * | GetWakeupTime() | GetWakeupTime() |
53 * | SetWakeupTime() | SetWakeupTime() |
54 * +------------------------------------+-------------------------------+
55 *
 56 * Because the EFI pstore may write to the variable store in
57 * interrupt context, we need to use a spinlock for at least the groups that
58 * contain SetVariable() and QueryVariableInfo(). That leaves little else, as
59 * none of the remaining functions are actually ever called at runtime.
60 * So let's just use a single spinlock to serialize all Runtime Services calls.
61 */
62static DEFINE_SPINLOCK(efi_runtime_lock);
63
64/*
65 * Some runtime services calls can be reentrant under NMI, even if the table
66 * above says they are not. (source: UEFI Specification v2.4A)
67 *
68 * Table 32. Functions that may be called after Machine Check, INIT and NMI
69 * +----------------------------+------------------------------------------+
70 * | Function | Called after Machine Check, INIT and NMI |
71 * +----------------------------+------------------------------------------+
72 * | GetTime() | Yes, even if previously busy. |
73 * | GetVariable() | Yes, even if previously busy |
74 * | GetNextVariableName() | Yes, even if previously busy |
75 * | QueryVariableInfo() | Yes, even if previously busy |
76 * | SetVariable() | Yes, even if previously busy |
77 * | UpdateCapsule() | Yes, even if previously busy |
78 * | QueryCapsuleCapabilities() | Yes, even if previously busy |
79 * | ResetSystem() | Yes, even if previously busy |
80 * +----------------------------+------------------------------------------+
81 *
82 * In order to prevent deadlocks under NMI, the wrappers for these functions
83 * may only grab the efi_runtime_lock or rtc_lock spinlocks if !efi_in_nmi().
84 * However, not all of the services listed are reachable through NMI code paths,
 85 * so the special handling as suggested by the UEFI spec is only implemented
86 * for QueryVariableInfo() and SetVariable(), as these can be reached in NMI
87 * context through efi_pstore_write().
88 */
89
90/*
22 * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"), 91 * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"),
23 * the EFI specification requires that callers of the time related runtime 92 * the EFI specification requires that callers of the time related runtime
24 * functions serialize with other CMOS accesses in the kernel, as the EFI time 93 * functions serialize with other CMOS accesses in the kernel, as the EFI time
@@ -32,7 +101,9 @@ static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
32 efi_status_t status; 101 efi_status_t status;
33 102
34 spin_lock_irqsave(&rtc_lock, flags); 103 spin_lock_irqsave(&rtc_lock, flags);
104 spin_lock(&efi_runtime_lock);
35 status = efi_call_virt(get_time, tm, tc); 105 status = efi_call_virt(get_time, tm, tc);
106 spin_unlock(&efi_runtime_lock);
36 spin_unlock_irqrestore(&rtc_lock, flags); 107 spin_unlock_irqrestore(&rtc_lock, flags);
37 return status; 108 return status;
38} 109}
@@ -43,7 +114,9 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
43 efi_status_t status; 114 efi_status_t status;
44 115
45 spin_lock_irqsave(&rtc_lock, flags); 116 spin_lock_irqsave(&rtc_lock, flags);
117 spin_lock(&efi_runtime_lock);
46 status = efi_call_virt(set_time, tm); 118 status = efi_call_virt(set_time, tm);
119 spin_unlock(&efi_runtime_lock);
47 spin_unlock_irqrestore(&rtc_lock, flags); 120 spin_unlock_irqrestore(&rtc_lock, flags);
48 return status; 121 return status;
49} 122}
@@ -56,7 +129,9 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
56 efi_status_t status; 129 efi_status_t status;
57 130
58 spin_lock_irqsave(&rtc_lock, flags); 131 spin_lock_irqsave(&rtc_lock, flags);
132 spin_lock(&efi_runtime_lock);
59 status = efi_call_virt(get_wakeup_time, enabled, pending, tm); 133 status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
134 spin_unlock(&efi_runtime_lock);
60 spin_unlock_irqrestore(&rtc_lock, flags); 135 spin_unlock_irqrestore(&rtc_lock, flags);
61 return status; 136 return status;
62} 137}
@@ -67,7 +142,9 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
67 efi_status_t status; 142 efi_status_t status;
68 143
69 spin_lock_irqsave(&rtc_lock, flags); 144 spin_lock_irqsave(&rtc_lock, flags);
145 spin_lock(&efi_runtime_lock);
70 status = efi_call_virt(set_wakeup_time, enabled, tm); 146 status = efi_call_virt(set_wakeup_time, enabled, tm);
147 spin_unlock(&efi_runtime_lock);
71 spin_unlock_irqrestore(&rtc_lock, flags); 148 spin_unlock_irqrestore(&rtc_lock, flags);
72 return status; 149 return status;
73} 150}
@@ -78,14 +155,27 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
78 unsigned long *data_size, 155 unsigned long *data_size,
79 void *data) 156 void *data)
80{ 157{
81 return efi_call_virt(get_variable, name, vendor, attr, data_size, data); 158 unsigned long flags;
159 efi_status_t status;
160
161 spin_lock_irqsave(&efi_runtime_lock, flags);
162 status = efi_call_virt(get_variable, name, vendor, attr, data_size,
163 data);
164 spin_unlock_irqrestore(&efi_runtime_lock, flags);
165 return status;
82} 166}
83 167
84static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, 168static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
85 efi_char16_t *name, 169 efi_char16_t *name,
86 efi_guid_t *vendor) 170 efi_guid_t *vendor)
87{ 171{
88 return efi_call_virt(get_next_variable, name_size, name, vendor); 172 unsigned long flags;
173 efi_status_t status;
174
175 spin_lock_irqsave(&efi_runtime_lock, flags);
176 status = efi_call_virt(get_next_variable, name_size, name, vendor);
177 spin_unlock_irqrestore(&efi_runtime_lock, flags);
178 return status;
89} 179}
90 180
91static efi_status_t virt_efi_set_variable(efi_char16_t *name, 181static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -94,24 +184,61 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
94 unsigned long data_size, 184 unsigned long data_size,
95 void *data) 185 void *data)
96{ 186{
97 return efi_call_virt(set_variable, name, vendor, attr, data_size, data); 187 unsigned long flags;
188 efi_status_t status;
189
190 spin_lock_irqsave(&efi_runtime_lock, flags);
191 status = efi_call_virt(set_variable, name, vendor, attr, data_size,
192 data);
193 spin_unlock_irqrestore(&efi_runtime_lock, flags);
194 return status;
98} 195}
99 196
197static efi_status_t
198virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
199 u32 attr, unsigned long data_size,
200 void *data)
201{
202 unsigned long flags;
203 efi_status_t status;
204
205 if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
206 return EFI_NOT_READY;
207
208 status = efi_call_virt(set_variable, name, vendor, attr, data_size,
209 data);
210 spin_unlock_irqrestore(&efi_runtime_lock, flags);
211 return status;
212}
213
214
100static efi_status_t virt_efi_query_variable_info(u32 attr, 215static efi_status_t virt_efi_query_variable_info(u32 attr,
101 u64 *storage_space, 216 u64 *storage_space,
102 u64 *remaining_space, 217 u64 *remaining_space,
103 u64 *max_variable_size) 218 u64 *max_variable_size)
104{ 219{
220 unsigned long flags;
221 efi_status_t status;
222
105 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) 223 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
106 return EFI_UNSUPPORTED; 224 return EFI_UNSUPPORTED;
107 225
108 return efi_call_virt(query_variable_info, attr, storage_space, 226 spin_lock_irqsave(&efi_runtime_lock, flags);
109 remaining_space, max_variable_size); 227 status = efi_call_virt(query_variable_info, attr, storage_space,
228 remaining_space, max_variable_size);
229 spin_unlock_irqrestore(&efi_runtime_lock, flags);
230 return status;
110} 231}
111 232
112static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) 233static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
113{ 234{
114 return efi_call_virt(get_next_high_mono_count, count); 235 unsigned long flags;
236 efi_status_t status;
237
238 spin_lock_irqsave(&efi_runtime_lock, flags);
239 status = efi_call_virt(get_next_high_mono_count, count);
240 spin_unlock_irqrestore(&efi_runtime_lock, flags);
241 return status;
115} 242}
116 243
117static void virt_efi_reset_system(int reset_type, 244static void virt_efi_reset_system(int reset_type,
@@ -119,17 +246,27 @@ static void virt_efi_reset_system(int reset_type,
119 unsigned long data_size, 246 unsigned long data_size,
120 efi_char16_t *data) 247 efi_char16_t *data)
121{ 248{
249 unsigned long flags;
250
251 spin_lock_irqsave(&efi_runtime_lock, flags);
122 __efi_call_virt(reset_system, reset_type, status, data_size, data); 252 __efi_call_virt(reset_system, reset_type, status, data_size, data);
253 spin_unlock_irqrestore(&efi_runtime_lock, flags);
123} 254}
124 255
125static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, 256static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
126 unsigned long count, 257 unsigned long count,
127 unsigned long sg_list) 258 unsigned long sg_list)
128{ 259{
260 unsigned long flags;
261 efi_status_t status;
262
129 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) 263 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
130 return EFI_UNSUPPORTED; 264 return EFI_UNSUPPORTED;
131 265
132 return efi_call_virt(update_capsule, capsules, count, sg_list); 266 spin_lock_irqsave(&efi_runtime_lock, flags);
267 status = efi_call_virt(update_capsule, capsules, count, sg_list);
268 spin_unlock_irqrestore(&efi_runtime_lock, flags);
269 return status;
133} 270}
134 271
135static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, 272static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
@@ -137,11 +274,17 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
137 u64 *max_size, 274 u64 *max_size,
138 int *reset_type) 275 int *reset_type)
139{ 276{
277 unsigned long flags;
278 efi_status_t status;
279
140 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) 280 if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
141 return EFI_UNSUPPORTED; 281 return EFI_UNSUPPORTED;
142 282
143 return efi_call_virt(query_capsule_caps, capsules, count, max_size, 283 spin_lock_irqsave(&efi_runtime_lock, flags);
144 reset_type); 284 status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
285 reset_type);
286 spin_unlock_irqrestore(&efi_runtime_lock, flags);
287 return status;
145} 288}
146 289
147void efi_native_runtime_setup(void) 290void efi_native_runtime_setup(void)
@@ -153,6 +296,7 @@ void efi_native_runtime_setup(void)
153 efi.get_variable = virt_efi_get_variable; 296 efi.get_variable = virt_efi_get_variable;
154 efi.get_next_variable = virt_efi_get_next_variable; 297 efi.get_next_variable = virt_efi_get_next_variable;
155 efi.set_variable = virt_efi_set_variable; 298 efi.set_variable = virt_efi_set_variable;
299 efi.set_variable_nonblocking = virt_efi_set_variable_nonblocking;
156 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; 300 efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
157 efi.reset_system = virt_efi_reset_system; 301 efi.reset_system = virt_efi_reset_system;
158 efi.query_variable_info = virt_efi_query_variable_info; 302 efi.query_variable_info = virt_efi_query_variable_info;
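The wrappers now funnel every runtime service through a single efi_runtime_lock; the time services take it nested inside rtc_lock (a fixed ordering, so no deadlock), and the new SetVariable variant only trylocks so a pstore write from NMI context returns EFI_NOT_READY instead of spinning forever. A user-space analogue of the two wrapper shapes, using pthreads since kernel spinlocks need kernel context:

#include <pthread.h>

static pthread_mutex_t runtime_lock = PTHREAD_MUTEX_INITIALIZER;

/* Blocking wrapper: serialize the firmware call, as the spinlock
 * wrappers above do. */
static int call_service_blocking(void (*svc)(void))
{
	pthread_mutex_lock(&runtime_lock);
	svc();
	pthread_mutex_unlock(&runtime_lock);
	return 0;
}

/* Non-blocking wrapper: analogue of
 * virt_efi_set_variable_nonblocking() -- give up rather than wait,
 * which is what an NMI or panic path needs. */
static int call_service_nonblocking(void (*svc)(void))
{
	if (pthread_mutex_trylock(&runtime_lock) != 0)
		return -1;	/* EFI_NOT_READY in the kernel code */
	svc();
	pthread_mutex_unlock(&runtime_lock);
	return 0;
}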
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 5abe943e3404..70a0fb10517f 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -321,11 +321,11 @@ static unsigned long var_name_strnsize(efi_char16_t *variable_name,
321 * Print a warning when duplicate EFI variables are encountered and 321 * Print a warning when duplicate EFI variables are encountered and
322 * disable the sysfs workqueue since the firmware is buggy. 322 * disable the sysfs workqueue since the firmware is buggy.
323 */ 323 */
324static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid, 324static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid,
325 unsigned long len16) 325 unsigned long len16)
326{ 326{
327 size_t i, len8 = len16 / sizeof(efi_char16_t); 327 size_t i, len8 = len16 / sizeof(efi_char16_t);
328 char *s8; 328 char *str8;
329 329
330 /* 330 /*
331 * Disable the workqueue since the algorithm it uses for 331 * Disable the workqueue since the algorithm it uses for
@@ -334,16 +334,16 @@ static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid,
334 */ 334 */
335 efivar_wq_enabled = false; 335 efivar_wq_enabled = false;
336 336
337 s8 = kzalloc(len8, GFP_KERNEL); 337 str8 = kzalloc(len8, GFP_KERNEL);
338 if (!s8) 338 if (!str8)
339 return; 339 return;
340 340
341 for (i = 0; i < len8; i++) 341 for (i = 0; i < len8; i++)
342 s8[i] = s16[i]; 342 str8[i] = str16[i];
343 343
344 printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n", 344 printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n",
345 s8, vendor_guid); 345 str8, vendor_guid);
346 kfree(s8); 346 kfree(str8);
347} 347}
348 348
349/** 349/**
@@ -595,6 +595,39 @@ int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
595} 595}
596EXPORT_SYMBOL_GPL(efivar_entry_set); 596EXPORT_SYMBOL_GPL(efivar_entry_set);
597 597
598/*
599 * efivar_entry_set_nonblocking - call set_variable_nonblocking()
600 *
601 * This function is guaranteed to not block and is suitable for calling
602 * from crash/panic handlers.
603 *
604 * Crucially, this function will not block if it cannot acquire
605 * __efivars->lock. Instead, it returns -EBUSY.
606 */
607static int
608efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor,
609 u32 attributes, unsigned long size, void *data)
610{
611 const struct efivar_operations *ops = __efivars->ops;
612 unsigned long flags;
613 efi_status_t status;
614
615 if (!spin_trylock_irqsave(&__efivars->lock, flags))
616 return -EBUSY;
617
618 status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
619 if (status != EFI_SUCCESS) {
620 spin_unlock_irqrestore(&__efivars->lock, flags);
621 return -ENOSPC;
622 }
623
624 status = ops->set_variable_nonblocking(name, &vendor, attributes,
625 size, data);
626
627 spin_unlock_irqrestore(&__efivars->lock, flags);
628 return efi_status_to_err(status);
629}
630
598/** 631/**
599 * efivar_entry_set_safe - call set_variable() if enough space in firmware 632 * efivar_entry_set_safe - call set_variable() if enough space in firmware
600 * @name: buffer containing the variable name 633 * @name: buffer containing the variable name
@@ -622,6 +655,20 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
622 if (!ops->query_variable_store) 655 if (!ops->query_variable_store)
623 return -ENOSYS; 656 return -ENOSYS;
624 657
658 /*
659 * If the EFI variable backend provides a non-blocking
660 * ->set_variable() operation and we're in a context where we
661 * cannot block, then we need to use it to avoid live-locks,
662 * since the implication is that the regular ->set_variable()
663 * will block.
664 *
665 * If no ->set_variable_nonblocking() is provided then
666 * ->set_variable() is assumed to be non-blocking.
667 */
668 if (!block && ops->set_variable_nonblocking)
669 return efivar_entry_set_nonblocking(name, vendor, attributes,
670 size, data);
671
625 if (!block) { 672 if (!block) {
626 if (!spin_trylock_irqsave(&__efivars->lock, flags)) 673 if (!spin_trylock_irqsave(&__efivars->lock, flags))
627 return -EBUSY; 674 return -EBUSY;
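efivar_entry_set_safe() is the entry point efi-pstore reaches from panic/NMI context; with block == false it now prefers the firmware's non-blocking SetVariable, so neither __efivars->lock nor efi_runtime_lock is ever waited on. A hedged caller sketch (a kernel-context fragment, not standalone; the function name is illustrative, while the efivar_entry_set_safe() signature is taken from this file):

/* Sketch of an efi-pstore-like caller: in panic context pass
 * block = false so the non-blocking path above is taken and a
 * contended lock yields -EBUSY instead of a hang. */
static int write_panic_record(efi_char16_t *name, efi_guid_t vendor,
			      u32 attributes, void *rec, unsigned long len)
{
	return efivar_entry_set_safe(name, vendor, attributes,
				     false /* block */, len, rec);
}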
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index e705335101a5..c2a1cba1e984 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -32,6 +32,8 @@ static struct drm_driver driver;
32static const struct pci_device_id pciidlist[] = { 32static const struct pci_device_id pciidlist[] = {
33 { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0, 33 { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0,
34 0, 0 }, 34 0, 0 },
35 { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN,
36 0x0001, 0, 0, 0 },
35 {0,} 37 {0,}
36}; 38};
37 39
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3201986bf25e..f66392b6e287 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1711,7 +1711,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
1711#define HPD_STORM_DETECT_PERIOD 1000 1711#define HPD_STORM_DETECT_PERIOD 1000
1712#define HPD_STORM_THRESHOLD 5 1712#define HPD_STORM_THRESHOLD 5
1713 1713
1714static int ilk_port_to_hotplug_shift(enum port port) 1714static int pch_port_to_hotplug_shift(enum port port)
1715{ 1715{
1716 switch (port) { 1716 switch (port) {
1717 case PORT_A: 1717 case PORT_A:
@@ -1727,7 +1727,7 @@ static int ilk_port_to_hotplug_shift(enum port port)
1727 } 1727 }
1728} 1728}
1729 1729
1730static int g4x_port_to_hotplug_shift(enum port port) 1730static int i915_port_to_hotplug_shift(enum port port)
1731{ 1731{
1732 switch (port) { 1732 switch (port) {
1733 case PORT_A: 1733 case PORT_A:
@@ -1785,12 +1785,12 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
1785 if (port && dev_priv->hpd_irq_port[port]) { 1785 if (port && dev_priv->hpd_irq_port[port]) {
1786 bool long_hpd; 1786 bool long_hpd;
1787 1787
1788 if (IS_G4X(dev)) { 1788 if (HAS_PCH_SPLIT(dev)) {
1789 dig_shift = g4x_port_to_hotplug_shift(port); 1789 dig_shift = pch_port_to_hotplug_shift(port);
1790 long_hpd = (hotplug_trigger >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
1791 } else {
1792 dig_shift = ilk_port_to_hotplug_shift(port);
1793 long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT; 1790 long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
1791 } else {
1792 dig_shift = i915_port_to_hotplug_shift(port);
1793 long_hpd = (hotplug_trigger >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
1794 } 1794 }
1795 1795
1796 DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", 1796 DRM_DEBUG_DRIVER("digital hpd port %c - %s\n",
@@ -3458,12 +3458,13 @@ static void gen8_irq_reset(struct drm_device *dev)
3458void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv) 3458void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv)
3459{ 3459{
3460 unsigned long irqflags; 3460 unsigned long irqflags;
3461 uint32_t extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN;
3461 3462
3462 spin_lock_irqsave(&dev_priv->irq_lock, irqflags); 3463 spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
3463 GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_B, dev_priv->de_irq_mask[PIPE_B], 3464 GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_B, dev_priv->de_irq_mask[PIPE_B],
3464 ~dev_priv->de_irq_mask[PIPE_B]); 3465 ~dev_priv->de_irq_mask[PIPE_B] | extra_ier);
3465 GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_C, dev_priv->de_irq_mask[PIPE_C], 3466 GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_C, dev_priv->de_irq_mask[PIPE_C],
3466 ~dev_priv->de_irq_mask[PIPE_C]); 3467 ~dev_priv->de_irq_mask[PIPE_C] | extra_ier);
3467 spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); 3468 spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
3468} 3469}
3469 3470
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 507370513f3d..c9e220963a78 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -73,9 +73,6 @@ static const uint32_t intel_cursor_formats[] = {
73 DRM_FORMAT_ARGB8888, 73 DRM_FORMAT_ARGB8888,
74}; 74};
75 75
76#define DIV_ROUND_CLOSEST_ULL(ll, d) \
77({ unsigned long long _tmp = (ll)+(d)/2; do_div(_tmp, d); _tmp; })
78
79static void intel_increase_pllclock(struct drm_device *dev, 76static void intel_increase_pllclock(struct drm_device *dev,
80 enum pipe pipe); 77 enum pipe pipe);
81static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on); 78static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
@@ -12357,27 +12354,36 @@ static void intel_setup_outputs(struct drm_device *dev)
12357 if (I915_READ(PCH_DP_D) & DP_DETECTED) 12354 if (I915_READ(PCH_DP_D) & DP_DETECTED)
12358 intel_dp_init(dev, PCH_DP_D, PORT_D); 12355 intel_dp_init(dev, PCH_DP_D, PORT_D);
12359 } else if (IS_VALLEYVIEW(dev)) { 12356 } else if (IS_VALLEYVIEW(dev)) {
12360 if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIB) & SDVO_DETECTED) { 12357 /*
12358 * The DP_DETECTED bit is the latched state of the DDC
12359 * SDA pin at boot. However since eDP doesn't require DDC
12360 * (no way to plug in a DP->HDMI dongle) the DDC pins for
12361 * eDP ports may have been muxed to an alternate function.
12362 * Thus we can't rely on the DP_DETECTED bit alone to detect
12363 * eDP ports. Consult the VBT as well as DP_DETECTED to
12364 * detect eDP ports.
12365 */
12366 if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIB) & SDVO_DETECTED)
12361 intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIB, 12367 intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIB,
12362 PORT_B); 12368 PORT_B);
12363 if (I915_READ(VLV_DISPLAY_BASE + DP_B) & DP_DETECTED) 12369 if (I915_READ(VLV_DISPLAY_BASE + DP_B) & DP_DETECTED ||
12364 intel_dp_init(dev, VLV_DISPLAY_BASE + DP_B, PORT_B); 12370 intel_dp_is_edp(dev, PORT_B))
12365 } 12371 intel_dp_init(dev, VLV_DISPLAY_BASE + DP_B, PORT_B);
12366 12372
12367 if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIC) & SDVO_DETECTED) { 12373 if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIC) & SDVO_DETECTED)
12368 intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIC, 12374 intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIC,
12369 PORT_C); 12375 PORT_C);
12370 if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED) 12376 if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED ||
12371 intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C); 12377 intel_dp_is_edp(dev, PORT_C))
12372 } 12378 intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C);
12373 12379
12374 if (IS_CHERRYVIEW(dev)) { 12380 if (IS_CHERRYVIEW(dev)) {
12375 if (I915_READ(VLV_DISPLAY_BASE + CHV_HDMID) & SDVO_DETECTED) { 12381 if (I915_READ(VLV_DISPLAY_BASE + CHV_HDMID) & SDVO_DETECTED)
12376 intel_hdmi_init(dev, VLV_DISPLAY_BASE + CHV_HDMID, 12382 intel_hdmi_init(dev, VLV_DISPLAY_BASE + CHV_HDMID,
12377 PORT_D); 12383 PORT_D);
12378 if (I915_READ(VLV_DISPLAY_BASE + DP_D) & DP_DETECTED) 12384 /* eDP not supported on port D, so don't check VBT */
12379 intel_dp_init(dev, VLV_DISPLAY_BASE + DP_D, PORT_D); 12385 if (I915_READ(VLV_DISPLAY_BASE + DP_D) & DP_DETECTED)
12380 } 12386 intel_dp_init(dev, VLV_DISPLAY_BASE + DP_D, PORT_D);
12381 } 12387 }
12382 12388
12383 intel_dsi_init(dev); 12389 intel_dsi_init(dev);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 07ce04683c30..ba715229a540 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -35,6 +35,9 @@
35#include <drm/drm_fb_helper.h> 35#include <drm/drm_fb_helper.h>
36#include <drm/drm_dp_mst_helper.h> 36#include <drm/drm_dp_mst_helper.h>
37 37
38#define DIV_ROUND_CLOSEST_ULL(ll, d) \
39({ unsigned long long _tmp = (ll)+(d)/2; do_div(_tmp, d); _tmp; })
40
38/** 41/**
39 * _wait_for - magic (register) wait macro 42 * _wait_for - magic (register) wait macro
40 * 43 *
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 18784470a760..0e018cb49147 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -419,9 +419,8 @@ static uint32_t scale(uint32_t source_val,
419 source_val = clamp(source_val, source_min, source_max); 419 source_val = clamp(source_val, source_min, source_max);
420 420
421 /* avoid overflows */ 421 /* avoid overflows */
422 target_val = (uint64_t)(source_val - source_min) * 422 target_val = DIV_ROUND_CLOSEST_ULL((uint64_t)(source_val - source_min) *
423 (target_max - target_min); 423 (target_max - target_min), source_max - source_min);
424 do_div(target_val, source_max - source_min);
425 target_val += target_min; 424 target_val += target_min;
426 425
427 return target_val; 426 return target_val;
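Moving DIV_ROUND_CLOSEST_ULL into intel_drv.h lets scale() round the backlight interpolation to the nearest step instead of truncating it: add half the divisor, then do the 64-bit divide. A standalone version of the same arithmetic:

#include <stdio.h>
#include <stdint.h>

/* Same idea as the DIV_ROUND_CLOSEST_ULL macro above, written as a
 * plain function: add half the divisor first, then truncate. */
static uint64_t div_round_closest_ull(uint64_t n, uint32_t d)
{
	return (n + d / 2) / d;
}

int main(void)
{
	/* Truncating gives 7/2 = 3; rounding to closest gives 4. */
	printf("%llu\n", (unsigned long long)div_round_closest_ull(7, 2));
	return 0;
}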
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c
index 552fdbd45ebe..1d0e33fb5f61 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnv50.c
@@ -113,6 +113,8 @@
113#define IS_NVA3F(x) (((x) > 0xa0 && (x) < 0xaa) || (x) == 0xaf) 113#define IS_NVA3F(x) (((x) > 0xa0 && (x) < 0xaa) || (x) == 0xaf)
114#define IS_NVAAF(x) ((x) >= 0xaa && (x) <= 0xac) 114#define IS_NVAAF(x) ((x) >= 0xaa && (x) <= 0xac)
115 115
116#include <subdev/fb.h>
117
116/* 118/*
117 * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's 119 * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's
118 * the GPU itself that does context-switching, but it needs a special 120 * the GPU itself that does context-switching, but it needs a special
@@ -569,8 +571,12 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
569 gr_def(ctx, 0x407d08, 0x00010040); 571 gr_def(ctx, 0x407d08, 0x00010040);
570 else if (device->chipset < 0xa0) 572 else if (device->chipset < 0xa0)
571 gr_def(ctx, 0x407d08, 0x00390040); 573 gr_def(ctx, 0x407d08, 0x00390040);
572 else 574 else {
573 gr_def(ctx, 0x407d08, 0x003d0040); 575 if (nouveau_fb(device)->ram->type != NV_MEM_TYPE_GDDR5)
576 gr_def(ctx, 0x407d08, 0x003d0040);
577 else
578 gr_def(ctx, 0x407d08, 0x003c0040);
579 }
574 gr_def(ctx, 0x407d0c, 0x00000022); 580 gr_def(ctx, 0x407d0c, 0x00000022);
575 } 581 }
576 582
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 589dbb582da2..fd3dbd59d73e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -400,15 +400,20 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
400 struct nouveau_channel **pchan) 400 struct nouveau_channel **pchan)
401{ 401{
402 struct nouveau_cli *cli = (void *)nvif_client(&device->base); 402 struct nouveau_cli *cli = (void *)nvif_client(&device->base);
403 bool super;
403 int ret; 404 int ret;
404 405
406 /* hack until fencenv50 is fixed, and agp access relaxed */
407 super = cli->base.super;
408 cli->base.super = true;
409
405 ret = nouveau_channel_ind(drm, device, handle, arg0, pchan); 410 ret = nouveau_channel_ind(drm, device, handle, arg0, pchan);
406 if (ret) { 411 if (ret) {
407 NV_PRINTK(debug, cli, "ib channel create, %d\n", ret); 412 NV_PRINTK(debug, cli, "ib channel create, %d\n", ret);
408 ret = nouveau_channel_dma(drm, device, handle, pchan); 413 ret = nouveau_channel_dma(drm, device, handle, pchan);
409 if (ret) { 414 if (ret) {
410 NV_PRINTK(debug, cli, "dma channel create, %d\n", ret); 415 NV_PRINTK(debug, cli, "dma channel create, %d\n", ret);
411 return ret; 416 goto done;
412 } 417 }
413 } 418 }
414 419
@@ -416,8 +421,9 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
416 if (ret) { 421 if (ret) {
417 NV_PRINTK(error, cli, "channel failed to initialise, %d\n", ret); 422 NV_PRINTK(error, cli, "channel failed to initialise, %d\n", ret);
418 nouveau_channel_del(pchan); 423 nouveau_channel_del(pchan);
419 return ret;
420 } 424 }
421 425
422 return 0; 426done:
427 cli->base.super = super;
428 return ret;
423} 429}
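The nouveau_channel_new() fix temporarily raises the client's "super" privilege around channel creation and, via the shared done label, restores the saved value on every exit path, including the error returns that previously leaked the override. The pattern in isolation (the struct and callback are placeholders, not nouveau APIs):

#include <stdbool.h>

struct object { bool super; };

/* Sketch of the save/override/restore-on-all-paths pattern used
 * above: a single exit point guarantees the saved flag comes back
 * even when the call fails. */
static int with_elevated(struct object *obj,
			 int (*do_call)(struct object *))
{
	bool saved = obj->super;
	int ret;

	obj->super = true;
	ret = do_call(obj);

	obj->super = saved;	/* restored on success and error alike */
	return ret;
}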
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index af9e78546688..0d1396266857 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -572,7 +572,6 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc,
 	struct qxl_framebuffer *qfb;
 	struct qxl_bo *bo, *old_bo = NULL;
 	struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
-	uint32_t width, height, base_offset;
 	bool recreate_primary = false;
 	int ret;
 	int surf_id;
@@ -602,9 +601,10 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc,
 	if (qcrtc->index == 0)
 		recreate_primary = true;
 
-	width = mode->hdisplay;
-	height = mode->vdisplay;
-	base_offset = 0;
+	if (bo->surf.stride * bo->surf.height > qdev->vram_size) {
+		DRM_ERROR("Mode doesn't fit in vram size (vgamem)");
+		return -EINVAL;
+	}
 
 	ret = qxl_bo_reserve(bo, false);
 	if (ret != 0)
@@ -618,10 +618,10 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc,
 	if (recreate_primary) {
 		qxl_io_destroy_primary(qdev);
 		qxl_io_log(qdev,
-			   "recreate primary: %dx%d (was %dx%d,%d,%d)\n",
-			   width, height, bo->surf.width,
-			   bo->surf.height, bo->surf.stride, bo->surf.format);
-		qxl_io_create_primary(qdev, base_offset, bo);
+			   "recreate primary: %dx%d,%d,%d\n",
+			   bo->surf.width, bo->surf.height,
+			   bo->surf.stride, bo->surf.format);
+		qxl_io_create_primary(qdev, 0, bo);
 		bo->is_primary = true;
 	}
 
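
Note: the new qxl guard rejects modes whose primary surface cannot fit in VRAM. The size check is plain arithmetic, stride (bytes per scanline) times height; a hedged sketch, with types chosen for illustration:

    /* Sketch: does a stride x height surface fit in vram_size bytes?
     * Mirrors the check added above. */
    static bool surface_fits_vram(u32 stride, u32 height, u64 vram_size)
    {
    	u64 bytes = (u64)stride * height;	/* total framebuffer bytes */

    	return bytes <= vram_size;
    }
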
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 300d971187c4..0b2929de9f41 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "btcd.h"
 #include "r600_dpm.h"
 #include "cypress_dpm.h"
@@ -1170,6 +1171,23 @@ static const struct radeon_blacklist_clocks btc_blacklist_clocks[] =
 	{ 25000, 30000, RADEON_SCLK_UP }
 };
 
+void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+						     u32 *max_clock)
+{
+	u32 i, clock = 0;
+
+	if ((table == NULL) || (table->count == 0)) {
+		*max_clock = clock;
+		return;
+	}
+
+	for (i = 0; i < table->count; i++) {
+		if (clock < table->entries[i].clk)
+			clock = table->entries[i].clk;
+	}
+	*max_clock = clock;
+}
+
 void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table,
 					u32 clock, u16 max_voltage, u16 *voltage)
 {
diff --git a/drivers/gpu/drm/radeon/btc_dpm.h b/drivers/gpu/drm/radeon/btc_dpm.h
index 1a15e0e41950..3b6f12b7760b 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.h
+++ b/drivers/gpu/drm/radeon/btc_dpm.h
@@ -46,6 +46,8 @@ void btc_adjust_clock_combinations(struct radeon_device *rdev,
 			    struct rv7xx_pl *pl);
 void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table,
 					u32 clock, u16 max_voltage, u16 *voltage);
+void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+						     u32 *max_clock);
 void btc_apply_voltage_delta_rules(struct radeon_device *rdev,
 				   u16 max_vddc, u16 max_vddci,
 				   u16 *vddc, u16 *vddci);
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index f5c8c0445a94..11a55e9dad7f 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -24,6 +24,7 @@
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "radeon_ucode.h"
 #include "cikd.h"
 #include "r600_dpm.h"
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index c77dad1a4576..4e8432d07f15 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -611,16 +611,19 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
 {
 	unsigned i;
 	int r;
-	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	unsigned index;
 	u32 tmp;
+	u64 gpu_addr;
 
-	if (!ptr) {
-		DRM_ERROR("invalid vram scratch pointer\n");
-		return -EINVAL;
-	}
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		index = R600_WB_DMA_RING_TEST_OFFSET;
+	else
+		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
+
+	gpu_addr = rdev->wb.gpu_addr + index;
 
 	tmp = 0xCAFEDEAD;
-	writel(tmp, ptr);
+	rdev->wb.wb[index/4] = cpu_to_le32(tmp);
 
 	r = radeon_ring_lock(rdev, ring, 5);
 	if (r) {
@@ -628,14 +631,14 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
 		return r;
 	}
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
+	radeon_ring_write(ring, lower_32_bits(gpu_addr));
+	radeon_ring_write(ring, upper_32_bits(gpu_addr));
 	radeon_ring_write(ring, 1); /* number of DWs to follow */
 	radeon_ring_write(ring, 0xDEADBEEF);
 	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = readl(ptr);
+		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
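
Note: both DMA ring tests (here and in r600_dma.c below) switch from the VRAM scratch register to a slot in the GPU's write-back (WB) page: the CPU seeds a sentinel in the slot, asks the ring to overwrite it, then polls for the new value. A simplified sketch of that handshake, with the WB slot modeled as a plain little-endian word; queue_gpu_write() is a hypothetical stand-in for the ring commands in the hunk:

    /* Sketch of the write-back ring-test handshake used above. */
    #define TEST_SEED	0xCAFEDEAD
    #define TEST_MAGIC	0xDEADBEEF

    static int wb_ring_test(__le32 *wb_slot, unsigned timeout_us)
    {
    	unsigned i;

    	*wb_slot = cpu_to_le32(TEST_SEED);	/* CPU seeds the slot */

    	queue_gpu_write(TEST_MAGIC);		/* hypothetical: GPU writes MAGIC to the slot */

    	for (i = 0; i < timeout_us; i++) {	/* poll until the GPU write lands */
    		if (le32_to_cpu(*wb_slot) == TEST_MAGIC)
    			return 0;
    		udelay(1);
    	}
    	return -ETIMEDOUT;
    }
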
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index 47d31e915758..9aad0327e4d1 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "evergreend.h"
 #include "r600_dpm.h"
 #include "cypress_dpm.h"
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index 950af153f30e..2fe8cfc966d9 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -32,7 +32,7 @@ static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector = NULL;
 	u32 tmp;
-	u8 *sadb;
+	u8 *sadb = NULL;
 	int sad_count;
 
 	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
@@ -49,8 +49,8 @@ static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 
 	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
 	if (sad_count < 0) {
-		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
-		return;
+		DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		sad_count = 0;
 	}
 
 	/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index c0bbf68dbc27..f312edf4d50e 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -155,7 +155,7 @@ void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector = NULL;
 	u32 offset, tmp;
-	u8 *sadb;
+	u8 *sadb = NULL;
 	int sad_count;
 
 	if (!dig || !dig->afmt || !dig->afmt->pin)
@@ -176,9 +176,9 @@ void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), &sadb);
-	if (sad_count <= 0) {
-		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
-		return;
+	if (sad_count < 0) {
+		DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		sad_count = 0;
 	}
 
 	/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 2514d659b1ba..53abd9b17a50 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -133,7 +133,7 @@ static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector = NULL;
 	u32 tmp;
-	u8 *sadb;
+	u8 *sadb = NULL;
 	int sad_count;
 
 	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
@@ -149,9 +149,9 @@ static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder)
 	}
 
 	sad_count = drm_edid_to_speaker_allocation(radeon_connector_edid(connector), &sadb);
-	if (sad_count <= 0) {
-		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
-		return;
+	if (sad_count < 0) {
+		DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		sad_count = 0;
 	}
 
 	/* program the speaker allocation */
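
Note: all three speaker-allocation helpers now degrade gracefully. A failed EDID read logs at debug level and falls through with sad_count forced to 0 and sadb still NULL, instead of returning early. That only works because sadb is initialized to NULL, so any later kfree(sadb) cleanup stays safe (kfree(NULL) is a no-op). A condensed sketch of the pattern; read_speaker_block() is a hypothetical stand-in for the EDID helper:

    /* Sketch of the degrade-gracefully pattern from the three hunks above. */
    u8 *sadb = NULL;
    int sad_count;

    sad_count = read_speaker_block(&sadb);	/* hypothetical EDID helper */
    if (sad_count < 0) {
    	pr_debug("no Speaker Allocation Data Block: %d\n", sad_count);
    	sad_count = 0;			/* treat as "no speakers described" */
    }
    /* ...program hardware using sad_count/sadb... */
    kfree(sadb);				/* safe even when sadb stayed NULL */
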
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index 715b181c6243..6d2f16cf2c1c 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -23,6 +23,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "nid.h"
 #include "r600_dpm.h"
 #include "ni_dpm.h"
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 100189ec5fa8..aabc343b9a8f 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -232,16 +232,19 @@ int r600_dma_ring_test(struct radeon_device *rdev,
 {
 	unsigned i;
 	int r;
-	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	unsigned index;
 	u32 tmp;
+	u64 gpu_addr;
 
-	if (!ptr) {
-		DRM_ERROR("invalid vram scratch pointer\n");
-		return -EINVAL;
-	}
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		index = R600_WB_DMA_RING_TEST_OFFSET;
+	else
+		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
+
+	gpu_addr = rdev->wb.gpu_addr + index;
 
 	tmp = 0xCAFEDEAD;
-	writel(tmp, ptr);
+	rdev->wb.wb[index/4] = cpu_to_le32(tmp);
 
 	r = radeon_ring_lock(rdev, ring, 4);
 	if (r) {
@@ -249,13 +252,13 @@ int r600_dma_ring_test(struct radeon_device *rdev,
 		return r;
 	}
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+	radeon_ring_write(ring, lower_32_bits(gpu_addr));
+	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
 	radeon_ring_write(ring, 0xDEADBEEF);
 	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = readl(ptr);
+		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index 9c61b74ef441..f6309bd23e01 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "r600d.h"
 #include "r600_dpm.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index f7c4b226a284..a9717b3fbf1b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1133,6 +1133,8 @@ struct radeon_wb {
 #define R600_WB_EVENT_OFFSET             3072
 #define CIK_WB_CP1_WPTR_OFFSET           3328
 #define CIK_WB_CP2_WPTR_OFFSET           3584
+#define R600_WB_DMA_RING_TEST_OFFSET     3588
+#define CAYMAN_WB_DMA1_RING_TEST_OFFSET  3592
 
 /**
  * struct radeon_pm - power management datas
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index f41cc1538e48..ea2676954dde 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1130,7 +1130,7 @@ static void radeon_check_arguments(struct radeon_device *rdev)
 	if (radeon_vm_block_size == -1) {
 
 		/* Total bits covered by PD + PTs */
-		unsigned bits = ilog2(radeon_vm_size) + 17;
+		unsigned bits = ilog2(radeon_vm_size) + 18;
 
 		/* Make sure the PD is 4K in size up to 8GB address space.
 		   Above that split equal between PD and PTs */
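
Note: the bump from 17 to 18 fixes the page-count math. With radeon_vm_size given in GiB (the surrounding code's convention) and 4 KiB pages, one GiB of address space is 2^30 bytes, i.e. 2^18 pages, so the total bits covered by PD plus PTs is ilog2(size) + 18. Worked check for an 8 GiB space: 8 GiB = 2^33 bytes = 2^21 pages, and ilog2(8) + 18 = 3 + 18 = 21.

    /* Worked check of the ilog2(size_gb) + 18 formula (assumes 4 KiB pages). */
    unsigned vm_size_gb = 8;			/* VM size in GiB */
    unsigned bits = ilog2(vm_size_gb) + 18;	/* 3 + 18 = 21 */
    u64 pages = 1ULL << bits;			/* 2^21 pages */
    u64 bytes = pages << 12;			/* 2^33 bytes = 8 GiB, matches */
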
diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c
index 02f7710de470..9031f4b69824 100644
--- a/drivers/gpu/drm/radeon/rs780_dpm.c
+++ b/drivers/gpu/drm/radeon/rs780_dpm.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "rs780d.h"
 #include "r600_dpm.h"
 #include "rs780_dpm.h"
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index e7045b085715..6a5c233361e9 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "rv6xxd.h"
 #include "r600_dpm.h"
 #include "rv6xx_dpm.h"
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index 3c76e1dcdf04..755a8f96fe46 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -24,6 +24,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "rv770d.h"
 #include "r600_dpm.h"
 #include "rv770_dpm.h"
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 9e4d5d7d348f..a53c2e79d9cb 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -23,6 +23,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "sid.h"
 #include "r600_dpm.h"
 #include "si_dpm.h"
@@ -2916,6 +2917,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 	bool disable_sclk_switching = false;
 	u32 mclk, sclk;
 	u16 vddc, vddci;
+	u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
 	int i;
 
 	if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
@@ -2949,6 +2951,29 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		}
 	}
 
+	/* limit clocks to max supported clocks based on voltage dependency tables */
+	btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+							&max_sclk_vddc);
+	btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+							&max_mclk_vddci);
+	btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+							&max_mclk_vddc);
+
+	for (i = 0; i < ps->performance_level_count; i++) {
+		if (max_sclk_vddc) {
+			if (ps->performance_levels[i].sclk > max_sclk_vddc)
+				ps->performance_levels[i].sclk = max_sclk_vddc;
+		}
+		if (max_mclk_vddci) {
+			if (ps->performance_levels[i].mclk > max_mclk_vddci)
+				ps->performance_levels[i].mclk = max_mclk_vddci;
+		}
+		if (max_mclk_vddc) {
+			if (ps->performance_levels[i].mclk > max_mclk_vddc)
+				ps->performance_levels[i].mclk = max_mclk_vddc;
+		}
+	}
+
 	/* XXX validate the min clocks required for display */
 
 	if (disable_mclk_switching) {
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index 3f0e8d7b8dbe..1f8a8833e1be 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -23,6 +23,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "sumod.h"
 #include "r600_dpm.h"
 #include "cypress_dpm.h"
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 57f780053b3e..b4ec5c4e7969 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -23,6 +23,7 @@
 
 #include "drmP.h"
 #include "radeon.h"
+#include "radeon_asic.h"
 #include "trinityd.h"
 #include "r600_dpm.h"
 #include "trinity_dpm.h"
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8f5cec67c47d..d395b0bef73b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -709,6 +709,7 @@ out:
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 				uint32_t mem_type,
+				const struct ttm_place *place,
 				bool interruptible,
 				bool no_wait_gpu)
 {
@@ -720,8 +721,21 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	spin_lock(&glob->lru_lock);
 	list_for_each_entry(bo, &man->lru, lru) {
 		ret = __ttm_bo_reserve(bo, false, true, false, NULL);
-		if (!ret)
+		if (!ret) {
+			if (place && (place->fpfn || place->lpfn)) {
+				/* Don't evict this BO if it's outside of the
+				 * requested placement range
+				 */
+				if (place->fpfn >= (bo->mem.start + bo->mem.size) ||
+				    (place->lpfn && place->lpfn <= bo->mem.start)) {
+					__ttm_bo_unreserve(bo);
+					ret = -EBUSY;
+					continue;
+				}
+			}
+
 			break;
+		}
 	}
 
 	if (ret) {
@@ -782,7 +796,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type,
+		ret = ttm_mem_evict_first(bdev, mem_type, place,
 					  interruptible, no_wait_gpu);
 		if (unlikely(ret != 0))
 			return ret;
@@ -994,9 +1008,9 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
 
 	for (i = 0; i < placement->num_placement; i++) {
 		const struct ttm_place *heap = &placement->placement[i];
-		if (mem->mm_node && heap->lpfn != 0 &&
+		if (mem->mm_node &&
 		    (mem->start < heap->fpfn ||
-		     mem->start + mem->num_pages > heap->lpfn))
+		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
 			continue;
 
 		*new_flags = heap->flags;
@@ -1007,9 +1021,9 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
 
 	for (i = 0; i < placement->num_busy_placement; i++) {
 		const struct ttm_place *heap = &placement->busy_placement[i];
-		if (mem->mm_node && heap->lpfn != 0 &&
+		if (mem->mm_node &&
 		    (mem->start < heap->fpfn ||
-		     mem->start + mem->num_pages > heap->lpfn))
+		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
 			continue;
 
 		*new_flags = heap->flags;
@@ -1233,7 +1247,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	spin_lock(&glob->lru_lock);
 	while (!list_empty(&man->lru)) {
 		spin_unlock(&glob->lru_lock);
-		ret = ttm_mem_evict_first(bdev, mem_type, false, false);
+		ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, false);
 		if (ret) {
 			if (allow_errors) {
 				return ret;
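
Note: ttm_mem_evict_first() now receives the placement being satisfied and skips BOs lying wholly outside the requested [fpfn, lpfn) page range, since evicting those would free no usable space. The test is a standard disjoint-interval check, with lpfn == 0 meaning "no upper bound" as elsewhere in TTM. A minimal sketch:

    /* Sketch: would evicting a BO at [start, start + size) help satisfy
     * a request constrained to [fpfn, lpfn)?  (lpfn == 0: unbounded above) */
    static bool bo_overlaps_range(unsigned long start, unsigned long size,
    			      unsigned long fpfn, unsigned long lpfn)
    {
    	if (fpfn >= start + size)	/* BO ends below the range */
    		return false;
    	if (lpfn && lpfn <= start)	/* BO starts above the range */
    		return false;
    	return true;			/* intervals overlap */
    }
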
diff --git a/drivers/hwmon/menf21bmc_hwmon.c b/drivers/hwmon/menf21bmc_hwmon.c
index c92229d321c9..afc6b58eaa62 100644
--- a/drivers/hwmon/menf21bmc_hwmon.c
+++ b/drivers/hwmon/menf21bmc_hwmon.c
@@ -21,6 +21,7 @@
 #include <linux/jiffies.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/err.h>
 
 #define DRV_NAME "menf21bmc_hwmon"
 
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 0bea5776bcbc..3effa931fce2 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2185,7 +2185,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 		isert_cmd->tx_desc.num_sge = 2;
 	}
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
 
 	pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
 
@@ -2871,7 +2871,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 			     &isert_cmd->tx_desc.iscsi_header);
 		isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
 		isert_init_send_wr(isert_conn, isert_cmd,
-				   &isert_cmd->tx_desc.send_wr, true);
+				   &isert_cmd->tx_desc.send_wr, false);
 		isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
 		wr->send_wr_num += 1;
 	}
@@ -3140,7 +3140,7 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 
 accept_wait:
 	ret = down_interruptible(&isert_np->np_sem);
-	if (max_accept > 5)
+	if (ret || max_accept > 5)
 		return -ENODEV;
 
 	spin_lock_bh(&np->np_thread_lock);
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index aa29198fca3e..7440c58b8e6f 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -9,26 +9,21 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
 #include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/device.h>
 #include <linux/timer.h>
-#include <linux/err.h>
-#include <linux/ctype.h>
-#include <linux/leds.h>
 #include "leds.h"
 
 static struct class *leds_class;
 
-static void led_update_brightness(struct led_classdev *led_cdev)
-{
-	if (led_cdev->brightness_get)
-		led_cdev->brightness = led_cdev->brightness_get(led_cdev);
-}
-
 static ssize_t brightness_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -59,14 +54,14 @@ static ssize_t brightness_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(brightness);
 
-static ssize_t led_max_brightness_show(struct device *dev,
+static ssize_t max_brightness_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%u\n", led_cdev->max_brightness);
 }
-static DEVICE_ATTR(max_brightness, 0444, led_max_brightness_show, NULL);
+static DEVICE_ATTR_RO(max_brightness);
 
 #ifdef CONFIG_LEDS_TRIGGERS
 static DEVICE_ATTR(trigger, 0644, led_trigger_show, led_trigger_store);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 71b40d3bf776..aaa8eba9099f 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -12,10 +12,11 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/leds.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/rwsem.h>
-#include <linux/leds.h>
 #include "leds.h"
 
 DECLARE_RWSEM(leds_list_lock);
@@ -126,3 +127,19 @@ void led_set_brightness(struct led_classdev *led_cdev,
 	__led_set_brightness(led_cdev, brightness);
 }
 EXPORT_SYMBOL(led_set_brightness);
+
+int led_update_brightness(struct led_classdev *led_cdev)
+{
+	int ret = 0;
+
+	if (led_cdev->brightness_get) {
+		ret = led_cdev->brightness_get(led_cdev);
+		if (ret >= 0) {
+			led_cdev->brightness = ret;
+			return 0;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(led_update_brightness);
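
Note: led_update_brightness() moves from a file-local void helper into an exported function that reports read failures, so callers can distinguish "no brightness_get hook" (returns 0, cached value unchanged) from a hardware error (negative return). A hedged usage sketch:

    /* Sketch: refresh the cached brightness before reporting it. */
    int report_brightness(struct led_classdev *led_cdev)
    {
    	int ret = led_update_brightness(led_cdev);	/* may fail for HW errors */

    	if (ret < 0)
    		return ret;			/* propagate the driver error */

    	return led_cdev->brightness;		/* now freshly cached */
    }
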
diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c
index 1c4ed5510f35..75717ba68ae0 100644
--- a/drivers/leds/leds-gpio-register.c
+++ b/drivers/leds/leds-gpio-register.c
@@ -7,9 +7,9 @@
  * Free Software Foundation.
  */
 #include <linux/err.h>
+#include <linux/leds.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <linux/leds.h>
 
 /**
  * gpio_led_register_device - register a gpio-led device
@@ -28,6 +28,9 @@ struct platform_device *__init gpio_led_register_device(
 	struct platform_device *ret;
 	struct gpio_led_platform_data _pdata = *pdata;
 
+	if (!pdata->num_leds)
+		return ERR_PTR(-EINVAL);
+
 	_pdata.leds = kmemdup(pdata->leds,
 			pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL);
 	if (!_pdata.leds)
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 57ff20fecf57..b4518c8751c8 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -10,17 +10,17 @@
  * published by the Free Software Foundation.
  *
  */
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
+#include <linux/err.h>
 #include <linux/gpio.h>
+#include <linux/kernel.h>
 #include <linux/leds.h>
+#include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <linux/module.h>
-#include <linux/err.h>
 
 struct gpio_led_data {
 	struct led_classdev cdev;
@@ -36,7 +36,7 @@ struct gpio_led_data {
 
 static void gpio_led_work(struct work_struct *work)
 {
-	struct gpio_led_data	*led_dat =
+	struct gpio_led_data *led_dat =
 		container_of(work, struct gpio_led_data, work);
 
 	if (led_dat->blinking) {
@@ -235,14 +235,12 @@ static struct gpio_leds_priv *gpio_leds_create_of(struct platform_device *pdev)
 }
 #endif /* CONFIG_OF_GPIO */
 
-
 static int gpio_led_probe(struct platform_device *pdev)
 {
 	struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct gpio_leds_priv *priv;
 	int i, ret = 0;
 
-
 	if (pdata && pdata->num_leds) {
 		priv = devm_kzalloc(&pdev->dev,
 				    sizeof_gpio_leds_priv(pdata->num_leds),
diff --git a/drivers/leds/leds-lp3944.c b/drivers/leds/leds-lp3944.c
index 8e1abdcd4c9d..53144fb96167 100644
--- a/drivers/leds/leds-lp3944.c
+++ b/drivers/leds/leds-lp3944.c
@@ -335,7 +335,8 @@ static int lp3944_configure(struct i2c_client *client,
 	}
 
 	/* to expose the default value to userspace */
-	led->ldev.brightness = led->status;
+	led->ldev.brightness =
+		(enum led_brightness) led->status;
 
 	/* Set the default led status */
 	err = lp3944_led_set(led, led->status);
diff --git a/drivers/leds/trigger/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c
index 35812e3a37f2..c86c41826476 100644
--- a/drivers/leds/trigger/ledtrig-gpio.c
+++ b/drivers/leds/trigger/ledtrig-gpio.c
@@ -48,7 +48,7 @@ static void gpio_trig_work(struct work_struct *work)
 	if (!gpio_data->gpio)
 		return;
 
-	tmp = gpio_get_value(gpio_data->gpio);
+	tmp = gpio_get_value_cansleep(gpio_data->gpio);
 	if (gpio_data->inverted)
 		tmp = !tmp;
 
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 6d184dbcaca8..94ed7cefb14d 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -1,3 +1,7 @@
+# Generic MAILBOX API
+
+obj-$(CONFIG_MAILBOX)		+= mailbox.o
+
 obj-$(CONFIG_PL320_MBOX)	+= pl320-ipc.o
 
 obj-$(CONFIG_OMAP2PLUS_MBOX)	+= omap-mailbox.o
diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
new file mode 100644
index 000000000000..afcb430508ec
--- /dev/null
+++ b/drivers/mailbox/mailbox.c
@@ -0,0 +1,465 @@
+/*
+ * Mailbox: Common code for Mailbox controllers and users
+ *
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitops.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox_controller.h>
+
+#define TXDONE_BY_IRQ	BIT(0) /* controller has remote RTR irq */
+#define TXDONE_BY_POLL	BIT(1) /* controller can read status of last TX */
+#define TXDONE_BY_ACK	BIT(2) /* S/W ACK received by Client ticks the TX */
+
+static LIST_HEAD(mbox_cons);
+static DEFINE_MUTEX(con_mutex);
+
+static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
+{
+	int idx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* See if there is any space left */
+	if (chan->msg_count == MBOX_TX_QUEUE_LEN) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -ENOBUFS;
+	}
+
+	idx = chan->msg_free;
+	chan->msg_data[idx] = mssg;
+	chan->msg_count++;
+
+	if (idx == MBOX_TX_QUEUE_LEN - 1)
+		chan->msg_free = 0;
+	else
+		chan->msg_free++;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return idx;
+}
+
+static void msg_submit(struct mbox_chan *chan)
+{
+	unsigned count, idx;
+	unsigned long flags;
+	void *data;
+	int err;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	if (!chan->msg_count || chan->active_req)
+		goto exit;
+
+	count = chan->msg_count;
+	idx = chan->msg_free;
+	if (idx >= count)
+		idx -= count;
+	else
+		idx += MBOX_TX_QUEUE_LEN - count;
+
+	data = chan->msg_data[idx];
+
+	/* Try to submit a message to the MBOX controller */
+	err = chan->mbox->ops->send_data(chan, data);
+	if (!err) {
+		chan->active_req = data;
+		chan->msg_count--;
+	}
+exit:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void tx_tick(struct mbox_chan *chan, int r)
+{
+	unsigned long flags;
+	void *mssg;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	mssg = chan->active_req;
+	chan->active_req = NULL;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	/* Submit next message */
+	msg_submit(chan);
+
+	/* Notify the client */
+	if (mssg && chan->cl->tx_done)
+		chan->cl->tx_done(chan->cl, mssg, r);
+
+	if (chan->cl->tx_block)
+		complete(&chan->tx_complete);
+}
+
+static void poll_txdone(unsigned long data)
+{
+	struct mbox_controller *mbox = (struct mbox_controller *)data;
+	bool txdone, resched = false;
+	int i;
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		if (chan->active_req && chan->cl) {
+			resched = true;
+			txdone = chan->mbox->ops->last_tx_done(chan);
+			if (txdone)
+				tx_tick(chan, 0);
+		}
+	}
+
+	if (resched)
+		mod_timer(&mbox->poll, jiffies +
+				msecs_to_jiffies(mbox->txpoll_period));
+}
+
+/**
+ * mbox_chan_received_data - A way for controller driver to push data
+ *			received from remote to the upper layer.
+ * @chan: Pointer to the mailbox channel on which RX happened.
+ * @mssg: Client specific message typecasted as void *
+ *
+ * After startup and before shutdown any data received on the chan
+ * is passed on to the API via atomic mbox_chan_received_data().
+ * The controller should ACK the RX only after this call returns.
+ */
+void mbox_chan_received_data(struct mbox_chan *chan, void *mssg)
+{
+	/* No buffering the received data */
+	if (chan->cl->rx_callback)
+		chan->cl->rx_callback(chan->cl, mssg);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_received_data);
+
+/**
+ * mbox_chan_txdone - A way for controller driver to notify the
+ *			framework that the last TX has completed.
+ * @chan: Pointer to the mailbox chan on which TX happened.
+ * @r: Status of last TX - OK or ERROR
+ *
+ * The controller that has IRQ for TX ACK calls this atomic API
+ * to tick the TX state machine. It works only if txdone_irq
+ * is set by the controller.
+ */
+void mbox_chan_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ))) {
+		dev_err(chan->mbox->dev,
+			"Controller can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_chan_txdone);
+
+/**
+ * mbox_client_txdone - The way for a client to run the TX state machine.
+ * @chan: Mailbox channel assigned to this client.
+ * @r: Success status of last transmission.
+ *
+ * The client/protocol had received some 'ACK' packet and it notifies
+ * the API that the last packet was sent successfully. This only works
+ * if the controller can't sense TX-Done.
+ */
+void mbox_client_txdone(struct mbox_chan *chan, int r)
+{
+	if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
+		dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+		return;
+	}
+
+	tx_tick(chan, r);
+}
+EXPORT_SYMBOL_GPL(mbox_client_txdone);
+
+/**
+ * mbox_client_peek_data - A way for client driver to pull data
+ *			received from remote by the controller.
+ * @chan: Mailbox channel assigned to this client.
+ *
+ * A poke to controller driver for any received data.
+ * The data is actually passed onto client via the
+ * mbox_chan_received_data()
+ * The call can be made from atomic context, so the controller's
+ * implementation of peek_data() must not sleep.
+ *
+ * Return: True, if controller has, and is going to push after this,
+ *         some data.
+ *         False, if controller doesn't have any data to be read.
+ */
+bool mbox_client_peek_data(struct mbox_chan *chan)
+{
+	if (chan->mbox->ops->peek_data)
+		return chan->mbox->ops->peek_data(chan);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(mbox_client_peek_data);
+
+/**
+ * mbox_send_message - For client to submit a message to be
+ *			sent to the remote.
+ * @chan: Mailbox channel assigned to this client.
+ * @mssg: Client specific message typecasted.
+ *
+ * For client to submit data to the controller destined for a remote
+ * processor. If the client had set 'tx_block', the call will return
+ * either when the remote receives the data or when 'tx_tout' millisecs
+ * run out.
+ * In non-blocking mode, the requests are buffered by the API and a
+ * non-negative token is returned for each queued request. If the request
+ * is not queued, a negative token is returned. Upon failure or successful
+ * TX, the API calls 'tx_done' from atomic context, from which the client
+ * could submit yet another request.
+ * The pointer to message should be preserved until it is sent
+ * over the chan, i.e, tx_done() is made.
+ * This function could be called from atomic context as it simply
+ * queues the data and returns a token against the request.
+ *
+ * Return: Non-negative integer for successful submission (non-blocking mode)
+ *	or transmission over chan (blocking mode).
+ *	Negative value denotes failure.
+ */
+int mbox_send_message(struct mbox_chan *chan, void *mssg)
+{
+	int t;
+
+	if (!chan || !chan->cl)
+		return -EINVAL;
+
+	t = add_to_rbuf(chan, mssg);
+	if (t < 0) {
+		dev_err(chan->mbox->dev, "Try increasing MBOX_TX_QUEUE_LEN\n");
+		return t;
+	}
+
+	msg_submit(chan);
+
+	if (chan->txdone_method == TXDONE_BY_POLL)
+		poll_txdone((unsigned long)chan->mbox);
+
+	if (chan->cl->tx_block && chan->active_req) {
+		unsigned long wait;
+		int ret;
+
+		if (!chan->cl->tx_tout) /* wait forever */
+			wait = msecs_to_jiffies(3600000);
+		else
+			wait = msecs_to_jiffies(chan->cl->tx_tout);
+
+		ret = wait_for_completion_timeout(&chan->tx_complete, wait);
+		if (ret == 0) {
+			t = -EIO;
+			tx_tick(chan, -EIO);
+		}
+	}
+
+	return t;
+}
+EXPORT_SYMBOL_GPL(mbox_send_message);
+
+/**
+ * mbox_request_channel - Request a mailbox channel.
+ * @cl: Identity of the client requesting the channel.
+ * @index: Index of mailbox specifier in 'mboxes' property.
+ *
+ * The Client specifies its requirements and capabilities while asking for
+ * a mailbox channel. It can't be called from atomic context.
+ * The channel is exclusively allocated and can't be used by another
+ * client before the owner calls mbox_free_channel.
+ * After assignment, any packet received on this channel will be
+ * handed over to the client via the 'rx_callback'.
+ * The framework holds reference to the client, so the mbox_client
+ * structure shouldn't be modified until the mbox_free_channel returns.
+ *
+ * Return: Pointer to the channel assigned to the client if successful.
+ *	ERR_PTR for request failure.
+ */
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
+{
+	struct device *dev = cl->dev;
+	struct mbox_controller *mbox;
+	struct of_phandle_args spec;
+	struct mbox_chan *chan;
+	unsigned long flags;
+	int ret;
+
+	if (!dev || !dev->of_node) {
+		pr_debug("%s: No owner device node\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	mutex_lock(&con_mutex);
+
+	if (of_parse_phandle_with_args(dev->of_node, "mboxes",
+				       "#mbox-cells", index, &spec)) {
+		dev_dbg(dev, "%s: can't parse \"mboxes\" property\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-ENODEV);
+	}
+
+	chan = NULL;
+	list_for_each_entry(mbox, &mbox_cons, node)
+		if (mbox->dev->of_node == spec.np) {
+			chan = mbox->of_xlate(mbox, &spec);
+			break;
+		}
+
+	of_node_put(spec.np);
+
+	if (!chan || chan->cl || !try_module_get(mbox->dev->driver->owner)) {
+		dev_dbg(dev, "%s: mailbox not free\n", __func__);
+		mutex_unlock(&con_mutex);
+		return ERR_PTR(-EBUSY);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->msg_free = 0;
+	chan->msg_count = 0;
+	chan->active_req = NULL;
+	chan->cl = cl;
+	init_completion(&chan->tx_complete);
+
+	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
+		chan->txdone_method |= TXDONE_BY_ACK;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	ret = chan->mbox->ops->startup(chan);
+	if (ret) {
+		dev_err(dev, "Unable to startup the chan (%d)\n", ret);
+		mbox_free_channel(chan);
+		chan = ERR_PTR(ret);
+	}
+
+	mutex_unlock(&con_mutex);
+	return chan;
+}
+EXPORT_SYMBOL_GPL(mbox_request_channel);
+
+/**
+ * mbox_free_channel - The client relinquishes control of a mailbox
+ *			channel by this call.
+ * @chan: The mailbox channel to be freed.
+ */
+void mbox_free_channel(struct mbox_chan *chan)
+{
+	unsigned long flags;
+
+	if (!chan || !chan->cl)
+		return;
+
+	chan->mbox->ops->shutdown(chan);
+
+	/* The queued TX requests are simply aborted, no callbacks are made */
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->cl = NULL;
+	chan->active_req = NULL;
+	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+		chan->txdone_method = TXDONE_BY_POLL;
+
+	module_put(chan->mbox->dev->driver->owner);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mbox_free_channel);
+
+static struct mbox_chan *
+of_mbox_index_xlate(struct mbox_controller *mbox,
+		    const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+
+	if (ind >= mbox->num_chans)
+		return NULL;
+
+	return &mbox->chans[ind];
+}
+
+/**
+ * mbox_controller_register - Register the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ *
+ * The controller driver registers its communication channels
+ */
+int mbox_controller_register(struct mbox_controller *mbox)
+{
+	int i, txdone;
+
+	/* Sanity check */
+	if (!mbox || !mbox->dev || !mbox->ops || !mbox->num_chans)
+		return -EINVAL;
+
+	if (mbox->txdone_irq)
+		txdone = TXDONE_BY_IRQ;
+	else if (mbox->txdone_poll)
+		txdone = TXDONE_BY_POLL;
+	else /* It has to be ACK then */
+		txdone = TXDONE_BY_ACK;
+
+	if (txdone == TXDONE_BY_POLL) {
+		mbox->poll.function = &poll_txdone;
+		mbox->poll.data = (unsigned long)mbox;
+		init_timer(&mbox->poll);
+	}
+
+	for (i = 0; i < mbox->num_chans; i++) {
+		struct mbox_chan *chan = &mbox->chans[i];
+
+		chan->cl = NULL;
+		chan->mbox = mbox;
+		chan->txdone_method = txdone;
+		spin_lock_init(&chan->lock);
+	}
+
+	if (!mbox->of_xlate)
+		mbox->of_xlate = of_mbox_index_xlate;
+
+	mutex_lock(&con_mutex);
+	list_add_tail(&mbox->node, &mbox_cons);
+	mutex_unlock(&con_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mbox_controller_register);
+
+/**
+ * mbox_controller_unregister - Unregister the mailbox controller
+ * @mbox:	Pointer to the mailbox controller.
+ */
+void mbox_controller_unregister(struct mbox_controller *mbox)
+{
+	int i;
+
+	if (!mbox)
+		return;
+
+	mutex_lock(&con_mutex);
+
+	list_del(&mbox->node);
+
+	for (i = 0; i < mbox->num_chans; i++)
+		mbox_free_channel(&mbox->chans[i]);
+
+	if (mbox->txdone_poll)
+		del_timer_sync(&mbox->poll);
+
+	mutex_unlock(&con_mutex);
+}
+EXPORT_SYMBOL_GPL(mbox_controller_unregister);
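
Note: the new framework splits the world into controllers (register channels and signal TX completion by IRQ, polling, or client ACK) and clients (queue messages and receive rx_callback/tx_done notifications). A hedged sketch of a minimal blocking client against this API; the channel index, message type, and surrounding driver are illustrative only:

    #include <linux/mailbox_client.h>

    static void demo_rx(struct mbox_client *cl, void *mssg)
    {
    	dev_info(cl->dev, "got reply %p\n", mssg);	/* called from atomic context */
    }

    static int demo_send(struct device *dev, void *msg)
    {
    	struct mbox_client cl = {
    		.dev		= dev,
    		.rx_callback	= demo_rx,
    		.tx_block	= true,		/* mbox_send_message() waits for TX done */
    		.tx_tout	= 500,		/* milliseconds before giving up */
    	};
    	struct mbox_chan *chan;
    	int ret;

    	chan = mbox_request_channel(&cl, 0);	/* index 0 of our "mboxes" property */
    	if (IS_ERR(chan))
    		return PTR_ERR(chan);

    	ret = mbox_send_message(chan, msg);	/* negative on queue/TX failure */
    	mbox_free_channel(chan);
    	return ret < 0 ? ret : 0;
    }
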
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
index d873cbae2fbb..f3755e0aa935 100644
--- a/drivers/mailbox/pl320-ipc.c
+++ b/drivers/mailbox/pl320-ipc.c
@@ -26,7 +26,7 @@
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
-#include <linux/mailbox.h>
+#include <linux/pl320-ipc.h>
 
 #define IPCMxSOURCE(m)		((m) * 0x40)
 #define IPCMxDSET(m)		(((m) * 0x40) + 0x004)
diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
index a9f9c46e5022..63fc63911295 100644
--- a/drivers/pci/pcie/pme.c
+++ b/drivers/pci/pcie/pme.c
@@ -397,6 +397,7 @@ static int pcie_pme_suspend(struct pcie_device *srv)
 	struct pcie_pme_service_data *data = get_service_data(srv);
 	struct pci_dev *port = srv->port;
 	bool wakeup;
+	int ret;
 
 	if (device_may_wakeup(&port->dev)) {
 		wakeup = true;
@@ -407,9 +408,10 @@ static int pcie_pme_suspend(struct pcie_device *srv)
 	}
 	spin_lock_irq(&data->lock);
 	if (wakeup) {
-		enable_irq_wake(srv->irq);
+		ret = enable_irq_wake(srv->irq);
 		data->suspend_level = PME_SUSPEND_WAKEUP;
-	} else {
+	}
+	if (!wakeup || ret) {
 		struct pci_dev *port = srv->port;
 
 		pcie_pme_interrupt_enable(port, false);
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index b800783800a3..ef2dd2e4754b 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -83,6 +83,7 @@ config PWM_BFIN
 config PWM_CLPS711X
 	tristate "CLPS711X PWM support"
 	depends on ARCH_CLPS711X || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  Generic PWM framework driver for Cirrus Logic CLPS711X.
 
@@ -101,6 +102,7 @@ config PWM_EP93XX
 config PWM_FSL_FTM
 	tristate "Freescale FlexTimer Module (FTM) PWM support"
 	depends on OF
+	select REGMAP_MMIO
 	help
 	  Generic FTM PWM framework driver for Freescale VF610 and
 	  Layerscape LS-1 SoCs.
@@ -149,7 +151,7 @@ config PWM_LPC32XX
 
 config PWM_LPSS
 	tristate "Intel LPSS PWM support"
-	depends on ACPI
+	depends on X86
 	help
 	  Generic PWM framework driver for Intel Low Power Subsystem PWM
 	  controller.
@@ -157,6 +159,24 @@ config PWM_LPSS
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-lpss.
 
+config PWM_LPSS_PCI
+	tristate "Intel LPSS PWM PCI driver"
+	depends on PWM_LPSS && PCI
+	help
+	  The PCI driver for Intel Low Power Subsystem PWM controller.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-lpss-pci.
+
+config PWM_LPSS_PLATFORM
+	tristate "Intel LPSS PWM platform driver"
+	depends on PWM_LPSS && ACPI
+	help
+	  The platform driver for Intel Low Power Subsystem PWM controller.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-lpss-platform.
+
 config PWM_MXS
 	tristate "Freescale MXS PWM support"
 	depends on ARCH_MXS && OF
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index f8c577d41091..c458606c3755 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -13,6 +13,8 @@ obj-$(CONFIG_PWM_JZ4740) += pwm-jz4740.o
13obj-$(CONFIG_PWM_LP3943) += pwm-lp3943.o 13obj-$(CONFIG_PWM_LP3943) += pwm-lp3943.o
14obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o 14obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o
15obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o 15obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o
16obj-$(CONFIG_PWM_LPSS_PCI) += pwm-lpss-pci.o
17obj-$(CONFIG_PWM_LPSS_PLATFORM) += pwm-lpss-platform.o
16obj-$(CONFIG_PWM_MXS) += pwm-mxs.o 18obj-$(CONFIG_PWM_MXS) += pwm-mxs.o
17obj-$(CONFIG_PWM_PCA9685) += pwm-pca9685.o 19obj-$(CONFIG_PWM_PCA9685) += pwm-pca9685.o
18obj-$(CONFIG_PWM_PUV3) += pwm-puv3.o 20obj-$(CONFIG_PWM_PUV3) += pwm-puv3.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index d2c35920ff08..966497d10c6e 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -236,7 +236,7 @@ int pwmchip_add(struct pwm_chip *chip)
 	int ret;
 
 	if (!chip || !chip->dev || !chip->ops || !chip->ops->config ||
-	    !chip->ops->enable || !chip->ops->disable)
+	    !chip->ops->enable || !chip->ops->disable || !chip->npwm)
 		return -EINVAL;
 
 	mutex_lock(&pwm_lock);
@@ -602,12 +602,9 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
 	struct pwm_device *pwm = ERR_PTR(-EPROBE_DEFER);
 	const char *dev_id = dev ? dev_name(dev) : NULL;
 	struct pwm_chip *chip = NULL;
-	unsigned int index = 0;
 	unsigned int best = 0;
-	struct pwm_lookup *p;
+	struct pwm_lookup *p, *chosen = NULL;
 	unsigned int match;
-	unsigned int period;
-	enum pwm_polarity polarity;
 
 	/* look up via DT first */
 	if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node)
@@ -653,10 +650,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
 		}
 
 		if (match > best) {
-			chip = pwmchip_find_by_name(p->provider);
-			index = p->index;
-			period = p->period;
-			polarity = p->polarity;
+			chosen = p;
 
 			if (match != 3)
 				best = match;
@@ -665,17 +659,22 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
 		}
 	}
 
-	mutex_unlock(&pwm_lookup_lock);
+	if (!chosen)
+		goto out;
 
-	if (chip)
-		pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id);
-	if (IS_ERR(pwm))
-		return pwm;
+	chip = pwmchip_find_by_name(chosen->provider);
+	if (!chip)
+		goto out;
 
-	pwm_set_period(pwm, period);
-	pwm_set_polarity(pwm, polarity);
+	pwm = pwm_request_from_chip(chip, chosen->index, con_id ?: dev_id);
+	if (IS_ERR(pwm))
+		goto out;
 
+	pwm_set_period(pwm, chosen->period);
+	pwm_set_polarity(pwm, chosen->polarity);
 
+out:
+	mutex_unlock(&pwm_lookup_lock);
 	return pwm;
 }
 EXPORT_SYMBOL_GPL(pwm_get);
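
Note: pwm_get() now remembers the best lookup-table entry ("chosen") during the scan and only resolves the provider chip and applies period/polarity after the scan finishes, with a single unlock point. This avoids acting on a partial match that a later, better entry would have replaced. Schematically, with score() and the lookup list as illustrative stand-ins:

    /* Sketch of the defer-until-scan-completes selection from the hunks above. */
    struct pwm_lookup *p, *chosen = NULL;
    unsigned int best = 0;

    list_for_each_entry(p, &lookup_list, list) {
    	unsigned int match = score(p);	/* hypothetical: 0..3, 3 is exact */

    	if (match > best) {
    		chosen = p;
    		if (match == 3)		/* perfect match: stop scanning */
    			break;
    		best = match;
    	}
    }
    /* Only now touch chosen->provider / index / period / polarity, and only
     * if chosen != NULL; the chip lookup can no longer race a better entry. */
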
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index 6e700a541ca3..d3c22de9ee47 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -102,7 +102,7 @@ static int atmel_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			    int duty_ns, int period_ns)
 {
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
-	unsigned long clk_rate, prd, dty;
+	unsigned long prd, dty;
 	unsigned long long div;
 	unsigned int pres = 0;
 	u32 val;
@@ -113,20 +113,18 @@ static int atmel_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 		return -EBUSY;
 	}
 
-	clk_rate = clk_get_rate(atmel_pwm->clk);
-	div = clk_rate;
+	/* Calculate the period cycles and prescale value */
+	div = (unsigned long long)clk_get_rate(atmel_pwm->clk) * period_ns;
+	do_div(div, NSEC_PER_SEC);
 
-	/* Calculate the period cycles */
 	while (div > PWM_MAX_PRD) {
-		div = clk_rate / (1 << pres);
-		div = div * period_ns;
-		/* 1/Hz = 100000000 ns */
-		do_div(div, 1000000000);
-
-		if (pres++ > PRD_MAX_PRES) {
-			dev_err(chip->dev, "pres exceeds the maximum value\n");
-			return -EINVAL;
-		}
+		div >>= 1;
+		pres++;
+	}
+
+	if (pres > PRD_MAX_PRES) {
+		dev_err(chip->dev, "pres exceeds the maximum value\n");
+		return -EINVAL;
 	}
 
 	/* Calculate the duty cycles */
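
The rewritten Atmel calculation scales the clock rate by the requested period once, then derives the prescaler by repeated halving and bounds-checks only after the loop; the old loop recomputed the dividend from scratch every pass and incremented pres before its range check. A standalone sketch of the same arithmetic, with both limits assumed purely for illustration:

#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/time.h>

#define SKETCH_MAX_PRD	0xFFFF	/* assumed width of the period register */
#define SKETCH_MAX_PRES	10	/* assumed largest prescaler exponent */

/* Compute the prescaler exponent and period cycles for one channel. */
static int sketch_prescale(unsigned long clk_hz, int period_ns,
			   unsigned int *pres, unsigned long *prd)
{
	unsigned long long div = (unsigned long long)clk_hz * period_ns;

	do_div(div, NSEC_PER_SEC);	/* clock cycles per requested period */

	*pres = 0;
	while (div > SKETCH_MAX_PRD) {
		div >>= 1;		/* each step divides the clock by two */
		(*pres)++;
	}

	if (*pres > SKETCH_MAX_PRES)
		return -EINVAL;		/* period too long for the divider */

	*prd = (unsigned long)div;
	return 0;
}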
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index a18bc8fea385..0f2cc7ef7784 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -18,14 +18,14 @@
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
+#include <linux/regmap.h>
 #include <linux/slab.h>
 
 #define FTM_SC			0x00
-#define FTM_SC_CLK_MASK		0x3
-#define FTM_SC_CLK_SHIFT	3
-#define FTM_SC_CLK(c)		(((c) + 1) << FTM_SC_CLK_SHIFT)
+#define FTM_SC_CLK_MASK_SHIFT	3
+#define FTM_SC_CLK_MASK		(3 << FTM_SC_CLK_MASK_SHIFT)
+#define FTM_SC_CLK(c)		(((c) + 1) << FTM_SC_CLK_MASK_SHIFT)
 #define FTM_SC_PS_MASK		0x7
-#define FTM_SC_PS_SHIFT	0
 
 #define FTM_CNT			0x04
 #define FTM_MOD			0x08
@@ -83,7 +83,7 @@ struct fsl_pwm_chip {
 	unsigned int cnt_select;
 	unsigned int clk_ps;
 
-	void __iomem *base;
+	struct regmap *regmap;
 
 	int period_ns;
 
@@ -219,10 +219,11 @@ static unsigned long fsl_pwm_calculate_duty(struct fsl_pwm_chip *fpc,
 					    unsigned long period_ns,
 					    unsigned long duty_ns)
 {
-	unsigned long long val, duty;
+	unsigned long long duty;
+	u32 val;
 
-	val = readl(fpc->base + FTM_MOD);
-	duty = duty_ns * (val + 1);
+	regmap_read(fpc->regmap, FTM_MOD, &val);
+	duty = (unsigned long long)duty_ns * (val + 1);
 	do_div(duty, period_ns);
 
 	return (unsigned long)duty;
@@ -232,7 +233,7 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			  int duty_ns, int period_ns)
 {
 	struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
-	u32 val, period, duty;
+	u32 period, duty;
 
 	mutex_lock(&fpc->lock);
 
@@ -257,11 +258,9 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 			return -EINVAL;
 		}
 
-		val = readl(fpc->base + FTM_SC);
-		val &= ~(FTM_SC_PS_MASK << FTM_SC_PS_SHIFT);
-		val |= fpc->clk_ps;
-		writel(val, fpc->base + FTM_SC);
-		writel(period - 1, fpc->base + FTM_MOD);
+		regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_PS_MASK,
+				   fpc->clk_ps);
+		regmap_write(fpc->regmap, FTM_MOD, period - 1);
 
 		fpc->period_ns = period_ns;
 	}
@@ -270,8 +269,9 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	duty = fsl_pwm_calculate_duty(fpc, period_ns, duty_ns);
 
-	writel(FTM_CSC_MSB | FTM_CSC_ELSB, fpc->base + FTM_CSC(pwm->hwpwm));
-	writel(duty, fpc->base + FTM_CV(pwm->hwpwm));
+	regmap_write(fpc->regmap, FTM_CSC(pwm->hwpwm),
+		     FTM_CSC_MSB | FTM_CSC_ELSB);
+	regmap_write(fpc->regmap, FTM_CV(pwm->hwpwm), duty);
 
 	return 0;
 }
@@ -283,31 +283,28 @@ static int fsl_pwm_set_polarity(struct pwm_chip *chip,
 	struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
 	u32 val;
 
-	val = readl(fpc->base + FTM_POL);
+	regmap_read(fpc->regmap, FTM_POL, &val);
 
 	if (polarity == PWM_POLARITY_INVERSED)
 		val |= BIT(pwm->hwpwm);
 	else
 		val &= ~BIT(pwm->hwpwm);
 
-	writel(val, fpc->base + FTM_POL);
+	regmap_write(fpc->regmap, FTM_POL, val);
 
 	return 0;
 }
 
 static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
 {
-	u32 val;
 	int ret;
 
 	if (fpc->use_count != 0)
 		return 0;
 
 	/* select counter clock source */
-	val = readl(fpc->base + FTM_SC);
-	val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT);
-	val |= FTM_SC_CLK(fpc->cnt_select);
-	writel(val, fpc->base + FTM_SC);
+	regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK,
+			   FTM_SC_CLK(fpc->cnt_select));
 
 	ret = clk_prepare_enable(fpc->clk[fpc->cnt_select]);
 	if (ret)
@@ -327,13 +324,10 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
 static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
-	u32 val;
 	int ret;
 
 	mutex_lock(&fpc->lock);
-	val = readl(fpc->base + FTM_OUTMASK);
-	val &= ~BIT(pwm->hwpwm);
-	writel(val, fpc->base + FTM_OUTMASK);
+	regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm), 0);
 
 	ret = fsl_counter_clock_enable(fpc);
 	mutex_unlock(&fpc->lock);
@@ -343,8 +337,6 @@ static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 
 static void fsl_counter_clock_disable(struct fsl_pwm_chip *fpc)
 {
-	u32 val;
-
 	/*
 	 * already disabled, do nothing
 	 */
@@ -356,9 +348,7 @@ static void fsl_counter_clock_disable(struct fsl_pwm_chip *fpc)
 		return;
 
 	/* no users left, disable PWM counter clock */
-	val = readl(fpc->base + FTM_SC);
-	val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT);
-	writel(val, fpc->base + FTM_SC);
+	regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK, 0);
 
 	clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
 	clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
@@ -370,14 +360,12 @@ static void fsl_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 	u32 val;
 
 	mutex_lock(&fpc->lock);
-	val = readl(fpc->base + FTM_OUTMASK);
-	val |= BIT(pwm->hwpwm);
-	writel(val, fpc->base + FTM_OUTMASK);
+	regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm),
+			   BIT(pwm->hwpwm));
 
 	fsl_counter_clock_disable(fpc);
 
-	val = readl(fpc->base + FTM_OUTMASK);
-
+	regmap_read(fpc->regmap, FTM_OUTMASK, &val);
 	if ((val & 0xFF) == 0xFF)
 		fpc->period_ns = 0;
 
@@ -402,19 +390,28 @@ static int fsl_pwm_init(struct fsl_pwm_chip *fpc)
 	if (ret)
 		return ret;
 
-	writel(0x00, fpc->base + FTM_CNTIN);
-	writel(0x00, fpc->base + FTM_OUTINIT);
-	writel(0xFF, fpc->base + FTM_OUTMASK);
+	regmap_write(fpc->regmap, FTM_CNTIN, 0x00);
+	regmap_write(fpc->regmap, FTM_OUTINIT, 0x00);
+	regmap_write(fpc->regmap, FTM_OUTMASK, 0xFF);
 
 	clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_SYS]);
 
 	return 0;
 }
 
+static const struct regmap_config fsl_pwm_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+
+	.max_register = FTM_PWMLOAD,
+};
+
 static int fsl_pwm_probe(struct platform_device *pdev)
 {
 	struct fsl_pwm_chip *fpc;
 	struct resource *res;
+	void __iomem *base;
 	int ret;
 
 	fpc = devm_kzalloc(&pdev->dev, sizeof(*fpc), GFP_KERNEL);
@@ -426,9 +423,16 @@ static int fsl_pwm_probe(struct platform_device *pdev)
 	fpc->chip.dev = &pdev->dev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	fpc->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(fpc->base))
-		return PTR_ERR(fpc->base);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base,
+						&fsl_pwm_regmap_config);
+	if (IS_ERR(fpc->regmap)) {
+		dev_err(&pdev->dev, "regmap init failed\n");
+		return PTR_ERR(fpc->regmap);
+	}
 
 	fpc->clk[FSL_PWM_CLK_SYS] = devm_clk_get(&pdev->dev, "ftm_sys");
 	if (IS_ERR(fpc->clk[FSL_PWM_CLK_SYS])) {
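
Each converted call site above folds a readl()/modify/writel() triple into regmap_update_bits(), which performs the read-modify-write under the regmap core's own locking. A small sketch of the equivalence; the register offset and field mask are placeholders rather than real FlexTimer definitions:

#include <linux/io.h>
#include <linux/regmap.h>

#define SKETCH_REG	0x00		/* placeholder register offset */
#define SKETCH_MASK	(0x3 << 3)	/* placeholder bit field */

/* Open-coded MMIO read-modify-write, as the driver used to do it. */
static void sketch_rmw_mmio(void __iomem *base, u32 bits)
{
	u32 val = readl(base + SKETCH_REG);

	val &= ~SKETCH_MASK;
	val |= bits & SKETCH_MASK;
	writel(val, base + SKETCH_REG);
}

/* The regmap equivalent: one call, serialized by the regmap core. */
static int sketch_rmw_regmap(struct regmap *map, u32 bits)
{
	return regmap_update_bits(map, SKETCH_REG, SKETCH_MASK, bits);
}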
diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index 5449d9150d40..f8b5f109c1ab 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/pwm.h>
 #include <linux/of.h>
@@ -21,24 +22,30 @@
 
 /* i.MX1 and i.MX21 share the same PWM function block: */
 
 #define MX1_PWMC			0x00   /* PWM Control Register */
 #define MX1_PWMS			0x04   /* PWM Sample Register */
 #define MX1_PWMP			0x08   /* PWM Period Register */
 
 #define MX1_PWMC_EN			(1 << 4)
 
 /* i.MX27, i.MX31, i.MX35 share the same PWM function block: */
 
 #define MX3_PWMCR			0x00    /* PWM Control Register */
-#define MX3_PWMSAR			0x0C    /* PWM Sample Register */
-#define MX3_PWMPR			0x10    /* PWM Period Register */
-#define MX3_PWMCR_PRESCALER(x)		(((x - 1) & 0xFFF) << 4)
-#define MX3_PWMCR_DOZEEN		(1 << 24)
-#define MX3_PWMCR_WAITEN		(1 << 23)
+#define MX3_PWMSR			0x04    /* PWM Status Register */
+#define MX3_PWMSAR			0x0C    /* PWM Sample Register */
+#define MX3_PWMPR			0x10    /* PWM Period Register */
+#define MX3_PWMCR_PRESCALER(x)		((((x) - 1) & 0xFFF) << 4)
+#define MX3_PWMCR_DOZEEN		(1 << 24)
+#define MX3_PWMCR_WAITEN		(1 << 23)
 #define MX3_PWMCR_DBGEN			(1 << 22)
 #define MX3_PWMCR_CLKSRC_IPG_HIGH	(2 << 16)
 #define MX3_PWMCR_CLKSRC_IPG		(1 << 16)
-#define MX3_PWMCR_EN			(1 << 0)
+#define MX3_PWMCR_SWR			(1 << 3)
+#define MX3_PWMCR_EN			(1 << 0)
+#define MX3_PWMSR_FIFOAV_4WORDS		0x4
+#define MX3_PWMSR_FIFOAV_MASK		0x7
+
+#define MX3_PWM_SWR_LOOP		5
 
 struct imx_chip {
 	struct clk	*clk_per;
@@ -103,9 +110,43 @@ static int imx_pwm_config_v2(struct pwm_chip *chip,
 		struct pwm_device *pwm, int duty_ns, int period_ns)
 {
 	struct imx_chip *imx = to_imx_chip(chip);
+	struct device *dev = chip->dev;
 	unsigned long long c;
 	unsigned long period_cycles, duty_cycles, prescale;
-	u32 cr;
+	unsigned int period_ms;
+	bool enable = test_bit(PWMF_ENABLED, &pwm->flags);
+	int wait_count = 0, fifoav;
+	u32 cr, sr;
+
+	/*
+	 * i.MX PWMv2 has a 4-word sample FIFO.
+	 * In order to avoid FIFO overflow issue, we do software reset
+	 * to clear all sample FIFO if the controller is disabled or
+	 * wait for a full PWM cycle to get a relinquished FIFO slot
+	 * when the controller is enabled and the FIFO is fully loaded.
+	 */
+	if (enable) {
+		sr = readl(imx->mmio_base + MX3_PWMSR);
+		fifoav = sr & MX3_PWMSR_FIFOAV_MASK;
+		if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) {
+			period_ms = DIV_ROUND_UP(pwm->period, NSEC_PER_MSEC);
+			msleep(period_ms);
+
+			sr = readl(imx->mmio_base + MX3_PWMSR);
+			if (fifoav == (sr & MX3_PWMSR_FIFOAV_MASK))
+				dev_warn(dev, "there is no free FIFO slot\n");
+		}
+	} else {
+		writel(MX3_PWMCR_SWR, imx->mmio_base + MX3_PWMCR);
+		do {
+			usleep_range(200, 1000);
+			cr = readl(imx->mmio_base + MX3_PWMCR);
+		} while ((cr & MX3_PWMCR_SWR) &&
+			 (wait_count++ < MX3_PWM_SWR_LOOP));
+
+		if (cr & MX3_PWMCR_SWR)
+			dev_warn(dev, "software reset timeout\n");
+	}
 
 	c = clk_get_rate(imx->clk_per);
 	c = c * period_ns;
@@ -135,7 +176,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip,
 			MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
 			MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH;
 
-	if (test_bit(PWMF_ENABLED, &pwm->flags))
+	if (enable)
 		cr |= MX3_PWMCR_EN;
 
 	writel(cr, imx->mmio_base + MX3_PWMCR);
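
The new i.MX config path has two waiting strategies: with the channel enabled and the 4-slot FIFO full it sleeps roughly one PWM period so the hardware can drain a sample, and with the channel disabled it triggers a software reset and polls the self-clearing SWR bit a bounded number of times. A sketch of that bounded-poll idiom in isolation; the register layout here is a placeholder:

#include <linux/delay.h>
#include <linux/io.h>

#define SKETCH_SWR	(1 << 3)	/* placeholder self-clearing reset bit */
#define SKETCH_LOOPS	5		/* give up after this many polls */

/* Kick a software reset and wait, bounded, for the bit to self-clear. */
static bool sketch_sw_reset(void __iomem *ctrl)
{
	int tries = 0;
	u32 val;

	writel(SKETCH_SWR, ctrl);
	do {
		usleep_range(200, 1000);	/* sleep, don't busy-wait */
		val = readl(ctrl);
	} while ((val & SKETCH_SWR) && (tries++ < SKETCH_LOOPS));

	return !(val & SKETCH_SWR);	/* false means the reset timed out */
}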
diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c
new file mode 100644
index 000000000000..cf20d2beacdd
--- /dev/null
+++ b/drivers/pwm/pwm-lpss-pci.c
@@ -0,0 +1,64 @@
+/*
+ * Intel Low Power Subsystem PWM controller PCI driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *
+ * Derived from the original pwm-lpss.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "pwm-lpss.h"
+
+static int pwm_lpss_probe_pci(struct pci_dev *pdev,
+			      const struct pci_device_id *id)
+{
+	const struct pwm_lpss_boardinfo *info;
+	struct pwm_lpss_chip *lpwm;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err < 0)
+		return err;
+
+	info = (struct pwm_lpss_boardinfo *)id->driver_data;
+	lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info);
+	if (IS_ERR(lpwm))
+		return PTR_ERR(lpwm);
+
+	pci_set_drvdata(pdev, lpwm);
+	return 0;
+}
+
+static void pwm_lpss_remove_pci(struct pci_dev *pdev)
+{
+	struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev);
+
+	pwm_lpss_remove(lpwm);
+}
+
+static const struct pci_device_id pwm_lpss_pci_ids[] = {
+	{ PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info},
+	{ PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info},
+	{ PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info},
+	{ PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info},
+	{ },
+};
+MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids);
+
+static struct pci_driver pwm_lpss_driver_pci = {
+	.name = "pwm-lpss",
+	.id_table = pwm_lpss_pci_ids,
+	.probe = pwm_lpss_probe_pci,
+	.remove = pwm_lpss_remove_pci,
+};
+module_pci_driver(pwm_lpss_driver_pci);
+
+MODULE_DESCRIPTION("PWM PCI driver for Intel LPSS");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
new file mode 100644
index 000000000000..18a9c880a76d
--- /dev/null
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -0,0 +1,68 @@
+/*
+ * Intel Low Power Subsystem PWM controller driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *
+ * Derived from the original pwm-lpss.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "pwm-lpss.h"
+
+static int pwm_lpss_probe_platform(struct platform_device *pdev)
+{
+	const struct pwm_lpss_boardinfo *info;
+	const struct acpi_device_id *id;
+	struct pwm_lpss_chip *lpwm;
+	struct resource *r;
+
+	id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev);
+	if (!id)
+		return -ENODEV;
+
+	info = (const struct pwm_lpss_boardinfo *)id->driver_data;
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	lpwm = pwm_lpss_probe(&pdev->dev, r, info);
+	if (IS_ERR(lpwm))
+		return PTR_ERR(lpwm);
+
+	platform_set_drvdata(pdev, lpwm);
+	return 0;
+}
+
+static int pwm_lpss_remove_platform(struct platform_device *pdev)
+{
+	struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev);
+
+	return pwm_lpss_remove(lpwm);
+}
+
+static const struct acpi_device_id pwm_lpss_acpi_match[] = {
+	{ "80860F09", (unsigned long)&pwm_lpss_byt_info },
+	{ "80862288", (unsigned long)&pwm_lpss_bsw_info },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match);
+
+static struct platform_driver pwm_lpss_driver_platform = {
+	.driver = {
+		.name = "pwm-lpss",
+		.acpi_match_table = pwm_lpss_acpi_match,
+	},
+	.probe = pwm_lpss_probe_platform,
+	.remove = pwm_lpss_remove_platform,
+};
+module_platform_driver(pwm_lpss_driver_platform);
+
+MODULE_DESCRIPTION("PWM platform driver for Intel LPSS");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:pwm-lpss");
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 4df994f72d96..e9798253a16f 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -13,15 +13,11 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/acpi.h>
-#include <linux/device.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/pwm.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
 
-static int pci_drv, plat_drv; /* So we know which drivers registered */
+#include "pwm-lpss.h"
 
 #define PWM				0x00000000
 #define PWM_ENABLE			BIT(31)
@@ -39,14 +35,17 @@ struct pwm_lpss_chip {
 	unsigned long clk_rate;
 };
 
-struct pwm_lpss_boardinfo {
-	unsigned long clk_rate;
+/* BayTrail */
+const struct pwm_lpss_boardinfo pwm_lpss_byt_info = {
+	.clk_rate = 25000000
 };
+EXPORT_SYMBOL_GPL(pwm_lpss_byt_info);
 
-/* BayTrail */
-static const struct pwm_lpss_boardinfo byt_info = {
-	25000000
+/* Braswell */
+const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = {
+	.clk_rate = 19200000
 };
+EXPORT_SYMBOL_GPL(pwm_lpss_bsw_info);
 
 static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip)
 {
@@ -118,9 +117,8 @@ static const struct pwm_ops pwm_lpss_ops = {
 	.owner = THIS_MODULE,
 };
 
-static struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev,
-					    struct resource *r,
-					    const struct pwm_lpss_boardinfo *info)
+struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
+				     const struct pwm_lpss_boardinfo *info)
 {
 	struct pwm_lpss_chip *lpwm;
 	int ret;
@@ -147,8 +145,9 @@ static struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev,
 
 	return lpwm;
 }
+EXPORT_SYMBOL_GPL(pwm_lpss_probe);
 
-static int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
+int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
 {
 	u32 ctrl;
 
@@ -157,114 +156,8 @@ static int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
 
 	return pwmchip_remove(&lpwm->chip);
 }
-
-static int pwm_lpss_probe_pci(struct pci_dev *pdev,
-			      const struct pci_device_id *id)
-{
-	const struct pwm_lpss_boardinfo *info;
-	struct pwm_lpss_chip *lpwm;
-	int err;
-
-	err = pci_enable_device(pdev);
-	if (err < 0)
-		return err;
-
-	info = (struct pwm_lpss_boardinfo *)id->driver_data;
-	lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info);
-	if (IS_ERR(lpwm))
-		return PTR_ERR(lpwm);
-
-	pci_set_drvdata(pdev, lpwm);
-	return 0;
-}
-
-static void pwm_lpss_remove_pci(struct pci_dev *pdev)
-{
-	struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev);
-
-	pwm_lpss_remove(lpwm);
-	pci_disable_device(pdev);
-}
-
-static struct pci_device_id pwm_lpss_pci_ids[] = {
-	{ PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&byt_info},
-	{ PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&byt_info},
-	{ },
-};
-MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids);
-
-static struct pci_driver pwm_lpss_driver_pci = {
-	.name = "pwm-lpss",
-	.id_table = pwm_lpss_pci_ids,
-	.probe = pwm_lpss_probe_pci,
-	.remove = pwm_lpss_remove_pci,
-};
-
-static int pwm_lpss_probe_platform(struct platform_device *pdev)
-{
-	const struct pwm_lpss_boardinfo *info;
-	const struct acpi_device_id *id;
-	struct pwm_lpss_chip *lpwm;
-	struct resource *r;
-
-	id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev);
-	if (!id)
-		return -ENODEV;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	info = (struct pwm_lpss_boardinfo *)id->driver_data;
-	lpwm = pwm_lpss_probe(&pdev->dev, r, info);
-	if (IS_ERR(lpwm))
-		return PTR_ERR(lpwm);
-
-	platform_set_drvdata(pdev, lpwm);
-	return 0;
-}
-
-static int pwm_lpss_remove_platform(struct platform_device *pdev)
-{
-	struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev);
-
-	return pwm_lpss_remove(lpwm);
-}
-
-static const struct acpi_device_id pwm_lpss_acpi_match[] = {
-	{ "80860F09", (unsigned long)&byt_info },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match);
-
-static struct platform_driver pwm_lpss_driver_platform = {
-	.driver = {
-		.name = "pwm-lpss",
-		.acpi_match_table = pwm_lpss_acpi_match,
-	},
-	.probe = pwm_lpss_probe_platform,
-	.remove = pwm_lpss_remove_platform,
-};
-
-static int __init pwm_init(void)
-{
-	pci_drv = pci_register_driver(&pwm_lpss_driver_pci);
-	plat_drv = platform_driver_register(&pwm_lpss_driver_platform);
-	if (pci_drv && plat_drv)
-		return pci_drv;
-
-	return 0;
-}
-module_init(pwm_init);
-
-static void __exit pwm_exit(void)
-{
-	if (!pci_drv)
-		pci_unregister_driver(&pwm_lpss_driver_pci);
-	if (!plat_drv)
-		platform_driver_unregister(&pwm_lpss_driver_platform);
-}
-module_exit(pwm_exit);
+EXPORT_SYMBOL_GPL(pwm_lpss_remove);
 
 MODULE_DESCRIPTION("PWM driver for Intel LPSS");
 MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:pwm-lpss");
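
Splitting the PCI and ACPI shims into their own modules also retires the hand-rolled pwm_init()/pwm_exit() pair above, whose pci_drv/plat_drv bookkeeping tried to remember which of the two registrations had succeeded. Each new file instead uses the one-line registration helper for its bus; module_pci_driver(), for instance, expands to roughly this boilerplate (shown here as an approximation, not the exact macro output):

/* Approximate expansion of module_pci_driver(pwm_lpss_driver_pci). */
static int __init pwm_lpss_driver_pci_init(void)
{
	return pci_register_driver(&pwm_lpss_driver_pci);
}
module_init(pwm_lpss_driver_pci_init);

static void __exit pwm_lpss_driver_pci_exit(void)
{
	pci_unregister_driver(&pwm_lpss_driver_pci);
}
module_exit(pwm_lpss_driver_pci_exit);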
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
new file mode 100644
index 000000000000..aa041bb1b67d
--- /dev/null
+++ b/drivers/pwm/pwm-lpss.h
@@ -0,0 +1,32 @@
+/*
+ * Intel Low Power Subsystem PWM controller driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ *
+ * Derived from the original pwm-lpss.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PWM_LPSS_H
+#define __PWM_LPSS_H
+
+#include <linux/device.h>
+#include <linux/pwm.h>
+
+struct pwm_lpss_chip;
+
+struct pwm_lpss_boardinfo {
+	unsigned long clk_rate;
+};
+
+extern const struct pwm_lpss_boardinfo pwm_lpss_byt_info;
+extern const struct pwm_lpss_boardinfo pwm_lpss_bsw_info;
+
+struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
+				     const struct pwm_lpss_boardinfo *info);
+int pwm_lpss_remove(struct pwm_lpss_chip *lpwm);
+
+#endif	/* __PWM_LPSS_H */
diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c
index bdd8644c01cf..9442df244101 100644
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -24,7 +24,9 @@
 #define PWM_ENABLE		(1 << 0)
 #define PWM_CONTINUOUS		(1 << 1)
 #define PWM_DUTY_POSITIVE	(1 << 3)
+#define PWM_DUTY_NEGATIVE	(0 << 3)
 #define PWM_INACTIVE_NEGATIVE	(0 << 4)
+#define PWM_INACTIVE_POSITIVE	(1 << 4)
 #define PWM_OUTPUT_LEFT		(0 << 5)
 #define PWM_LP_DISABLE		(0 << 8)
 
@@ -45,8 +47,10 @@ struct rockchip_pwm_regs {
 struct rockchip_pwm_data {
 	struct rockchip_pwm_regs regs;
 	unsigned int prescaler;
+	const struct pwm_ops *ops;
 
-	void (*set_enable)(struct pwm_chip *chip, bool enable);
+	void (*set_enable)(struct pwm_chip *chip,
+			   struct pwm_device *pwm, bool enable);
 };
 
 static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c)
@@ -54,7 +58,8 @@ static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c)
 	return container_of(c, struct rockchip_pwm_chip, chip);
 }
 
-static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, bool enable)
+static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip,
+				       struct pwm_device *pwm, bool enable)
 {
 	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
 	u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN;
@@ -70,14 +75,19 @@ static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, bool enable)
 	writel_relaxed(val, pc->base + pc->data->regs.ctrl);
 }
 
-static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, bool enable)
+static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip,
+				       struct pwm_device *pwm, bool enable)
 {
 	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
 	u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE |
-			  PWM_CONTINUOUS | PWM_DUTY_POSITIVE |
-			  PWM_INACTIVE_NEGATIVE;
+			  PWM_CONTINUOUS;
 	u32 val;
 
+	if (pwm->polarity == PWM_POLARITY_INVERSED)
+		enable_conf |= PWM_DUTY_NEGATIVE | PWM_INACTIVE_POSITIVE;
+	else
+		enable_conf |= PWM_DUTY_POSITIVE | PWM_INACTIVE_NEGATIVE;
+
 	val = readl_relaxed(pc->base + pc->data->regs.ctrl);
 
 	if (enable)
@@ -124,6 +134,19 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
+static int rockchip_pwm_set_polarity(struct pwm_chip *chip,
+				     struct pwm_device *pwm,
+				     enum pwm_polarity polarity)
+{
+	/*
+	 * No action needed here because pwm->polarity will be set by the core
+	 * and the core will only change polarity when the PWM is not enabled.
+	 * We'll handle things in set_enable().
+	 */
+
+	return 0;
+}
+
 static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
@@ -133,7 +156,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 	if (ret)
 		return ret;
 
-	pc->data->set_enable(chip, true);
+	pc->data->set_enable(chip, pwm, true);
 
 	return 0;
 }
@@ -142,18 +165,26 @@ static void rockchip_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
 
-	pc->data->set_enable(chip, false);
+	pc->data->set_enable(chip, pwm, false);
 
 	clk_disable(pc->clk);
 }
 
-static const struct pwm_ops rockchip_pwm_ops = {
+static const struct pwm_ops rockchip_pwm_ops_v1 = {
 	.config = rockchip_pwm_config,
 	.enable = rockchip_pwm_enable,
 	.disable = rockchip_pwm_disable,
 	.owner = THIS_MODULE,
 };
 
+static const struct pwm_ops rockchip_pwm_ops_v2 = {
+	.config = rockchip_pwm_config,
+	.set_polarity = rockchip_pwm_set_polarity,
+	.enable = rockchip_pwm_enable,
+	.disable = rockchip_pwm_disable,
+	.owner = THIS_MODULE,
+};
+
 static const struct rockchip_pwm_data pwm_data_v1 = {
 	.regs = {
 		.duty = 0x04,
@@ -162,6 +193,7 @@ static const struct rockchip_pwm_data pwm_data_v1 = {
 		.ctrl = 0x0c,
 	},
 	.prescaler = 2,
+	.ops = &rockchip_pwm_ops_v1,
 	.set_enable = rockchip_pwm_set_enable_v1,
 };
 
@@ -173,6 +205,7 @@ static const struct rockchip_pwm_data pwm_data_v2 = {
 		.ctrl = 0x0c,
 	},
 	.prescaler = 1,
+	.ops = &rockchip_pwm_ops_v2,
 	.set_enable = rockchip_pwm_set_enable_v2,
 };
 
@@ -184,6 +217,7 @@ static const struct rockchip_pwm_data pwm_data_vop = {
 		.ctrl = 0x00,
 	},
 	.prescaler = 1,
+	.ops = &rockchip_pwm_ops_v2,
 	.set_enable = rockchip_pwm_set_enable_v2,
 };
 
@@ -227,10 +261,15 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
 
 	pc->data = id->data;
 	pc->chip.dev = &pdev->dev;
-	pc->chip.ops = &rockchip_pwm_ops;
+	pc->chip.ops = pc->data->ops;
 	pc->chip.base = -1;
 	pc->chip.npwm = 1;
 
+	if (pc->data->ops->set_polarity) {
+		pc->chip.of_xlate = of_pwm_xlate_with_flags;
+		pc->chip.of_pwm_n_cells = 3;
+	}
+
 	ret = pwmchip_add(&pc->chip);
 	if (ret < 0) {
 		clk_unprepare(pc->clk);
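
Because the v2 data now installs of_pwm_xlate_with_flags and three PWM cells, a consumer can request an inverted output and the polarity is latched on the pwm_device before rockchip_pwm_set_enable_v2() picks the matching DUTY/INACTIVE bits. A hedged consumer-side sketch; the requesting device and the numbers are invented:

#include <linux/err.h>
#include <linux/pwm.h>

/* Hypothetical consumer: run a 1 ms period at 50% duty, inverted. */
static int sketch_start_inverted(struct device *dev)
{
	struct pwm_device *pwm = pwm_get(dev, NULL);
	int ret;

	if (IS_ERR(pwm))
		return PTR_ERR(pwm);

	/*
	 * The core only accepts polarity changes while the PWM is off,
	 * which is why the set_polarity callback above can be a no-op.
	 */
	ret = pwm_set_polarity(pwm, PWM_POLARITY_INVERSED);
	if (!ret)
		ret = pwm_config(pwm, 500000, 1000000);
	if (!ret)
		ret = pwm_enable(pwm);

	return ret;
}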
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8cd0beebdc3f..94ae1798d48a 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -830,7 +830,7 @@ config RTC_DRV_DA9063
 
 config RTC_DRV_EFI
 	tristate "EFI RTC"
-	depends on EFI
+	depends on EFI && !X86
 	help
 	  If you say yes here you will get support for the EFI
 	  Real Time Clock.
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index c384fec6d173..53b589dc34eb 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -236,3 +236,4 @@ MODULE_ALIAS("platform:rtc-efi");
 MODULE_AUTHOR("dann frazier <dannf@hp.com>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("EFI RTC driver");
+MODULE_ALIAS("platform:rtc-efi");
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index dc24ecfac2d1..db2cb1f8a1b5 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -105,7 +105,7 @@ config SCLP_ASYNC
 config HMC_DRV
 	def_tristate m
 	prompt "Support for file transfers from HMC drive CD/DVD-ROM"
-	depends on 64BIT
+	depends on S390 && 64BIT
 	select CRC16
 	help
 	  This option enables support for file transfers from a Hardware
diff --git a/drivers/scsi/osd/Kbuild b/drivers/scsi/osd/Kbuild
index 5fd73d77c3af..58cecd45b0f5 100644
--- a/drivers/scsi/osd/Kbuild
+++ b/drivers/scsi/osd/Kbuild
@@ -4,7 +4,7 @@
 # Copyright (C) 2008 Panasas Inc. All rights reserved.
 #
 # Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
+#   Boaz Harrosh <ooo@electrozaur.com>
 #   Benny Halevy <bhalevy@panasas.com>
 #
 # This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/Kconfig b/drivers/scsi/osd/Kconfig
index a0703514eb0f..347cc5e33749 100644
--- a/drivers/scsi/osd/Kconfig
+++ b/drivers/scsi/osd/Kconfig
@@ -4,7 +4,7 @@
 # Copyright (C) 2008 Panasas Inc. All rights reserved.
 #
 # Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
+#   Boaz Harrosh <ooo@electrozaur.com>
 #   Benny Halevy <bhalevy@panasas.com>
 #
 # This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/osd_debug.h b/drivers/scsi/osd/osd_debug.h
index 579e491f11df..26341261bb5c 100644
--- a/drivers/scsi/osd/osd_debug.h
+++ b/drivers/scsi/osd/osd_debug.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc. All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index fd19fd8468ac..488c3929f19a 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -7,7 +7,7 @@
  * Copyright (C) 2008 Panasas Inc. All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -57,7 +57,7 @@
 
 enum { OSD_REQ_RETRIES = 1 };
 
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
 MODULE_DESCRIPTION("open-osd initiator library libosd.ko");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index e1d9a4c4c4b3..92cdd4b06526 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -10,7 +10,7 @@
  * Copyright (C) 2008 Panasas Inc. All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -74,7 +74,7 @@
 static const char osd_name[] = "osd";
 static const char *osd_version_string = "open-osd 0.2.1";
 
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
 MODULE_DESCRIPTION("open-osd Upper-Layer-Driver osd.ko");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(SCSI_OSD_MAJOR);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 829752cfd73f..a902fa1db7af 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -112,6 +112,7 @@ static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha,
 	struct qla_tgt_cmd *cmd);
 static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
 	struct atio_from_isp *atio, uint16_t status, int qfull);
+static void qlt_disable_vha(struct scsi_qla_host *vha);
 /*
  * Global Variables
  */
@@ -210,7 +211,7 @@ static inline void qlt_decr_num_pend_cmds(struct scsi_qla_host *vha)
 	spin_unlock_irqrestore(&vha->hw->tgt.q_full_lock, flags);
 }
 
-void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
+static void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha,
 	struct atio_from_isp *atio)
 {
 	ql_dbg(ql_dbg_tgt, vha, 0xe072,
@@ -433,7 +434,7 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd)
 #if 0 /* FIXME: Re-enable Global event handling.. */
 		/* Global event */
 		atomic_inc(&ha->tgt.qla_tgt->tgt_global_resets_count);
-		qlt_clear_tgt_db(ha->tgt.qla_tgt, 1);
+		qlt_clear_tgt_db(ha->tgt.qla_tgt);
 		if (!list_empty(&ha->tgt.qla_tgt->sess_list)) {
 			sess = list_entry(ha->tgt.qla_tgt->sess_list.next,
 			    typeof(*sess), sess_list_entry);
@@ -515,7 +516,7 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess,
 }
 
 /* ha->hardware_lock supposed to be held on entry */
-static void qlt_clear_tgt_db(struct qla_tgt *tgt, bool local_only)
+static void qlt_clear_tgt_db(struct qla_tgt *tgt)
 {
 	struct qla_tgt_sess *sess;
 
@@ -867,7 +868,7 @@ int qlt_stop_phase1(struct qla_tgt *tgt)
 	mutex_lock(&vha->vha_tgt.tgt_mutex);
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	tgt->tgt_stop = 1;
-	qlt_clear_tgt_db(tgt, true);
+	qlt_clear_tgt_db(tgt);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 	mutex_unlock(&vha->vha_tgt.tgt_mutex);
 	mutex_unlock(&qla_tgt_mutex);
@@ -1462,12 +1463,13 @@ out_err:
 	return -1;
 }
 
-static inline void qlt_unmap_sg(struct scsi_qla_host *vha,
-	struct qla_tgt_cmd *cmd)
+static void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd)
 {
 	struct qla_hw_data *ha = vha->hw;
 
-	BUG_ON(!cmd->sg_mapped);
+	if (!cmd->sg_mapped)
+		return;
+
 	pci_unmap_sg(ha->pdev, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction);
 	cmd->sg_mapped = 0;
 
@@ -2428,8 +2430,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 	return 0;
 
 out_unmap_unlock:
-	if (cmd->sg_mapped)
-		qlt_unmap_sg(vha, cmd);
+	qlt_unmap_sg(vha, cmd);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	return res;
@@ -2506,8 +2507,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd)
 	return res;
 
 out_unlock_free_unmap:
-	if (cmd->sg_mapped)
-		qlt_unmap_sg(vha, cmd);
+	qlt_unmap_sg(vha, cmd);
 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
 	return res;
@@ -2741,8 +2741,7 @@ done:
 		if (!ha_locked && !in_interrupt())
 			msleep(250); /* just in case */
 
-		if (cmd->sg_mapped)
-			qlt_unmap_sg(vha, cmd);
+		qlt_unmap_sg(vha, cmd);
 		vha->hw->tgt.tgt_ops->free_cmd(cmd);
 	}
 	return;
@@ -3087,8 +3086,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
 	tfo = se_cmd->se_tfo;
 	cmd->cmd_sent_to_fw = 0;
 
-	if (cmd->sg_mapped)
-		qlt_unmap_sg(vha, cmd);
+	qlt_unmap_sg(vha, cmd);
 
 	if (unlikely(status != CTIO_SUCCESS)) {
 		switch (status & 0xFFFF) {
@@ -5343,7 +5341,7 @@ void qlt_lport_deregister(struct scsi_qla_host *vha)
 EXPORT_SYMBOL(qlt_lport_deregister);
 
 /* Must be called under HW lock */
-void qlt_set_mode(struct scsi_qla_host *vha)
+static void qlt_set_mode(struct scsi_qla_host *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
 
@@ -5364,7 +5362,7 @@ void qlt_set_mode(struct scsi_qla_host *vha)
 }
 
 /* Must be called under HW lock */
-void qlt_clear_mode(struct scsi_qla_host *vha)
+static void qlt_clear_mode(struct scsi_qla_host *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
 
@@ -5428,8 +5426,7 @@ EXPORT_SYMBOL(qlt_enable_vha);
  *
  * Disable Target Mode and reset the adapter
  */
-void
-qlt_disable_vha(struct scsi_qla_host *vha)
+static void qlt_disable_vha(struct scsi_qla_host *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt *tgt = vha->vha_tgt.qla_tgt;
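
Several of the hunks above are one refactoring: qlt_unmap_sg() now checks cmd->sg_mapped itself, so the BUG_ON() becomes a benign early return and every caller drops its duplicated "if (cmd->sg_mapped)" guard. The general shape of that cleanup, as a small self-contained sketch:

#include <linux/types.h>

struct sketch_cmd {
	bool sg_mapped;
	/* ... mapping state would live here ... */
};

/* Callee owns the precondition: callers may now invoke unconditionally. */
static void sketch_unmap(struct sketch_cmd *cmd)
{
	if (!cmd->sg_mapped)	/* formerly a caller-side guard plus BUG_ON() */
		return;

	/* ... the actual unmap work ... */
	cmd->sg_mapped = false;
}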
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 8ff330f7d6f5..332086776dfe 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -1001,11 +1001,11 @@ struct qla_tgt_prm {
 	struct qla_tgt *tgt;
 	void *pkt;
 	struct scatterlist *sg;	/* cmd data buffer SG vector */
+	unsigned char *sense_buffer;
 	int seg_cnt;
 	int req_cnt;
 	uint16_t rq_result;
 	uint16_t scsi_status;
-	unsigned char *sense_buffer;
 	int sense_buffer_len;
 	int residual;
 	int add_status_pkt;
@@ -1033,10 +1033,6 @@ struct qla_tgt_srr_ctio {
 
 
 extern struct qla_tgt_data qla_target;
-/*
- * Internal function prototypes
- */
-void qlt_disable_vha(struct scsi_qla_host *);
 
 /*
  * Function prototypes for qla_target.c logic used by qla2xxx LLD code.
@@ -1049,8 +1045,6 @@ extern void qlt_lport_deregister(struct scsi_qla_host *);
 extern void qlt_unreg_sess(struct qla_tgt_sess *);
 extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *);
 extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *);
-extern void qlt_set_mode(struct scsi_qla_host *ha);
-extern void qlt_clear_mode(struct scsi_qla_host *ha);
 extern int __init qlt_init(void);
 extern void qlt_exit(void);
 extern void qlt_update_vp_map(struct scsi_qla_host *, int);
@@ -1083,13 +1077,9 @@ static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha)
 /*
  * Exported symbols from qla_target.c LLD logic used by qla2xxx code..
  */
-extern void qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *,
-	struct atio_from_isp *);
 extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, response_t *);
 extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *);
 extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t);
-extern int qlt_rdy_to_xfer_dif(struct qla_tgt_cmd *);
-extern int qlt_xmit_response_dif(struct qla_tgt_cmd *, int, uint8_t);
 extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *);
 extern void qlt_free_cmd(struct qla_tgt_cmd *cmd);
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 031b2961c6b7..73f9feecda72 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -786,7 +786,16 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess)
 	pr_debug("fc_rport domain: port_id 0x%06x\n", nacl->nport_id);
 
 	node = btree_remove32(&lport->lport_fcport_map, nacl->nport_id);
-	WARN_ON(node && (node != se_nacl));
+	if (WARN_ON(node && (node != se_nacl))) {
+		/*
+		 * The nacl no longer matches what we think it should be.
+		 * Most likely a new dynamic acl has been added while
+		 * someone dropped the hardware lock. It clearly is a
+		 * bug elsewhere, but this bit can't make things worse.
+		 */
+		btree_insert32(&lport->lport_fcport_map, nacl->nport_id,
+			       node, GFP_ATOMIC);
+	}
 
 	pr_debug("Removed from fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%06x\n",
 	    se_nacl, nacl->nport_wwnn, nacl->nport_id);
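
The tcm_qla2xxx change leans on the fact that WARN_ON() returns the condition it evaluated, so the driver can log the inconsistency and immediately re-insert the foreign node rather than silently dropping it. The same pattern in isolation, reusing the btree calls that appear above:

#include <linux/btree.h>
#include <linux/bug.h>
#include <linux/gfp.h>

/* Remove key only if it maps to the node we expect; otherwise restore it. */
static int sketch_remove_checked(struct btree_head32 *head, u32 key,
				 void *expected)
{
	void *node = btree_remove32(head, key);

	if (WARN_ON(node && node != expected)) {
		/* Losing the unexpected node would make a bad state worse. */
		btree_insert32(head, key, node, GFP_ATOMIC);
		return -EEXIST;
	}

	return 0;
}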
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index dc2d84ac5a0e..81d44c477a5b 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,6 +31,13 @@ config TCM_PSCSI
 	  Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
 	  passthrough access to Linux/SCSI device
 
+config TCM_USER
+	tristate "TCM/USER Subsystem Plugin for Linux"
+	depends on UIO && NET
+	help
+	Say Y here to enable the TCM/USER subsystem plugin for a userspace
+	process to handle requests
+
 source "drivers/target/loopback/Kconfig"
 source "drivers/target/tcm_fc/Kconfig"
 source "drivers/target/iscsi/Kconfig"
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 85b012d2f89b..bbb4a7d638ef 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_TARGET_CORE) += target_core_mod.o
 obj-$(CONFIG_TCM_IBLOCK)	+= target_core_iblock.o
 obj-$(CONFIG_TCM_FILEIO)	+= target_core_file.o
 obj-$(CONFIG_TCM_PSCSI)		+= target_core_pscsi.o
+obj-$(CONFIG_TCM_USER)		+= target_core_user.o
 
 # Fabric modules
 obj-$(CONFIG_LOOPBACK_TARGET) += loopback/
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 260c3e1e312c..b19e4329ba00 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3709,7 +3709,6 @@ static inline void iscsit_thread_check_cpumask(
 	struct task_struct *p,
 	int mode)
 {
-	char buf[128];
 	/*
 	 * mode == 1 signals iscsi_target_tx_thread() usage.
 	 * mode == 0 signals iscsi_target_rx_thread() usage.
@@ -3728,8 +3727,6 @@ static inline void iscsit_thread_check_cpumask(
 	 * both TX and RX kthreads are scheduled to run on the
 	 * same CPU.
 	 */
-	memset(buf, 0, 128);
-	cpumask_scnprintf(buf, 128, conn->conn_cpumask);
 	set_cpus_allowed_ptr(p, conn->conn_cpumask);
 }
 
@@ -4326,8 +4323,7 @@ int iscsit_close_connection(
 	if (conn->conn_tx_hash.tfm)
 		crypto_free_hash(conn->conn_tx_hash.tfm);
 
-	if (conn->conn_cpumask)
-		free_cpumask_var(conn->conn_cpumask);
+	free_cpumask_var(conn->conn_cpumask);
 
 	kfree(conn->conn_ops);
 	conn->conn_ops = NULL;
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index ae03f3e5de1e..9059c1e0b26e 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -669,12 +669,10 @@ static ssize_t lio_target_nacl_show_info(
 	} else {
 		sess = se_sess->fabric_sess_ptr;
 
-		if (sess->sess_ops->InitiatorName)
-			rb += sprintf(page+rb, "InitiatorName: %s\n",
-				sess->sess_ops->InitiatorName);
-		if (sess->sess_ops->InitiatorAlias)
-			rb += sprintf(page+rb, "InitiatorAlias: %s\n",
-				sess->sess_ops->InitiatorAlias);
+		rb += sprintf(page+rb, "InitiatorName: %s\n",
+			sess->sess_ops->InitiatorName);
+		rb += sprintf(page+rb, "InitiatorAlias: %s\n",
+			sess->sess_ops->InitiatorAlias);
 
 		rb += sprintf(page+rb, "LIO Session ID: %u "
 			"ISID: 0x%02x %02x %02x %02x %02x %02x "
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
index 0d1e6ee3e992..a0ae5fc0ad75 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.c
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -345,7 +345,6 @@ static int iscsit_dataout_check_datasn(
 	struct iscsi_cmd *cmd,
 	unsigned char *buf)
 {
-	int dump = 0, recovery = 0;
 	u32 data_sn = 0;
 	struct iscsi_conn *conn = cmd->conn;
 	struct iscsi_data *hdr = (struct iscsi_data *) buf;
@@ -370,13 +369,11 @@ static int iscsit_dataout_check_datasn(
 		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
 			" higher than expected 0x%08x.\n", cmd->init_task_tag,
 			be32_to_cpu(hdr->datasn), data_sn);
-		recovery = 1;
 		goto recover;
 	} else if (be32_to_cpu(hdr->datasn) < data_sn) {
 		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
 			" lower than expected 0x%08x, discarding payload.\n",
 			cmd->init_task_tag, be32_to_cpu(hdr->datasn), data_sn);
-		dump = 1;
 		goto dump;
 	}
 
@@ -392,8 +389,7 @@ dump:
 	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
 		return DATAOUT_CANNOT_RECOVER;
 
-	return (recovery || dump) ? DATAOUT_WITHIN_COMMAND_RECOVERY :
-				DATAOUT_NORMAL;
+	return DATAOUT_WITHIN_COMMAND_RECOVERY;
 }
 
 static int iscsit_dataout_pre_datapduinorder_yes(
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 5e71ac609418..480f2e0ecc11 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -978,8 +978,7 @@ int iscsit_setup_np(
978 return 0; 978 return 0;
979fail: 979fail:
980 np->np_socket = NULL; 980 np->np_socket = NULL;
981 if (sock) 981 sock_release(sock);
982 sock_release(sock);
983 return ret; 982 return ret;
984} 983}
985 984
@@ -1190,8 +1189,7 @@ old_sess_out:
1190 if (!IS_ERR(conn->conn_tx_hash.tfm)) 1189 if (!IS_ERR(conn->conn_tx_hash.tfm))
1191 crypto_free_hash(conn->conn_tx_hash.tfm); 1190 crypto_free_hash(conn->conn_tx_hash.tfm);
1192 1191
1193 if (conn->conn_cpumask) 1192 free_cpumask_var(conn->conn_cpumask);
1194 free_cpumask_var(conn->conn_cpumask);
1195 1193
1196 kfree(conn->conn_ops); 1194 kfree(conn->conn_ops);
1197 1195
@@ -1268,8 +1266,6 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
1268 iscsit_put_transport(conn->conn_transport); 1266 iscsit_put_transport(conn->conn_transport);
1269 kfree(conn); 1267 kfree(conn);
1270 conn = NULL; 1268 conn = NULL;
1271 if (ret == -ENODEV)
1272 goto out;
1273 /* Get another socket */ 1269 /* Get another socket */
1274 return 1; 1270 return 1;
1275 } 1271 }
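The iscsi_target_login.c cleanups above share one rationale: free_cpumask_var() is, like kfree(), defined to be a no-op on a NULL argument, and the fail label in iscsit_setup_np() appears to be reachable only after the socket has been created, so neither guard was doing real work. A minimal sketch of the NULL-tolerant unwind idiom (iscsi_conn_cleanup is a hypothetical name, not a function in this patch):

	/* Hypothetical helper: no guards needed, both callees below
	 * are defined to tolerate a NULL argument. */
	static void iscsi_conn_cleanup(struct iscsi_conn *conn)
	{
		free_cpumask_var(conn->conn_cpumask);	/* no-op on NULL */
		kfree(conn->conn_ops);			/* kfree(NULL) is a no-op */
	}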
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 73355f4fca74..ce87ce9bdb9c 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1481,8 +1481,9 @@ void iscsit_collect_login_stats(
1481 if (conn->param_list) 1481 if (conn->param_list)
1482 intrname = iscsi_find_param_from_key(INITIATORNAME, 1482 intrname = iscsi_find_param_from_key(INITIATORNAME,
1483 conn->param_list); 1483 conn->param_list);
1484 strcpy(ls->last_intr_fail_name, 1484 strlcpy(ls->last_intr_fail_name,
1485 (intrname ? intrname->value : "Unknown")); 1485 (intrname ? intrname->value : "Unknown"),
1486 sizeof(ls->last_intr_fail_name));
1486 1487
1487 ls->last_intr_fail_ip_family = conn->login_family; 1488 ls->last_intr_fail_ip_family = conn->login_family;
1488 1489
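The strcpy() replaced above copied an initiator-controlled string into the fixed-size last_intr_fail_name[] and could overflow it; strlcpy() bounds the copy and always NUL-terminates. A self-contained userspace model of the semantics:

	#include <stdio.h>
	#include <string.h>

	/* Minimal model of the kernel's strlcpy(): copy at most size-1
	 * bytes, always NUL-terminate, return strlen(src) so callers
	 * can detect truncation. */
	static size_t strlcpy_model(char *dst, const char *src, size_t size)
	{
		size_t len = strlen(src);

		if (size) {
			size_t n = (len >= size) ? size - 1 : len;
			memcpy(dst, src, n);
			dst[n] = '\0';
		}
		return len;
	}

	int main(void)
	{
		char name[8];

		strlcpy_model(name, "iqn.1993-08.org.debian:01:4711", sizeof(name));
		printf("%s\n", name);	/* "iqn.199" -- truncated, never overflowed */
		return 0;
	}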
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 340de9d92b15..ab3ab27d49b7 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -153,18 +153,11 @@ static int tcm_loop_change_queue_type(struct scsi_device *sdev, int tag)
153/* 153/*
154 * Locate the SAM Task Attr from struct scsi_cmnd * 154 * Locate the SAM Task Attr from struct scsi_cmnd *
155 */ 155 */
156static int tcm_loop_sam_attr(struct scsi_cmnd *sc) 156static int tcm_loop_sam_attr(struct scsi_cmnd *sc, int tag)
157{ 157{
158 if (sc->device->tagged_supported) { 158 if (sc->device->tagged_supported &&
159 switch (sc->tag) { 159 sc->device->ordered_tags && tag >= 0)
160 case HEAD_OF_QUEUE_TAG: 160 return MSG_ORDERED_TAG;
161 return MSG_HEAD_TAG;
162 case ORDERED_QUEUE_TAG:
163 return MSG_ORDERED_TAG;
164 default:
165 break;
166 }
167 }
168 161
169 return MSG_SIMPLE_TAG; 162 return MSG_SIMPLE_TAG;
170} 163}
@@ -227,7 +220,7 @@ static void tcm_loop_submission_work(struct work_struct *work)
227 220
228 rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd, 221 rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd,
229 &tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun, 222 &tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun,
230 transfer_length, tcm_loop_sam_attr(sc), 223 transfer_length, tcm_loop_sam_attr(sc, tl_cmd->sc_cmd_tag),
231 sc->sc_data_direction, 0, 224 sc->sc_data_direction, 0,
232 scsi_sglist(sc), scsi_sg_count(sc), 225 scsi_sglist(sc), scsi_sg_count(sc),
233 sgl_bidi, sgl_bidi_count, 226 sgl_bidi, sgl_bidi_count,
@@ -266,7 +259,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
266 } 259 }
267 260
268 tl_cmd->sc = sc; 261 tl_cmd->sc = sc;
269 tl_cmd->sc_cmd_tag = sc->tag; 262 tl_cmd->sc_cmd_tag = sc->request->tag;
270 INIT_WORK(&tl_cmd->work, tcm_loop_submission_work); 263 INIT_WORK(&tl_cmd->work, tcm_loop_submission_work);
271 queue_work(tcm_loop_workqueue, &tl_cmd->work); 264 queue_work(tcm_loop_workqueue, &tl_cmd->work);
272 return 0; 265 return 0;
@@ -370,7 +363,7 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc)
370 */ 363 */
371 tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; 364 tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
372 ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, 365 ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
373 sc->tag, TMR_ABORT_TASK); 366 sc->request->tag, TMR_ABORT_TASK);
374 return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; 367 return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
375} 368}
376 369
@@ -960,8 +953,7 @@ static int tcm_loop_port_link(
960 struct tcm_loop_tpg, tl_se_tpg); 953 struct tcm_loop_tpg, tl_se_tpg);
961 struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba; 954 struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
962 955
963 atomic_inc(&tl_tpg->tl_tpg_port_count); 956 atomic_inc_mb(&tl_tpg->tl_tpg_port_count);
964 smp_mb__after_atomic();
965 /* 957 /*
966 * Add Linux/SCSI struct scsi_device by HCTL 958 * Add Linux/SCSI struct scsi_device by HCTL
967 */ 959 */
@@ -995,8 +987,7 @@ static void tcm_loop_port_unlink(
995 scsi_remove_device(sd); 987 scsi_remove_device(sd);
996 scsi_device_put(sd); 988 scsi_device_put(sd);
997 989
998 atomic_dec(&tl_tpg->tl_tpg_port_count); 990 atomic_dec_mb(&tl_tpg->tl_tpg_port_count);
999 smp_mb__after_atomic();
1000 991
1001 pr_debug("TCM_Loop_ConfigFS: Port Unlink Successful\n"); 992 pr_debug("TCM_Loop_ConfigFS: Port Unlink Successful\n");
1002} 993}
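The tcm_loop hunks above are the first of many in this diff that fold an open-coded atomic op plus smp_mb__after_atomic() into atomic_inc_mb()/atomic_dec_mb(). The wrappers are introduced elsewhere in this series, presumably along these lines (note the sketch also issues the *before* barrier that most of the open-coded sites omitted):

	#define atomic_inc_mb(v)		\
	do {					\
		smp_mb__before_atomic();	\
		atomic_inc(v);			\
		smp_mb__after_atomic();		\
	} while (0)

	#define atomic_dec_mb(v)		\
	do {					\
		smp_mb__before_atomic();	\
		atomic_dec(v);			\
		smp_mb__after_atomic();		\
	} while (0)

Separately, tcm_loop now keys command and TMR dispatch on sc->request->tag, the block-layer tag, as the legacy scsi_cmnd->tag / HEAD_OF_QUEUE_TAG scheme was being retired.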
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fbc5ebb5f761..fb87780929d2 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -392,8 +392,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
392 if (tg_pt_id != tg_pt_gp->tg_pt_gp_id) 392 if (tg_pt_id != tg_pt_gp->tg_pt_gp_id)
393 continue; 393 continue;
394 394
395 atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); 395 atomic_inc_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
396 smp_mb__after_atomic();
397 396
398 spin_unlock(&dev->t10_alua.tg_pt_gps_lock); 397 spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
399 398
@@ -403,8 +402,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
403 found = true; 402 found = true;
404 403
405 spin_lock(&dev->t10_alua.tg_pt_gps_lock); 404 spin_lock(&dev->t10_alua.tg_pt_gps_lock);
406 atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); 405 atomic_dec_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
407 smp_mb__after_atomic();
408 break; 406 break;
409 } 407 }
410 spin_unlock(&dev->t10_alua.tg_pt_gps_lock); 408 spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
@@ -998,8 +996,7 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
998 * every I_T nexus other than the I_T nexus on which the SET 996 * every I_T nexus other than the I_T nexus on which the SET
999 * TARGET PORT GROUPS command 997 * TARGET PORT GROUPS command
1000 */ 998 */
1001 atomic_inc(&mem->tg_pt_gp_mem_ref_cnt); 999 atomic_inc_mb(&mem->tg_pt_gp_mem_ref_cnt);
1002 smp_mb__after_atomic();
1003 spin_unlock(&tg_pt_gp->tg_pt_gp_lock); 1000 spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
1004 1001
1005 spin_lock_bh(&port->sep_alua_lock); 1002 spin_lock_bh(&port->sep_alua_lock);
@@ -1028,8 +1025,7 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
1028 spin_unlock_bh(&port->sep_alua_lock); 1025 spin_unlock_bh(&port->sep_alua_lock);
1029 1026
1030 spin_lock(&tg_pt_gp->tg_pt_gp_lock); 1027 spin_lock(&tg_pt_gp->tg_pt_gp_lock);
1031 atomic_dec(&mem->tg_pt_gp_mem_ref_cnt); 1028 atomic_dec_mb(&mem->tg_pt_gp_mem_ref_cnt);
1032 smp_mb__after_atomic();
1033 } 1029 }
1034 spin_unlock(&tg_pt_gp->tg_pt_gp_lock); 1030 spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
1035 /* 1031 /*
@@ -1063,7 +1059,6 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
1063 core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state)); 1059 core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
1064 spin_lock(&dev->t10_alua.tg_pt_gps_lock); 1060 spin_lock(&dev->t10_alua.tg_pt_gps_lock);
1065 atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); 1061 atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
1066 smp_mb__after_atomic();
1067 spin_unlock(&dev->t10_alua.tg_pt_gps_lock); 1062 spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
1068 1063
1069 if (tg_pt_gp->tg_pt_gp_transition_complete) 1064 if (tg_pt_gp->tg_pt_gp_transition_complete)
@@ -1125,7 +1120,6 @@ static int core_alua_do_transition_tg_pt(
1125 */ 1120 */
1126 spin_lock(&dev->t10_alua.tg_pt_gps_lock); 1121 spin_lock(&dev->t10_alua.tg_pt_gps_lock);
1127 atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); 1122 atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
1128 smp_mb__after_atomic();
1129 spin_unlock(&dev->t10_alua.tg_pt_gps_lock); 1123 spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
1130 1124
1131 if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { 1125 if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
@@ -1168,7 +1162,6 @@ int core_alua_do_port_transition(
1168 spin_lock(&local_lu_gp_mem->lu_gp_mem_lock); 1162 spin_lock(&local_lu_gp_mem->lu_gp_mem_lock);
1169 lu_gp = local_lu_gp_mem->lu_gp; 1163 lu_gp = local_lu_gp_mem->lu_gp;
1170 atomic_inc(&lu_gp->lu_gp_ref_cnt); 1164 atomic_inc(&lu_gp->lu_gp_ref_cnt);
1171 smp_mb__after_atomic();
1172 spin_unlock(&local_lu_gp_mem->lu_gp_mem_lock); 1165 spin_unlock(&local_lu_gp_mem->lu_gp_mem_lock);
1173 /* 1166 /*
1174 * For storage objects that are members of the 'default_lu_gp', 1167 * For storage objects that are members of the 'default_lu_gp',
@@ -1184,8 +1177,7 @@ int core_alua_do_port_transition(
1184 l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl; 1177 l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl;
1185 rc = core_alua_do_transition_tg_pt(l_tg_pt_gp, 1178 rc = core_alua_do_transition_tg_pt(l_tg_pt_gp,
1186 new_state, explicit); 1179 new_state, explicit);
1187 atomic_dec(&lu_gp->lu_gp_ref_cnt); 1180 atomic_dec_mb(&lu_gp->lu_gp_ref_cnt);
1188 smp_mb__after_atomic();
1189 return rc; 1181 return rc;
1190 } 1182 }
1191 /* 1183 /*
@@ -1198,8 +1190,7 @@ int core_alua_do_port_transition(
1198 lu_gp_mem_list) { 1190 lu_gp_mem_list) {
1199 1191
1200 dev = lu_gp_mem->lu_gp_mem_dev; 1192 dev = lu_gp_mem->lu_gp_mem_dev;
1201 atomic_inc(&lu_gp_mem->lu_gp_mem_ref_cnt); 1193 atomic_inc_mb(&lu_gp_mem->lu_gp_mem_ref_cnt);
1202 smp_mb__after_atomic();
1203 spin_unlock(&lu_gp->lu_gp_lock); 1194 spin_unlock(&lu_gp->lu_gp_lock);
1204 1195
1205 spin_lock(&dev->t10_alua.tg_pt_gps_lock); 1196 spin_lock(&dev->t10_alua.tg_pt_gps_lock);
@@ -1227,8 +1218,7 @@ int core_alua_do_port_transition(
1227 tg_pt_gp->tg_pt_gp_alua_port = NULL; 1218 tg_pt_gp->tg_pt_gp_alua_port = NULL;
1228 tg_pt_gp->tg_pt_gp_alua_nacl = NULL; 1219 tg_pt_gp->tg_pt_gp_alua_nacl = NULL;
1229 } 1220 }
1230 atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); 1221 atomic_inc_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
1231 smp_mb__after_atomic();
1232 spin_unlock(&dev->t10_alua.tg_pt_gps_lock); 1222 spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
1233 /* 1223 /*
1234 * core_alua_do_transition_tg_pt() will always return 1224 * core_alua_do_transition_tg_pt() will always return
@@ -1238,16 +1228,14 @@ int core_alua_do_port_transition(
1238 new_state, explicit); 1228 new_state, explicit);
1239 1229
1240 spin_lock(&dev->t10_alua.tg_pt_gps_lock); 1230 spin_lock(&dev->t10_alua.tg_pt_gps_lock);
1241 atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); 1231 atomic_dec_mb(&tg_pt_gp->tg_pt_gp_ref_cnt);
1242 smp_mb__after_atomic();
1243 if (rc) 1232 if (rc)
1244 break; 1233 break;
1245 } 1234 }
1246 spin_unlock(&dev->t10_alua.tg_pt_gps_lock); 1235 spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
1247 1236
1248 spin_lock(&lu_gp->lu_gp_lock); 1237 spin_lock(&lu_gp->lu_gp_lock);
1249 atomic_dec(&lu_gp_mem->lu_gp_mem_ref_cnt); 1238 atomic_dec_mb(&lu_gp_mem->lu_gp_mem_ref_cnt);
1250 smp_mb__after_atomic();
1251 } 1239 }
1252 spin_unlock(&lu_gp->lu_gp_lock); 1240 spin_unlock(&lu_gp->lu_gp_lock);
1253 1241
@@ -1260,8 +1248,7 @@ int core_alua_do_port_transition(
1260 core_alua_dump_state(new_state)); 1248 core_alua_dump_state(new_state));
1261 } 1249 }
1262 1250
1263 atomic_dec(&lu_gp->lu_gp_ref_cnt); 1251 atomic_dec_mb(&lu_gp->lu_gp_ref_cnt);
1264 smp_mb__after_atomic();
1265 return rc; 1252 return rc;
1266} 1253}
1267 1254
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 756def38c77a..79f9296a08ae 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -665,6 +665,9 @@ SE_DEV_ATTR(is_nonrot, S_IRUGO | S_IWUSR);
665DEF_DEV_ATTRIB(emulate_rest_reord); 665DEF_DEV_ATTRIB(emulate_rest_reord);
666SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR); 666SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR);
667 667
668DEF_DEV_ATTRIB(force_pr_aptpl);
669SE_DEV_ATTR(force_pr_aptpl, S_IRUGO | S_IWUSR);
670
668DEF_DEV_ATTRIB_RO(hw_block_size); 671DEF_DEV_ATTRIB_RO(hw_block_size);
669SE_DEV_ATTR_RO(hw_block_size); 672SE_DEV_ATTR_RO(hw_block_size);
670 673
@@ -719,6 +722,7 @@ static struct configfs_attribute *target_core_dev_attrib_attrs[] = {
719 &target_core_dev_attrib_hw_pi_prot_type.attr, 722 &target_core_dev_attrib_hw_pi_prot_type.attr,
720 &target_core_dev_attrib_pi_prot_format.attr, 723 &target_core_dev_attrib_pi_prot_format.attr,
721 &target_core_dev_attrib_enforce_pr_isids.attr, 724 &target_core_dev_attrib_enforce_pr_isids.attr,
725 &target_core_dev_attrib_force_pr_aptpl.attr,
722 &target_core_dev_attrib_is_nonrot.attr, 726 &target_core_dev_attrib_is_nonrot.attr,
723 &target_core_dev_attrib_emulate_rest_reord.attr, 727 &target_core_dev_attrib_emulate_rest_reord.attr,
724 &target_core_dev_attrib_hw_block_size.attr, 728 &target_core_dev_attrib_hw_block_size.attr,
@@ -1263,7 +1267,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
1263{ 1267{
1264 unsigned char *i_fabric = NULL, *i_port = NULL, *isid = NULL; 1268 unsigned char *i_fabric = NULL, *i_port = NULL, *isid = NULL;
1265 unsigned char *t_fabric = NULL, *t_port = NULL; 1269 unsigned char *t_fabric = NULL, *t_port = NULL;
1266 char *orig, *ptr, *arg_p, *opts; 1270 char *orig, *ptr, *opts;
1267 substring_t args[MAX_OPT_ARGS]; 1271 substring_t args[MAX_OPT_ARGS];
1268 unsigned long long tmp_ll; 1272 unsigned long long tmp_ll;
1269 u64 sa_res_key = 0; 1273 u64 sa_res_key = 0;
@@ -1295,14 +1299,14 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
1295 token = match_token(ptr, tokens, args); 1299 token = match_token(ptr, tokens, args);
1296 switch (token) { 1300 switch (token) {
1297 case Opt_initiator_fabric: 1301 case Opt_initiator_fabric:
1298 i_fabric = match_strdup(&args[0]); 1302 i_fabric = match_strdup(args);
1299 if (!i_fabric) { 1303 if (!i_fabric) {
1300 ret = -ENOMEM; 1304 ret = -ENOMEM;
1301 goto out; 1305 goto out;
1302 } 1306 }
1303 break; 1307 break;
1304 case Opt_initiator_node: 1308 case Opt_initiator_node:
1305 i_port = match_strdup(&args[0]); 1309 i_port = match_strdup(args);
1306 if (!i_port) { 1310 if (!i_port) {
1307 ret = -ENOMEM; 1311 ret = -ENOMEM;
1308 goto out; 1312 goto out;
@@ -1316,7 +1320,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
1316 } 1320 }
1317 break; 1321 break;
1318 case Opt_initiator_sid: 1322 case Opt_initiator_sid:
1319 isid = match_strdup(&args[0]); 1323 isid = match_strdup(args);
1320 if (!isid) { 1324 if (!isid) {
1321 ret = -ENOMEM; 1325 ret = -ENOMEM;
1322 goto out; 1326 goto out;
@@ -1330,15 +1334,9 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
1330 } 1334 }
1331 break; 1335 break;
1332 case Opt_sa_res_key: 1336 case Opt_sa_res_key:
1333 arg_p = match_strdup(&args[0]); 1337 ret = kstrtoull(args->from, 0, &tmp_ll);
1334 if (!arg_p) {
1335 ret = -ENOMEM;
1336 goto out;
1337 }
1338 ret = kstrtoull(arg_p, 0, &tmp_ll);
1339 if (ret < 0) { 1338 if (ret < 0) {
1340 pr_err("kstrtoull() failed for" 1339 pr_err("kstrtoull() failed for sa_res_key=\n");
1341 " sa_res_key=\n");
1342 goto out; 1340 goto out;
1343 } 1341 }
1344 sa_res_key = (u64)tmp_ll; 1342 sa_res_key = (u64)tmp_ll;
@@ -1370,14 +1368,14 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
1370 * PR APTPL Metadata for Target Port 1368 * PR APTPL Metadata for Target Port
1371 */ 1369 */
1372 case Opt_target_fabric: 1370 case Opt_target_fabric:
1373 t_fabric = match_strdup(&args[0]); 1371 t_fabric = match_strdup(args);
1374 if (!t_fabric) { 1372 if (!t_fabric) {
1375 ret = -ENOMEM; 1373 ret = -ENOMEM;
1376 goto out; 1374 goto out;
1377 } 1375 }
1378 break; 1376 break;
1379 case Opt_target_node: 1377 case Opt_target_node:
1380 t_port = match_strdup(&args[0]); 1378 t_port = match_strdup(args);
1381 if (!t_port) { 1379 if (!t_port) {
1382 ret = -ENOMEM; 1380 ret = -ENOMEM;
1383 goto out; 1381 goto out;
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 98da90167159..c45f9e907e44 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -224,8 +224,7 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(
224 if (port->sep_rtpi != rtpi) 224 if (port->sep_rtpi != rtpi)
225 continue; 225 continue;
226 226
227 atomic_inc(&deve->pr_ref_count); 227 atomic_inc_mb(&deve->pr_ref_count);
228 smp_mb__after_atomic();
229 spin_unlock_irq(&nacl->device_list_lock); 228 spin_unlock_irq(&nacl->device_list_lock);
230 229
231 return deve; 230 return deve;
@@ -1019,6 +1018,23 @@ int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag)
1019 return 0; 1018 return 0;
1020} 1019}
1021 1020
1021int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag)
1022{
1023 if ((flag != 0) && (flag != 1)) {
1024 printk(KERN_ERR "Illegal value %d\n", flag);
1025 return -EINVAL;
1026 }
1027 if (dev->export_count) {
1028 pr_err("dev[%p]: Unable to set force_pr_aptpl while"
1029 " export_count is %d\n", dev, dev->export_count);
1030 return -EINVAL;
1031 }
1032
1033 dev->dev_attrib.force_pr_aptpl = flag;
1034 pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag);
1035 return 0;
1036}
1037
1022int se_dev_set_is_nonrot(struct se_device *dev, int flag) 1038int se_dev_set_is_nonrot(struct se_device *dev, int flag)
1023{ 1039{
1024 if ((flag != 0) && (flag != 1)) { 1040 if ((flag != 0) && (flag != 1)) {
@@ -1250,24 +1266,16 @@ struct se_lun *core_dev_add_lun(
1250 * 1266 *
1251 * 1267 *
1252 */ 1268 */
1253int core_dev_del_lun( 1269void core_dev_del_lun(
1254 struct se_portal_group *tpg, 1270 struct se_portal_group *tpg,
1255 u32 unpacked_lun) 1271 struct se_lun *lun)
1256{ 1272{
1257 struct se_lun *lun; 1273 pr_debug("%s_TPG[%u]_LUN[%u] - Deactivating %s Logical Unit from"
1258
1259 lun = core_tpg_pre_dellun(tpg, unpacked_lun);
1260 if (IS_ERR(lun))
1261 return PTR_ERR(lun);
1262
1263 core_tpg_post_dellun(tpg, lun);
1264
1265 pr_debug("%s_TPG[%u]_LUN[%u] - Deactivated %s Logical Unit from"
1266 " device object\n", tpg->se_tpg_tfo->get_fabric_name(), 1274 " device object\n", tpg->se_tpg_tfo->get_fabric_name(),
1267 tpg->se_tpg_tfo->tpg_get_tag(tpg), unpacked_lun, 1275 tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
1268 tpg->se_tpg_tfo->get_fabric_name()); 1276 tpg->se_tpg_tfo->get_fabric_name());
1269 1277
1270 return 0; 1278 core_tpg_remove_lun(tpg, lun);
1271} 1279}
1272 1280
1273struct se_lun *core_get_lun_from_tpg(struct se_portal_group *tpg, u32 unpacked_lun) 1281struct se_lun *core_get_lun_from_tpg(struct se_portal_group *tpg, u32 unpacked_lun)
@@ -1396,8 +1404,7 @@ int core_dev_add_initiator_node_lun_acl(
1396 1404
1397 spin_lock(&lun->lun_acl_lock); 1405 spin_lock(&lun->lun_acl_lock);
1398 list_add_tail(&lacl->lacl_list, &lun->lun_acl_list); 1406 list_add_tail(&lacl->lacl_list, &lun->lun_acl_list);
1399 atomic_inc(&lun->lun_acl_count); 1407 atomic_inc_mb(&lun->lun_acl_count);
1400 smp_mb__after_atomic();
1401 spin_unlock(&lun->lun_acl_lock); 1408 spin_unlock(&lun->lun_acl_lock);
1402 1409
1403 pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for " 1410 pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for "
@@ -1409,7 +1416,8 @@ int core_dev_add_initiator_node_lun_acl(
1409 * Check to see if there are any existing persistent reservation APTPL 1416 * Check to see if there are any existing persistent reservation APTPL
1410 * pre-registrations that need to be enabled for this LUN ACL.. 1417 * pre-registrations that need to be enabled for this LUN ACL..
1411 */ 1418 */
1412 core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, lacl); 1419 core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl,
1420 lacl->mapped_lun);
1413 return 0; 1421 return 0;
1414} 1422}
1415 1423
@@ -1430,8 +1438,7 @@ int core_dev_del_initiator_node_lun_acl(
1430 1438
1431 spin_lock(&lun->lun_acl_lock); 1439 spin_lock(&lun->lun_acl_lock);
1432 list_del(&lacl->lacl_list); 1440 list_del(&lacl->lacl_list);
1433 atomic_dec(&lun->lun_acl_count); 1441 atomic_dec_mb(&lun->lun_acl_count);
1434 smp_mb__after_atomic();
1435 spin_unlock(&lun->lun_acl_lock); 1442 spin_unlock(&lun->lun_acl_lock);
1436 1443
1437 core_disable_device_list_for_node(lun, NULL, lacl->mapped_lun, 1444 core_disable_device_list_for_node(lun, NULL, lacl->mapped_lun,
@@ -1554,6 +1561,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
1554 dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC; 1561 dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC;
1555 dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT; 1562 dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT;
1556 dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS; 1563 dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS;
1564 dev->dev_attrib.force_pr_aptpl = DA_FORCE_PR_APTPL;
1557 dev->dev_attrib.is_nonrot = DA_IS_NONROT; 1565 dev->dev_attrib.is_nonrot = DA_IS_NONROT;
1558 dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD; 1566 dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD;
1559 dev->dev_attrib.max_unmap_lba_count = DA_MAX_UNMAP_LBA_COUNT; 1567 dev->dev_attrib.max_unmap_lba_count = DA_MAX_UNMAP_LBA_COUNT;
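The new force_pr_aptpl attribute follows the standard 0/1 dev_attrib pattern: the setter rejects changes while the device is exported, the value lands in dev->dev_attrib, and it is consulted at PERSISTENT RESERVE OUT time (see the target_core_pr.c hunk further down). Once exported through configfs it would presumably be toggled like any other backend attribute, e.g.:

	# hypothetical backstore path; HBA and device names depend on the setup
	echo 1 > /sys/kernel/config/target/core/iblock_0/my_dev/attrib/force_pr_aptpl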
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 7de9f0475d05..0c3f90130b7d 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -320,7 +320,7 @@ static struct config_group *target_fabric_make_mappedlun(
320 struct se_node_acl, acl_group); 320 struct se_node_acl, acl_group);
321 struct se_portal_group *se_tpg = se_nacl->se_tpg; 321 struct se_portal_group *se_tpg = se_nacl->se_tpg;
322 struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf; 322 struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
323 struct se_lun_acl *lacl; 323 struct se_lun_acl *lacl = NULL;
324 struct config_item *acl_ci; 324 struct config_item *acl_ci;
325 struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL; 325 struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL;
326 char *buf; 326 char *buf;
@@ -406,6 +406,7 @@ static struct config_group *target_fabric_make_mappedlun(
406out: 406out:
407 if (lacl_cg) 407 if (lacl_cg)
408 kfree(lacl_cg->default_groups); 408 kfree(lacl_cg->default_groups);
409 kfree(lacl);
409 kfree(buf); 410 kfree(buf);
410 return ERR_PTR(ret); 411 return ERR_PTR(ret);
411} 412}
@@ -821,7 +822,7 @@ static int target_fabric_port_unlink(
821 tf->tf_ops.fabric_pre_unlink(se_tpg, lun); 822 tf->tf_ops.fabric_pre_unlink(se_tpg, lun);
822 } 823 }
823 824
824 core_dev_del_lun(se_tpg, lun->unpacked_lun); 825 core_dev_del_lun(se_tpg, lun);
825 return 0; 826 return 0;
826} 827}
827 828
@@ -910,16 +911,12 @@ static struct config_group *target_fabric_make_lun(
910 GFP_KERNEL); 911 GFP_KERNEL);
911 if (!port_stat_grp->default_groups) { 912 if (!port_stat_grp->default_groups) {
912 pr_err("Unable to allocate port_stat_grp->default_groups\n"); 913 pr_err("Unable to allocate port_stat_grp->default_groups\n");
913 errno = -ENOMEM; 914 kfree(lun_cg->default_groups);
914 goto out; 915 return ERR_PTR(-ENOMEM);
915 } 916 }
916 target_stat_setup_port_default_groups(lun); 917 target_stat_setup_port_default_groups(lun);
917 918
918 return &lun->lun_group; 919 return &lun->lun_group;
919out:
920 if (lun_cg)
921 kfree(lun_cg->default_groups);
922 return ERR_PTR(errno);
923} 920}
924 921
925static void target_fabric_drop_lun( 922static void target_fabric_drop_lun(
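The target_fabric_configfs.c fixes above plug a leak (lacl was never freed on the error path of target_fabric_make_mappedlun()) and drop an error label that only ever ran with a known-non-NULL pointer. Initialising lacl to NULL is what makes the unconditional kfree(lacl) at out: safe, since kfree(NULL) is a defined no-op. A userspace analogue of the single-unwind-label idiom:

	#include <stdio.h>
	#include <stdlib.h>

	/* free(NULL), like kfree(NULL), is a defined no-op, so pointers
	 * initialised to NULL can all be freed from one error label. */
	static int make_pair(char **a, char **b)
	{
		char *x = NULL, *y = NULL;

		x = malloc(16);
		if (!x)
			goto fail;
		y = malloc(16);
		if (!y)
			goto fail;
		*a = x;
		*b = y;
		return 0;
	fail:
		free(x);	/* safe whether or not the alloc happened */
		free(y);
		return -1;
	}

	int main(void)
	{
		char *a, *b;

		if (make_pair(&a, &b) == 0) {
			puts("ok");
			free(a);
			free(b);
		}
		return 0;
	}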
diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c
index 0d1cf8b4f49f..35bfe77160d8 100644
--- a/drivers/target/target_core_fabric_lib.c
+++ b/drivers/target/target_core_fabric_lib.c
@@ -394,9 +394,9 @@ char *iscsi_parse_pr_out_transport_id(
394 * If the caller wants the TransportID Length, we set that value for the 394 * If the caller wants the TransportID Length, we set that value for the
395 * entire iSCSI Transport ID now. 395 * entire iSCSI Transport ID now.
396 */ 396 */
397 if (out_tid_len != NULL) { 397 if (out_tid_len) {
398 add_len = ((buf[2] >> 8) & 0xff); 398 /* The shift works thanks to integer promotion rules */
399 add_len |= (buf[3] & 0xff); 399 add_len = (buf[2] << 8) | buf[3];
400 400
401 tid_len = strlen(&buf[4]); 401 tid_len = strlen(&buf[4]);
402 tid_len += 4; /* Add four bytes for iSCSI Transport ID header */ 402 tid_len += 4; /* Add four bytes for iSCSI Transport ID header */
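The old decode above, (buf[2] >> 8) & 0xff, shifts an 8-bit value right by 8 and therefore always yields 0: the additional length's high byte was silently dropped. The replacement assembles the big-endian 16-bit field correctly; as the new comment notes, buf[2] is promoted to int before the left shift, so no bits are lost. The encode-side twin of this bug (add_len << 8 where >> 8 was meant) is fixed in target_core_pr.c further down. A runnable check:

	#include <stdio.h>

	/* Big-endian 16-bit decode: p[0] promotes to int, so << 8 is
	 * well defined and keeps the high byte. */
	static unsigned int be16_decode(const unsigned char *p)
	{
		return (p[0] << 8) | p[1];
	}

	int main(void)
	{
		unsigned char buf[] = { 0x00, 0x00, 0x01, 0x2c };

		printf("%u\n", be16_decode(&buf[2]));	/* 300; the old code gave 44 */
		return 0;
	}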
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 7d6cddaec525..72c83d98662b 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -415,7 +415,7 @@ fd_execute_sync_cache(struct se_cmd *cmd)
415 } else { 415 } else {
416 start = cmd->t_task_lba * dev->dev_attrib.block_size; 416 start = cmd->t_task_lba * dev->dev_attrib.block_size;
417 if (cmd->data_length) 417 if (cmd->data_length)
418 end = start + cmd->data_length; 418 end = start + cmd->data_length - 1;
419 else 419 else
420 end = LLONG_MAX; 420 end = LLONG_MAX;
421 } 421 }
@@ -680,7 +680,12 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
680 struct fd_dev *fd_dev = FD_DEV(dev); 680 struct fd_dev *fd_dev = FD_DEV(dev);
681 loff_t start = cmd->t_task_lba * 681 loff_t start = cmd->t_task_lba *
682 dev->dev_attrib.block_size; 682 dev->dev_attrib.block_size;
683 loff_t end = start + cmd->data_length; 683 loff_t end;
684
685 if (cmd->data_length)
686 end = start + cmd->data_length - 1;
687 else
688 end = LLONG_MAX;
684 689
685 vfs_fsync_range(fd_dev->fd_file, start, end, 1); 690 vfs_fsync_range(fd_dev->fd_file, start, end, 1);
686 } 691 }
@@ -762,7 +767,9 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev,
762 fd_dev->fbd_flags |= FBDF_HAS_SIZE; 767 fd_dev->fbd_flags |= FBDF_HAS_SIZE;
763 break; 768 break;
764 case Opt_fd_buffered_io: 769 case Opt_fd_buffered_io:
765 match_int(args, &arg); 770 ret = match_int(args, &arg);
771 if (ret)
772 goto out;
766 if (arg != 1) { 773 if (arg != 1) {
767 pr_err("bogus fd_buffered_io=%d value\n", arg); 774 pr_err("bogus fd_buffered_io=%d value\n", arg);
768 ret = -EINVAL; 775 ret = -EINVAL;
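Both target_core_file.c sync hunks above fix the same off-by-one: vfs_fsync_range() takes the offset of the last byte to sync, inclusive, so a byte count must be converted with "- 1" (and a zero data_length means "sync to end of file"). The corrected computation, extracted as a sketch with kernel context assumed:

	/* Sketch: inclusive last-byte offset for vfs_fsync_range(). */
	static loff_t fd_sync_end(loff_t start, u32 data_length)
	{
		return data_length ? start + data_length - 1 : LLONG_MAX;
	}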
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index de9cab708f45..e31f42f369ff 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -38,6 +38,7 @@ int se_dev_set_emulate_3pc(struct se_device *, int);
38int se_dev_set_pi_prot_type(struct se_device *, int); 38int se_dev_set_pi_prot_type(struct se_device *, int);
39int se_dev_set_pi_prot_format(struct se_device *, int); 39int se_dev_set_pi_prot_format(struct se_device *, int);
40int se_dev_set_enforce_pr_isids(struct se_device *, int); 40int se_dev_set_enforce_pr_isids(struct se_device *, int);
41int se_dev_set_force_pr_aptpl(struct se_device *, int);
41int se_dev_set_is_nonrot(struct se_device *, int); 42int se_dev_set_is_nonrot(struct se_device *, int);
42int se_dev_set_emulate_rest_reord(struct se_device *dev, int); 43int se_dev_set_emulate_rest_reord(struct se_device *dev, int);
43int se_dev_set_queue_depth(struct se_device *, u32); 44int se_dev_set_queue_depth(struct se_device *, u32);
@@ -46,7 +47,7 @@ int se_dev_set_fabric_max_sectors(struct se_device *, u32);
46int se_dev_set_optimal_sectors(struct se_device *, u32); 47int se_dev_set_optimal_sectors(struct se_device *, u32);
47int se_dev_set_block_size(struct se_device *, u32); 48int se_dev_set_block_size(struct se_device *, u32);
48struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32); 49struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32);
49int core_dev_del_lun(struct se_portal_group *, u32); 50void core_dev_del_lun(struct se_portal_group *, struct se_lun *);
50struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32); 51struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32);
51struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *, 52struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *,
52 struct se_node_acl *, u32, int *); 53 struct se_node_acl *, u32, int *);
@@ -82,8 +83,7 @@ void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *);
82struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32); 83struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u32);
83int core_tpg_add_lun(struct se_portal_group *, struct se_lun *, 84int core_tpg_add_lun(struct se_portal_group *, struct se_lun *,
84 u32, struct se_device *); 85 u32, struct se_device *);
85struct se_lun *core_tpg_pre_dellun(struct se_portal_group *, u32 unpacked_lun); 86void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *);
86int core_tpg_post_dellun(struct se_portal_group *, struct se_lun *);
87 87
88/* target_core_transport.c */ 88/* target_core_transport.c */
89extern struct kmem_cache *se_tmr_req_cache; 89extern struct kmem_cache *se_tmr_req_cache;
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index df357862286e..8c60a1a1ae8d 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -674,8 +674,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
674 */ 674 */
675 spin_lock(&dev->se_port_lock); 675 spin_lock(&dev->se_port_lock);
676 list_for_each_entry_safe(port, port_tmp, &dev->dev_sep_list, sep_list) { 676 list_for_each_entry_safe(port, port_tmp, &dev->dev_sep_list, sep_list) {
677 atomic_inc(&port->sep_tg_pt_ref_cnt); 677 atomic_inc_mb(&port->sep_tg_pt_ref_cnt);
678 smp_mb__after_atomic();
679 spin_unlock(&dev->se_port_lock); 678 spin_unlock(&dev->se_port_lock);
680 679
681 spin_lock_bh(&port->sep_alua_lock); 680 spin_lock_bh(&port->sep_alua_lock);
@@ -709,8 +708,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
709 if (strcmp(nacl->initiatorname, nacl_tmp->initiatorname)) 708 if (strcmp(nacl->initiatorname, nacl_tmp->initiatorname))
710 continue; 709 continue;
711 710
712 atomic_inc(&deve_tmp->pr_ref_count); 711 atomic_inc_mb(&deve_tmp->pr_ref_count);
713 smp_mb__after_atomic();
714 spin_unlock_bh(&port->sep_alua_lock); 712 spin_unlock_bh(&port->sep_alua_lock);
715 /* 713 /*
716 * Grab a configfs group dependency that is released 714 * Grab a configfs group dependency that is released
@@ -722,10 +720,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
722 if (ret < 0) { 720 if (ret < 0) {
723 pr_err("core_scsi3_lunacl_depend" 721 pr_err("core_scsi3_lunacl_depend"
724 "_item() failed\n"); 722 "_item() failed\n");
725 atomic_dec(&port->sep_tg_pt_ref_cnt); 723 atomic_dec_mb(&port->sep_tg_pt_ref_cnt);
726 smp_mb__after_atomic(); 724 atomic_dec_mb(&deve_tmp->pr_ref_count);
727 atomic_dec(&deve_tmp->pr_ref_count);
728 smp_mb__after_atomic();
729 goto out; 725 goto out;
730 } 726 }
731 /* 727 /*
@@ -739,10 +735,8 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
739 nacl_tmp, deve_tmp, NULL, 735 nacl_tmp, deve_tmp, NULL,
740 sa_res_key, all_tg_pt, aptpl); 736 sa_res_key, all_tg_pt, aptpl);
741 if (!pr_reg_atp) { 737 if (!pr_reg_atp) {
742 atomic_dec(&port->sep_tg_pt_ref_cnt); 738 atomic_dec_mb(&port->sep_tg_pt_ref_cnt);
743 smp_mb__after_atomic(); 739 atomic_dec_mb(&deve_tmp->pr_ref_count);
744 atomic_dec(&deve_tmp->pr_ref_count);
745 smp_mb__after_atomic();
746 core_scsi3_lunacl_undepend_item(deve_tmp); 740 core_scsi3_lunacl_undepend_item(deve_tmp);
747 goto out; 741 goto out;
748 } 742 }
@@ -754,8 +748,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
754 spin_unlock_bh(&port->sep_alua_lock); 748 spin_unlock_bh(&port->sep_alua_lock);
755 749
756 spin_lock(&dev->se_port_lock); 750 spin_lock(&dev->se_port_lock);
757 atomic_dec(&port->sep_tg_pt_ref_cnt); 751 atomic_dec_mb(&port->sep_tg_pt_ref_cnt);
758 smp_mb__after_atomic();
759 } 752 }
760 spin_unlock(&dev->se_port_lock); 753 spin_unlock(&dev->se_port_lock);
761 754
@@ -902,6 +895,7 @@ static int __core_scsi3_check_aptpl_registration(
902 spin_lock(&pr_tmpl->aptpl_reg_lock); 895 spin_lock(&pr_tmpl->aptpl_reg_lock);
903 list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->aptpl_reg_list, 896 list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->aptpl_reg_list,
904 pr_reg_aptpl_list) { 897 pr_reg_aptpl_list) {
898
905 if (!strcmp(pr_reg->pr_iport, i_port) && 899 if (!strcmp(pr_reg->pr_iport, i_port) &&
906 (pr_reg->pr_res_mapped_lun == deve->mapped_lun) && 900 (pr_reg->pr_res_mapped_lun == deve->mapped_lun) &&
907 !(strcmp(pr_reg->pr_tport, t_port)) && 901 !(strcmp(pr_reg->pr_tport, t_port)) &&
@@ -944,10 +938,10 @@ int core_scsi3_check_aptpl_registration(
944 struct se_device *dev, 938 struct se_device *dev,
945 struct se_portal_group *tpg, 939 struct se_portal_group *tpg,
946 struct se_lun *lun, 940 struct se_lun *lun,
947 struct se_lun_acl *lun_acl) 941 struct se_node_acl *nacl,
942 u32 mapped_lun)
948{ 943{
949 struct se_node_acl *nacl = lun_acl->se_lun_nacl; 944 struct se_dev_entry *deve = nacl->device_list[mapped_lun];
950 struct se_dev_entry *deve = nacl->device_list[lun_acl->mapped_lun];
951 945
952 if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) 946 if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
953 return 0; 947 return 0;
@@ -1109,8 +1103,7 @@ static struct t10_pr_registration *__core_scsi3_locate_pr_reg(
1109 if (dev->dev_attrib.enforce_pr_isids) 1103 if (dev->dev_attrib.enforce_pr_isids)
1110 continue; 1104 continue;
1111 } 1105 }
1112 atomic_inc(&pr_reg->pr_res_holders); 1106 atomic_inc_mb(&pr_reg->pr_res_holders);
1113 smp_mb__after_atomic();
1114 spin_unlock(&pr_tmpl->registration_lock); 1107 spin_unlock(&pr_tmpl->registration_lock);
1115 return pr_reg; 1108 return pr_reg;
1116 } 1109 }
@@ -1124,8 +1117,7 @@ static struct t10_pr_registration *__core_scsi3_locate_pr_reg(
1124 if (strcmp(isid, pr_reg->pr_reg_isid)) 1117 if (strcmp(isid, pr_reg->pr_reg_isid))
1125 continue; 1118 continue;
1126 1119
1127 atomic_inc(&pr_reg->pr_res_holders); 1120 atomic_inc_mb(&pr_reg->pr_res_holders);
1128 smp_mb__after_atomic();
1129 spin_unlock(&pr_tmpl->registration_lock); 1121 spin_unlock(&pr_tmpl->registration_lock);
1130 return pr_reg; 1122 return pr_reg;
1131 } 1123 }
@@ -1154,8 +1146,7 @@ static struct t10_pr_registration *core_scsi3_locate_pr_reg(
1154 1146
1155static void core_scsi3_put_pr_reg(struct t10_pr_registration *pr_reg) 1147static void core_scsi3_put_pr_reg(struct t10_pr_registration *pr_reg)
1156{ 1148{
1157 atomic_dec(&pr_reg->pr_res_holders); 1149 atomic_dec_mb(&pr_reg->pr_res_holders);
1158 smp_mb__after_atomic();
1159} 1150}
1160 1151
1161static int core_scsi3_check_implicit_release( 1152static int core_scsi3_check_implicit_release(
@@ -1348,8 +1339,7 @@ static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg)
1348 configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, 1339 configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
1349 &tpg->tpg_group.cg_item); 1340 &tpg->tpg_group.cg_item);
1350 1341
1351 atomic_dec(&tpg->tpg_pr_ref_count); 1342 atomic_dec_mb(&tpg->tpg_pr_ref_count);
1352 smp_mb__after_atomic();
1353} 1343}
1354 1344
1355static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl) 1345static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl)
@@ -1368,16 +1358,14 @@ static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl)
1368 struct se_portal_group *tpg = nacl->se_tpg; 1358 struct se_portal_group *tpg = nacl->se_tpg;
1369 1359
1370 if (nacl->dynamic_node_acl) { 1360 if (nacl->dynamic_node_acl) {
1371 atomic_dec(&nacl->acl_pr_ref_count); 1361 atomic_dec_mb(&nacl->acl_pr_ref_count);
1372 smp_mb__after_atomic();
1373 return; 1362 return;
1374 } 1363 }
1375 1364
1376 configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, 1365 configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
1377 &nacl->acl_group.cg_item); 1366 &nacl->acl_group.cg_item);
1378 1367
1379 atomic_dec(&nacl->acl_pr_ref_count); 1368 atomic_dec_mb(&nacl->acl_pr_ref_count);
1380 smp_mb__after_atomic();
1381} 1369}
1382 1370
1383static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve) 1371static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve)
@@ -1407,8 +1395,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
1407 * For nacl->dynamic_node_acl=1 1395 * For nacl->dynamic_node_acl=1
1408 */ 1396 */
1409 if (!lun_acl) { 1397 if (!lun_acl) {
1410 atomic_dec(&se_deve->pr_ref_count); 1398 atomic_dec_mb(&se_deve->pr_ref_count);
1411 smp_mb__after_atomic();
1412 return; 1399 return;
1413 } 1400 }
1414 nacl = lun_acl->se_lun_nacl; 1401 nacl = lun_acl->se_lun_nacl;
@@ -1417,8 +1404,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
1417 configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys, 1404 configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
1418 &lun_acl->se_lun_group.cg_item); 1405 &lun_acl->se_lun_group.cg_item);
1419 1406
1420 atomic_dec(&se_deve->pr_ref_count); 1407 atomic_dec_mb(&se_deve->pr_ref_count);
1421 smp_mb__after_atomic();
1422} 1408}
1423 1409
1424static sense_reason_t 1410static sense_reason_t
@@ -1551,15 +1537,13 @@ core_scsi3_decode_spec_i_port(
1551 if (!i_str) 1537 if (!i_str)
1552 continue; 1538 continue;
1553 1539
1554 atomic_inc(&tmp_tpg->tpg_pr_ref_count); 1540 atomic_inc_mb(&tmp_tpg->tpg_pr_ref_count);
1555 smp_mb__after_atomic();
1556 spin_unlock(&dev->se_port_lock); 1541 spin_unlock(&dev->se_port_lock);
1557 1542
1558 if (core_scsi3_tpg_depend_item(tmp_tpg)) { 1543 if (core_scsi3_tpg_depend_item(tmp_tpg)) {
1559 pr_err(" core_scsi3_tpg_depend_item()" 1544 pr_err(" core_scsi3_tpg_depend_item()"
1560 " for tmp_tpg\n"); 1545 " for tmp_tpg\n");
1561 atomic_dec(&tmp_tpg->tpg_pr_ref_count); 1546 atomic_dec_mb(&tmp_tpg->tpg_pr_ref_count);
1562 smp_mb__after_atomic();
1563 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 1547 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1564 goto out_unmap; 1548 goto out_unmap;
1565 } 1549 }
@@ -1571,10 +1555,8 @@ core_scsi3_decode_spec_i_port(
1571 spin_lock_irq(&tmp_tpg->acl_node_lock); 1555 spin_lock_irq(&tmp_tpg->acl_node_lock);
1572 dest_node_acl = __core_tpg_get_initiator_node_acl( 1556 dest_node_acl = __core_tpg_get_initiator_node_acl(
1573 tmp_tpg, i_str); 1557 tmp_tpg, i_str);
1574 if (dest_node_acl) { 1558 if (dest_node_acl)
1575 atomic_inc(&dest_node_acl->acl_pr_ref_count); 1559 atomic_inc_mb(&dest_node_acl->acl_pr_ref_count);
1576 smp_mb__after_atomic();
1577 }
1578 spin_unlock_irq(&tmp_tpg->acl_node_lock); 1560 spin_unlock_irq(&tmp_tpg->acl_node_lock);
1579 1561
1580 if (!dest_node_acl) { 1562 if (!dest_node_acl) {
@@ -1586,8 +1568,7 @@ core_scsi3_decode_spec_i_port(
1586 if (core_scsi3_nodeacl_depend_item(dest_node_acl)) { 1568 if (core_scsi3_nodeacl_depend_item(dest_node_acl)) {
1587 pr_err("configfs_depend_item() failed" 1569 pr_err("configfs_depend_item() failed"
1588 " for dest_node_acl->acl_group\n"); 1570 " for dest_node_acl->acl_group\n");
1589 atomic_dec(&dest_node_acl->acl_pr_ref_count); 1571 atomic_dec_mb(&dest_node_acl->acl_pr_ref_count);
1590 smp_mb__after_atomic();
1591 core_scsi3_tpg_undepend_item(tmp_tpg); 1572 core_scsi3_tpg_undepend_item(tmp_tpg);
1592 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 1573 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1593 goto out_unmap; 1574 goto out_unmap;
@@ -1646,8 +1627,7 @@ core_scsi3_decode_spec_i_port(
1646 if (core_scsi3_lunacl_depend_item(dest_se_deve)) { 1627 if (core_scsi3_lunacl_depend_item(dest_se_deve)) {
1647 pr_err("core_scsi3_lunacl_depend_item()" 1628 pr_err("core_scsi3_lunacl_depend_item()"
1648 " failed\n"); 1629 " failed\n");
1649 atomic_dec(&dest_se_deve->pr_ref_count); 1630 atomic_dec_mb(&dest_se_deve->pr_ref_count);
1650 smp_mb__after_atomic();
1651 core_scsi3_nodeacl_undepend_item(dest_node_acl); 1631 core_scsi3_nodeacl_undepend_item(dest_node_acl);
1652 core_scsi3_tpg_undepend_item(dest_tpg); 1632 core_scsi3_tpg_undepend_item(dest_tpg);
1653 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 1633 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -3167,15 +3147,13 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
3167 if (!dest_tf_ops) 3147 if (!dest_tf_ops)
3168 continue; 3148 continue;
3169 3149
3170 atomic_inc(&dest_se_tpg->tpg_pr_ref_count); 3150 atomic_inc_mb(&dest_se_tpg->tpg_pr_ref_count);
3171 smp_mb__after_atomic();
3172 spin_unlock(&dev->se_port_lock); 3151 spin_unlock(&dev->se_port_lock);
3173 3152
3174 if (core_scsi3_tpg_depend_item(dest_se_tpg)) { 3153 if (core_scsi3_tpg_depend_item(dest_se_tpg)) {
3175 pr_err("core_scsi3_tpg_depend_item() failed" 3154 pr_err("core_scsi3_tpg_depend_item() failed"
3176 " for dest_se_tpg\n"); 3155 " for dest_se_tpg\n");
3177 atomic_dec(&dest_se_tpg->tpg_pr_ref_count); 3156 atomic_dec_mb(&dest_se_tpg->tpg_pr_ref_count);
3178 smp_mb__after_atomic();
3179 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 3157 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
3180 goto out_put_pr_reg; 3158 goto out_put_pr_reg;
3181 } 3159 }
@@ -3271,10 +3249,8 @@ after_iport_check:
3271 spin_lock_irq(&dest_se_tpg->acl_node_lock); 3249 spin_lock_irq(&dest_se_tpg->acl_node_lock);
3272 dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg, 3250 dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg,
3273 initiator_str); 3251 initiator_str);
3274 if (dest_node_acl) { 3252 if (dest_node_acl)
3275 atomic_inc(&dest_node_acl->acl_pr_ref_count); 3253 atomic_inc_mb(&dest_node_acl->acl_pr_ref_count);
3276 smp_mb__after_atomic();
3277 }
3278 spin_unlock_irq(&dest_se_tpg->acl_node_lock); 3254 spin_unlock_irq(&dest_se_tpg->acl_node_lock);
3279 3255
3280 if (!dest_node_acl) { 3256 if (!dest_node_acl) {
@@ -3288,8 +3264,7 @@ after_iport_check:
3288 if (core_scsi3_nodeacl_depend_item(dest_node_acl)) { 3264 if (core_scsi3_nodeacl_depend_item(dest_node_acl)) {
3289 pr_err("core_scsi3_nodeacl_depend_item() for" 3265 pr_err("core_scsi3_nodeacl_depend_item() for"
3290 " dest_node_acl\n"); 3266 " dest_node_acl\n");
3291 atomic_dec(&dest_node_acl->acl_pr_ref_count); 3267 atomic_dec_mb(&dest_node_acl->acl_pr_ref_count);
3292 smp_mb__after_atomic();
3293 dest_node_acl = NULL; 3268 dest_node_acl = NULL;
3294 ret = TCM_INVALID_PARAMETER_LIST; 3269 ret = TCM_INVALID_PARAMETER_LIST;
3295 goto out; 3270 goto out;
@@ -3313,8 +3288,7 @@ after_iport_check:
3313 3288
3314 if (core_scsi3_lunacl_depend_item(dest_se_deve)) { 3289 if (core_scsi3_lunacl_depend_item(dest_se_deve)) {
3315 pr_err("core_scsi3_lunacl_depend_item() failed\n"); 3290 pr_err("core_scsi3_lunacl_depend_item() failed\n");
3316 atomic_dec(&dest_se_deve->pr_ref_count); 3291 atomic_dec_mb(&dest_se_deve->pr_ref_count);
3317 smp_mb__after_atomic();
3318 dest_se_deve = NULL; 3292 dest_se_deve = NULL;
3319 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 3293 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
3320 goto out; 3294 goto out;
@@ -3497,6 +3471,7 @@ static unsigned long long core_scsi3_extract_reservation_key(unsigned char *cdb)
3497sense_reason_t 3471sense_reason_t
3498target_scsi3_emulate_pr_out(struct se_cmd *cmd) 3472target_scsi3_emulate_pr_out(struct se_cmd *cmd)
3499{ 3473{
3474 struct se_device *dev = cmd->se_dev;
3500 unsigned char *cdb = &cmd->t_task_cdb[0]; 3475 unsigned char *cdb = &cmd->t_task_cdb[0];
3501 unsigned char *buf; 3476 unsigned char *buf;
3502 u64 res_key, sa_res_key; 3477 u64 res_key, sa_res_key;
@@ -3561,6 +3536,13 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
3561 aptpl = (buf[17] & 0x01); 3536 aptpl = (buf[17] & 0x01);
3562 unreg = (buf[17] & 0x02); 3537 unreg = (buf[17] & 0x02);
3563 } 3538 }
3539 /*
3540 * If the backend device has been configured to force APTPL metadata
3541 * write-out, go ahead and propagate aptpl=1 down now.
3542 */
3543 if (dev->dev_attrib.force_pr_aptpl)
3544 aptpl = 1;
3545
3564 transport_kunmap_data_sg(cmd); 3546 transport_kunmap_data_sg(cmd);
3565 buf = NULL; 3547 buf = NULL;
3566 3548
@@ -3803,7 +3785,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
3803 if (!buf) 3785 if (!buf)
3804 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 3786 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
3805 3787
3806 buf[0] = ((add_len << 8) & 0xff); 3788 buf[0] = ((add_len >> 8) & 0xff);
3807 buf[1] = (add_len & 0xff); 3789 buf[1] = (add_len & 0xff);
3808 buf[2] |= 0x10; /* CRH: Compatible Reservation Handling bit. */ 3790 buf[2] |= 0x10; /* CRH: Compatible Reservation Handling bit. */
3809 buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */ 3791 buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
@@ -3879,8 +3861,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
3879 se_tpg = pr_reg->pr_reg_nacl->se_tpg; 3861 se_tpg = pr_reg->pr_reg_nacl->se_tpg;
3880 add_desc_len = 0; 3862 add_desc_len = 0;
3881 3863
3882 atomic_inc(&pr_reg->pr_res_holders); 3864 atomic_inc_mb(&pr_reg->pr_res_holders);
3883 smp_mb__after_atomic();
3884 spin_unlock(&pr_tmpl->registration_lock); 3865 spin_unlock(&pr_tmpl->registration_lock);
3885 /* 3866 /*
3886 * Determine expected length of $FABRIC_MOD specific 3867 * Determine expected length of $FABRIC_MOD specific
@@ -3893,8 +3874,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
3893 pr_warn("SPC-3 PRIN READ_FULL_STATUS ran" 3874 pr_warn("SPC-3 PRIN READ_FULL_STATUS ran"
3894 " out of buffer: %d\n", cmd->data_length); 3875 " out of buffer: %d\n", cmd->data_length);
3895 spin_lock(&pr_tmpl->registration_lock); 3876 spin_lock(&pr_tmpl->registration_lock);
3896 atomic_dec(&pr_reg->pr_res_holders); 3877 atomic_dec_mb(&pr_reg->pr_res_holders);
3897 smp_mb__after_atomic();
3898 break; 3878 break;
3899 } 3879 }
3900 /* 3880 /*
@@ -3955,8 +3935,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
3955 se_nacl, pr_reg, &format_code, &buf[off+4]); 3935 se_nacl, pr_reg, &format_code, &buf[off+4]);
3956 3936
3957 spin_lock(&pr_tmpl->registration_lock); 3937 spin_lock(&pr_tmpl->registration_lock);
3958 atomic_dec(&pr_reg->pr_res_holders); 3938 atomic_dec_mb(&pr_reg->pr_res_holders);
3959 smp_mb__after_atomic();
3960 /* 3939 /*
3961 * Set the ADDITIONAL DESCRIPTOR LENGTH 3940 * Set the ADDITIONAL DESCRIPTOR LENGTH
3962 */ 3941 */
diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h
index 2ee2936fa0bd..749fd7bb7510 100644
--- a/drivers/target/target_core_pr.h
+++ b/drivers/target/target_core_pr.h
@@ -60,7 +60,7 @@ extern int core_scsi3_alloc_aptpl_registration(
60 unsigned char *, u16, u32, int, int, u8); 60 unsigned char *, u16, u32, int, int, u8);
61extern int core_scsi3_check_aptpl_registration(struct se_device *, 61extern int core_scsi3_check_aptpl_registration(struct se_device *,
62 struct se_portal_group *, struct se_lun *, 62 struct se_portal_group *, struct se_lun *,
63 struct se_lun_acl *); 63 struct se_node_acl *, u32);
64extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *, 64extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *,
65 struct se_node_acl *); 65 struct se_node_acl *);
66extern void core_scsi3_free_all_registrations(struct se_device *); 66extern void core_scsi3_free_all_registrations(struct se_device *);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 70d9f6dabba0..7c8291f0bbbc 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -749,14 +749,18 @@ static ssize_t pscsi_set_configfs_dev_params(struct se_device *dev,
749 ret = -EINVAL; 749 ret = -EINVAL;
750 goto out; 750 goto out;
751 } 751 }
752 match_int(args, &arg); 752 ret = match_int(args, &arg);
753 if (ret)
754 goto out;
753 pdv->pdv_host_id = arg; 755 pdv->pdv_host_id = arg;
754 pr_debug("PSCSI[%d]: Referencing SCSI Host ID:" 756 pr_debug("PSCSI[%d]: Referencing SCSI Host ID:"
755 " %d\n", phv->phv_host_id, pdv->pdv_host_id); 757 " %d\n", phv->phv_host_id, pdv->pdv_host_id);
756 pdv->pdv_flags |= PDF_HAS_VIRT_HOST_ID; 758 pdv->pdv_flags |= PDF_HAS_VIRT_HOST_ID;
757 break; 759 break;
758 case Opt_scsi_channel_id: 760 case Opt_scsi_channel_id:
759 match_int(args, &arg); 761 ret = match_int(args, &arg);
762 if (ret)
763 goto out;
760 pdv->pdv_channel_id = arg; 764 pdv->pdv_channel_id = arg;
761 pr_debug("PSCSI[%d]: Referencing SCSI Channel" 765 pr_debug("PSCSI[%d]: Referencing SCSI Channel"
762 " ID: %d\n", phv->phv_host_id, 766 " ID: %d\n", phv->phv_host_id,
@@ -764,7 +768,9 @@ static ssize_t pscsi_set_configfs_dev_params(struct se_device *dev,
764 pdv->pdv_flags |= PDF_HAS_CHANNEL_ID; 768 pdv->pdv_flags |= PDF_HAS_CHANNEL_ID;
765 break; 769 break;
766 case Opt_scsi_target_id: 770 case Opt_scsi_target_id:
767 match_int(args, &arg); 771 ret = match_int(args, &arg);
772 if (ret)
773 goto out;
768 pdv->pdv_target_id = arg; 774 pdv->pdv_target_id = arg;
769 pr_debug("PSCSI[%d]: Referencing SCSI Target" 775 pr_debug("PSCSI[%d]: Referencing SCSI Target"
770 " ID: %d\n", phv->phv_host_id, 776 " ID: %d\n", phv->phv_host_id,
@@ -772,7 +778,9 @@ static ssize_t pscsi_set_configfs_dev_params(struct se_device *dev,
772 pdv->pdv_flags |= PDF_HAS_TARGET_ID; 778 pdv->pdv_flags |= PDF_HAS_TARGET_ID;
773 break; 779 break;
774 case Opt_scsi_lun_id: 780 case Opt_scsi_lun_id:
775 match_int(args, &arg); 781 ret = match_int(args, &arg);
782 if (ret)
783 goto out;
776 pdv->pdv_lun_id = arg; 784 pdv->pdv_lun_id = arg;
777 pr_debug("PSCSI[%d]: Referencing SCSI LUN ID:" 785 pr_debug("PSCSI[%d]: Referencing SCSI LUN ID:"
778 " %d\n", phv->phv_host_id, pdv->pdv_lun_id); 786 " %d\n", phv->phv_host_id, pdv->pdv_lun_id);
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index bd78d9235ac6..ebe62afb957d 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -948,7 +948,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
948 } 948 }
949 949
950 /* reject any command that we don't have a handler for */ 950 /* reject any command that we don't have a handler for */
951 if (!(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) && !cmd->execute_cmd) 951 if (!cmd->execute_cmd)
952 return TCM_UNSUPPORTED_SCSI_OPCODE; 952 return TCM_UNSUPPORTED_SCSI_OPCODE;
953 953
954 if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { 954 if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) {
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index f7cd95e8111a..fa5e157db47b 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -64,21 +64,17 @@ int core_tmr_alloc_req(
64} 64}
65EXPORT_SYMBOL(core_tmr_alloc_req); 65EXPORT_SYMBOL(core_tmr_alloc_req);
66 66
67void core_tmr_release_req( 67void core_tmr_release_req(struct se_tmr_req *tmr)
68 struct se_tmr_req *tmr)
69{ 68{
70 struct se_device *dev = tmr->tmr_dev; 69 struct se_device *dev = tmr->tmr_dev;
71 unsigned long flags; 70 unsigned long flags;
72 71
73 if (!dev) { 72 if (dev) {
74 kfree(tmr); 73 spin_lock_irqsave(&dev->se_tmr_lock, flags);
75 return; 74 list_del(&tmr->tmr_list);
75 spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
76 } 76 }
77 77
78 spin_lock_irqsave(&dev->se_tmr_lock, flags);
79 list_del(&tmr->tmr_list);
80 spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
81
82 kfree(tmr); 78 kfree(tmr);
83} 79}
84 80
@@ -90,9 +86,8 @@ static void core_tmr_handle_tas_abort(
90 bool remove = true; 86 bool remove = true;
91 /* 87 /*
92 * TASK ABORTED status (TAS) bit support 88 * TASK ABORTED status (TAS) bit support
93 */ 89 */
94 if ((tmr_nacl && 90 if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
95 (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
96 remove = false; 91 remove = false;
97 transport_send_task_abort(cmd); 92 transport_send_task_abort(cmd);
98 } 93 }
@@ -120,13 +115,12 @@ void core_tmr_abort_task(
120 struct se_tmr_req *tmr, 115 struct se_tmr_req *tmr,
121 struct se_session *se_sess) 116 struct se_session *se_sess)
122{ 117{
123 struct se_cmd *se_cmd, *tmp_cmd; 118 struct se_cmd *se_cmd;
124 unsigned long flags; 119 unsigned long flags;
125 int ref_tag; 120 int ref_tag;
126 121
127 spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); 122 spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
128 list_for_each_entry_safe(se_cmd, tmp_cmd, 123 list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) {
129 &se_sess->sess_cmd_list, se_cmd_list) {
130 124
131 if (dev != se_cmd->se_dev) 125 if (dev != se_cmd->se_dev)
132 continue; 126 continue;
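On the core_tmr_abort_task() hunk above: list_for_each_entry_safe() exists so the loop body can unlink and free the node it is currently visiting; the abort path never does that while walking (it takes its reference and leaves the loop), so the plain iterator suffices and the tmp_cmd cursor disappears. The distinction, as a sketch:

	/* _safe caches the next node, so the current one may be freed: */
	list_for_each_entry_safe(cmd, tmp, &head, list) {
		list_del(&cmd->list);
		kfree(cmd);
	}

	/* read-only walk, or one that exits after a match: plain form */
	list_for_each_entry(cmd, &head, list) {
		if (cmd->tag == ref_tag)
			break;
	}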
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index be783f717f19..0696de9553d3 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -40,6 +40,7 @@
40#include <target/target_core_fabric.h> 40#include <target/target_core_fabric.h>
41 41
42#include "target_core_internal.h" 42#include "target_core_internal.h"
43#include "target_core_pr.h"
43 44
44extern struct se_device *g_lun0_dev; 45extern struct se_device *g_lun0_dev;
45 46
@@ -166,6 +167,13 @@ void core_tpg_add_node_to_devs(
166 167
167 core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun, 168 core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun,
168 lun_access, acl, tpg); 169 lun_access, acl, tpg);
170 /*
171 * Check to see if there are any existing persistent reservation
172 * APTPL pre-registrations that need to be enabled for this dynamic
173 * LUN ACL now..
174 */
175 core_scsi3_check_aptpl_registration(dev, tpg, lun, acl,
176 lun->unpacked_lun);
169 spin_lock(&tpg->tpg_lun_lock); 177 spin_lock(&tpg->tpg_lun_lock);
170 } 178 }
171 spin_unlock(&tpg->tpg_lun_lock); 179 spin_unlock(&tpg->tpg_lun_lock);
@@ -335,7 +343,7 @@ void core_tpg_clear_object_luns(struct se_portal_group *tpg)
335 continue; 343 continue;
336 344
337 spin_unlock(&tpg->tpg_lun_lock); 345 spin_unlock(&tpg->tpg_lun_lock);
338 core_dev_del_lun(tpg, lun->unpacked_lun); 346 core_dev_del_lun(tpg, lun);
339 spin_lock(&tpg->tpg_lun_lock); 347 spin_lock(&tpg->tpg_lun_lock);
340 } 348 }
341 spin_unlock(&tpg->tpg_lun_lock); 349 spin_unlock(&tpg->tpg_lun_lock);
@@ -663,13 +671,6 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg)
663 return 0; 671 return 0;
664} 672}
665 673
666static void core_tpg_release_virtual_lun0(struct se_portal_group *se_tpg)
667{
668 struct se_lun *lun = &se_tpg->tpg_virt_lun0;
669
670 core_tpg_post_dellun(se_tpg, lun);
671}
672
673int core_tpg_register( 674int core_tpg_register(
674 struct target_core_fabric_ops *tfo, 675 struct target_core_fabric_ops *tfo,
675 struct se_wwn *se_wwn, 676 struct se_wwn *se_wwn,
@@ -773,7 +774,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
773 spin_unlock_irq(&se_tpg->acl_node_lock); 774 spin_unlock_irq(&se_tpg->acl_node_lock);
774 775
775 if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) 776 if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
776 core_tpg_release_virtual_lun0(se_tpg); 777 core_tpg_remove_lun(se_tpg, &se_tpg->tpg_virt_lun0);
777 778
778 se_tpg->se_tpg_fabric_ptr = NULL; 779 se_tpg->se_tpg_fabric_ptr = NULL;
779 array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG); 780 array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG);
@@ -838,37 +839,7 @@ int core_tpg_add_lun(
838 return 0; 839 return 0;
839} 840}
840 841
841struct se_lun *core_tpg_pre_dellun( 842void core_tpg_remove_lun(
842 struct se_portal_group *tpg,
843 u32 unpacked_lun)
844{
845 struct se_lun *lun;
846
847 if (unpacked_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) {
848 pr_err("%s LUN: %u exceeds TRANSPORT_MAX_LUNS_PER_TPG"
849 "-1: %u for Target Portal Group: %u\n",
850 tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun,
851 TRANSPORT_MAX_LUNS_PER_TPG-1,
852 tpg->se_tpg_tfo->tpg_get_tag(tpg));
853 return ERR_PTR(-EOVERFLOW);
854 }
855
856 spin_lock(&tpg->tpg_lun_lock);
857 lun = tpg->tpg_lun_list[unpacked_lun];
858 if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) {
859 pr_err("%s Logical Unit Number: %u is not active on"
860 " Target Portal Group: %u, ignoring request.\n",
861 tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun,
862 tpg->se_tpg_tfo->tpg_get_tag(tpg));
863 spin_unlock(&tpg->tpg_lun_lock);
864 return ERR_PTR(-ENODEV);
865 }
866 spin_unlock(&tpg->tpg_lun_lock);
867
868 return lun;
869}
870
871int core_tpg_post_dellun(
872 struct se_portal_group *tpg, 843 struct se_portal_group *tpg,
873 struct se_lun *lun) 844 struct se_lun *lun)
874{ 845{
@@ -882,6 +853,4 @@ int core_tpg_post_dellun(
882 spin_unlock(&tpg->tpg_lun_lock); 853 spin_unlock(&tpg->tpg_lun_lock);
883 854
884 percpu_ref_exit(&lun->lun_ref); 855 percpu_ref_exit(&lun->lun_ref);
885
886 return 0;
887} 856}
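The target_core_tpg.c changes above complete the LUN-teardown consolidation visible throughout this diff: the core_tpg_pre_dellun() lookup (bounds check plus ACTIVE-status check) goes away because every caller already holds the struct se_lun, and core_tpg_post_dellun() becomes the void core_tpg_remove_lun(). The resulting call chain, sketched:

	target_fabric_port_unlink()
	    -> core_dev_del_lun(tpg, lun)          /* void; lun already resolved */
	        -> core_tpg_remove_lun(tpg, lun)   /* unlink + percpu_ref_exit() */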
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 7fa62fc93e0b..9ea0d5f03f7a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -232,6 +232,10 @@ void transport_subsystem_check_init(void)
232 if (ret != 0) 232 if (ret != 0)
233 pr_err("Unable to load target_core_pscsi\n"); 233 pr_err("Unable to load target_core_pscsi\n");
234 234
235 ret = request_module("target_core_user");
236 if (ret != 0)
237 pr_err("Unable to load target_core_user\n");
238
235 sub_api_initialized = 1; 239 sub_api_initialized = 1;
236} 240}
237 241
@@ -752,8 +756,7 @@ void target_qf_do_work(struct work_struct *work)
752 756
753 list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) { 757 list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) {
754 list_del(&cmd->se_qf_node); 758 list_del(&cmd->se_qf_node);
755 atomic_dec(&dev->dev_qf_count); 759 atomic_dec_mb(&dev->dev_qf_count);
756 smp_mb__after_atomic();
757 760
758 pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue" 761 pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue"
759 " context: %s\n", cmd->se_tfo->get_fabric_name(), cmd, 762 " context: %s\n", cmd->se_tfo->get_fabric_name(), cmd,
@@ -1166,7 +1169,6 @@ transport_check_alloc_task_attr(struct se_cmd *cmd)
1166 * Dormant to Active status. 1169 * Dormant to Active status.
1167 */ 1170 */
1168 cmd->se_ordered_id = atomic_inc_return(&dev->dev_ordered_id); 1171 cmd->se_ordered_id = atomic_inc_return(&dev->dev_ordered_id);
1169 smp_mb__after_atomic();
1170 pr_debug("Allocated se_ordered_id: %u for Task Attr: 0x%02x on %s\n", 1172 pr_debug("Allocated se_ordered_id: %u for Task Attr: 0x%02x on %s\n",
1171 cmd->se_ordered_id, cmd->sam_task_attr, 1173 cmd->se_ordered_id, cmd->sam_task_attr,
1172 dev->transport->name); 1174 dev->transport->name);
@@ -1722,8 +1724,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
1722 cmd->t_task_cdb[0], cmd->se_ordered_id); 1724 cmd->t_task_cdb[0], cmd->se_ordered_id);
1723 return false; 1725 return false;
1724 case MSG_ORDERED_TAG: 1726 case MSG_ORDERED_TAG:
1725 atomic_inc(&dev->dev_ordered_sync); 1727 atomic_inc_mb(&dev->dev_ordered_sync);
1726 smp_mb__after_atomic();
1727 1728
1728 pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, " 1729 pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, "
1729 " se_ordered_id: %u\n", 1730 " se_ordered_id: %u\n",
@@ -1740,8 +1741,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
1740 /* 1741 /*
1741 * For SIMPLE and UNTAGGED Task Attribute commands 1742 * For SIMPLE and UNTAGGED Task Attribute commands
1742 */ 1743 */
1743 atomic_inc(&dev->simple_cmds); 1744 atomic_inc_mb(&dev->simple_cmds);
1744 smp_mb__after_atomic();
1745 break; 1745 break;
1746 } 1746 }
1747 1747
@@ -1845,8 +1845,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
1845 return; 1845 return;
1846 1846
1847 if (cmd->sam_task_attr == MSG_SIMPLE_TAG) { 1847 if (cmd->sam_task_attr == MSG_SIMPLE_TAG) {
1848 atomic_dec(&dev->simple_cmds); 1848 atomic_dec_mb(&dev->simple_cmds);
1849 smp_mb__after_atomic();
1850 dev->dev_cur_ordered_id++; 1849 dev->dev_cur_ordered_id++;
1851 pr_debug("Incremented dev->dev_cur_ordered_id: %u for" 1850 pr_debug("Incremented dev->dev_cur_ordered_id: %u for"
1852 " SIMPLE: %u\n", dev->dev_cur_ordered_id, 1851 " SIMPLE: %u\n", dev->dev_cur_ordered_id,
@@ -1857,8 +1856,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
1857 " HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id, 1856 " HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id,
1858 cmd->se_ordered_id); 1857 cmd->se_ordered_id);
1859 } else if (cmd->sam_task_attr == MSG_ORDERED_TAG) { 1858 } else if (cmd->sam_task_attr == MSG_ORDERED_TAG) {
1860 atomic_dec(&dev->dev_ordered_sync); 1859 atomic_dec_mb(&dev->dev_ordered_sync);
1861 smp_mb__after_atomic();
1862 1860
1863 dev->dev_cur_ordered_id++; 1861 dev->dev_cur_ordered_id++;
1864 pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED:" 1862 pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED:"
@@ -1877,8 +1875,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
1877 if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) { 1875 if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
1878 trace_target_cmd_complete(cmd); 1876 trace_target_cmd_complete(cmd);
1879 ret = cmd->se_tfo->queue_status(cmd); 1877 ret = cmd->se_tfo->queue_status(cmd);
1880 if (ret) 1878 goto out;
1881 goto out;
1882 } 1879 }
1883 1880
1884 switch (cmd->data_direction) { 1881 switch (cmd->data_direction) {
@@ -1916,8 +1913,7 @@ static void transport_handle_queue_full(
1916{ 1913{
1917 spin_lock_irq(&dev->qf_cmd_lock); 1914 spin_lock_irq(&dev->qf_cmd_lock);
1918 list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list); 1915 list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list);
1919 atomic_inc(&dev->dev_qf_count); 1916 atomic_inc_mb(&dev->dev_qf_count);
1920 smp_mb__after_atomic();
1921 spin_unlock_irq(&cmd->se_dev->qf_cmd_lock); 1917 spin_unlock_irq(&cmd->se_dev->qf_cmd_lock);
1922 1918
1923 schedule_work(&cmd->se_dev->qf_work_queue); 1919 schedule_work(&cmd->se_dev->qf_work_queue);
@@ -2896,7 +2892,6 @@ void transport_send_task_abort(struct se_cmd *cmd)
2896 if (cmd->se_tfo->write_pending_status(cmd) != 0) { 2892 if (cmd->se_tfo->write_pending_status(cmd) != 0) {
2897 cmd->transport_state |= CMD_T_ABORTED; 2893 cmd->transport_state |= CMD_T_ABORTED;
2898 cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; 2894 cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
2899 smp_mb__after_atomic();
2900 return; 2895 return;
2901 } 2896 }
2902 } 2897 }
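
Note: the hunks above, together with the target_core_ua.c hunks below, fold the
open-coded atomic op + smp_mb__after_atomic() pairs into atomic_inc_mb() and
atomic_dec_mb(). The helpers are introduced by a separate patch that is not part
of this diff; presumably they look roughly like this (illustrative sketch, not
the authoritative definition):

	#define atomic_inc_mb(v)			\
		do {					\
			smp_mb__before_atomic();	\
			atomic_inc(v);			\
			smp_mb__after_atomic();		\
		} while (0)

	#define atomic_dec_mb(v)			\
		do {					\
			smp_mb__before_atomic();	\
			atomic_dec(v);			\
			smp_mb__after_atomic();		\
		} while (0)

If so, the conversion is not a pure rename: the helpers also order accesses
before the atomic op, which the old open-coded sites did not.
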
diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c
index 101858e245b3..1738b1646988 100644
--- a/drivers/target/target_core_ua.c
+++ b/drivers/target/target_core_ua.c
@@ -161,8 +161,7 @@ int core_scsi3_ua_allocate(
161 spin_unlock(&deve->ua_lock); 161 spin_unlock(&deve->ua_lock);
162 spin_unlock_irq(&nacl->device_list_lock); 162 spin_unlock_irq(&nacl->device_list_lock);
163 163
164 atomic_inc(&deve->ua_count); 164 atomic_inc_mb(&deve->ua_count);
165 smp_mb__after_atomic();
166 return 0; 165 return 0;
167 } 166 }
168 list_add_tail(&ua->ua_nacl_list, &deve->ua_list); 167 list_add_tail(&ua->ua_nacl_list, &deve->ua_list);
@@ -174,8 +173,7 @@ int core_scsi3_ua_allocate(
174 nacl->se_tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun, 173 nacl->se_tpg->se_tpg_tfo->get_fabric_name(), unpacked_lun,
175 asc, ascq); 174 asc, ascq);
176 175
177 atomic_inc(&deve->ua_count); 176 atomic_inc_mb(&deve->ua_count);
178 smp_mb__after_atomic();
179 return 0; 177 return 0;
180} 178}
181 179
@@ -189,8 +187,7 @@ void core_scsi3_ua_release_all(
189 list_del(&ua->ua_nacl_list); 187 list_del(&ua->ua_nacl_list);
190 kmem_cache_free(se_ua_cache, ua); 188 kmem_cache_free(se_ua_cache, ua);
191 189
192 atomic_dec(&deve->ua_count); 190 atomic_dec_mb(&deve->ua_count);
193 smp_mb__after_atomic();
194 } 191 }
195 spin_unlock(&deve->ua_lock); 192 spin_unlock(&deve->ua_lock);
196} 193}
@@ -250,8 +247,7 @@ void core_scsi3_ua_for_check_condition(
250 list_del(&ua->ua_nacl_list); 247 list_del(&ua->ua_nacl_list);
251 kmem_cache_free(se_ua_cache, ua); 248 kmem_cache_free(se_ua_cache, ua);
252 249
253 atomic_dec(&deve->ua_count); 250 atomic_dec_mb(&deve->ua_count);
254 smp_mb__after_atomic();
255 } 251 }
256 spin_unlock(&deve->ua_lock); 252 spin_unlock(&deve->ua_lock);
257 spin_unlock_irq(&nacl->device_list_lock); 253 spin_unlock_irq(&nacl->device_list_lock);
@@ -309,8 +305,7 @@ int core_scsi3_ua_clear_for_request_sense(
309 list_del(&ua->ua_nacl_list); 305 list_del(&ua->ua_nacl_list);
310 kmem_cache_free(se_ua_cache, ua); 306 kmem_cache_free(se_ua_cache, ua);
311 307
312 atomic_dec(&deve->ua_count); 308 atomic_dec_mb(&deve->ua_count);
313 smp_mb__after_atomic();
314 } 309 }
315 spin_unlock(&deve->ua_lock); 310 spin_unlock(&deve->ua_lock);
316 spin_unlock_irq(&nacl->device_list_lock); 311 spin_unlock_irq(&nacl->device_list_lock);
diff --git a/drivers/target/target_core_ua.h b/drivers/target/target_core_ua.h
index be912b36daae..a6b56b364e7a 100644
--- a/drivers/target/target_core_ua.h
+++ b/drivers/target/target_core_ua.h
@@ -1,4 +1,5 @@
1#ifndef TARGET_CORE_UA_H 1#ifndef TARGET_CORE_UA_H
2#define TARGET_CORE_UA_H
2 3
3/* 4/*
4 * From spc4r17, Table D.1: ASC and ASCQ Assignment 5 * From spc4r17, Table D.1: ASC and ASCQ Assignment
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
new file mode 100644
index 000000000000..9a1b314f6482
--- /dev/null
+++ b/drivers/target/target_core_user.c
@@ -0,0 +1,1167 @@
1/*
2 * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
3 * Copyright (C) 2014 Red Hat, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#include <linux/spinlock.h>
20#include <linux/module.h>
21#include <linux/idr.h>
22#include <linux/timer.h>
23#include <linux/parser.h>
24#include <scsi/scsi.h>
25#include <scsi/scsi_host.h>
26#include <linux/uio_driver.h>
27#include <net/genetlink.h>
28#include <target/target_core_base.h>
29#include <target/target_core_fabric.h>
30#include <target/target_core_backend.h>
31#include <linux/target_core_user.h>
32
33/*
34 * Define a shared-memory interface for LIO to pass SCSI commands and
35 * data to userspace for processing. This is to allow backends that
36 * are too complex for in-kernel support to be possible.
37 *
38 * It uses the UIO framework to do a lot of the device-creation and
39 * introspection work for us.
40 *
41 * See the .h file for how the ring is laid out. Note that while the
42 * command ring is defined, the particulars of the data area are
43 * not. Offset values in the command entry point to other locations
44 * internal to the mmap()ed area. There is separate space outside the
45 * command ring for data buffers. This leaves maximum flexibility for
46 * moving buffer allocations, or even page flipping or other
47 * allocation techniques, without altering the command ring layout.
48 *
49 * SECURITY:
50 * The user process must be assumed to be malicious. There's no way to
51 * prevent it breaking the command ring protocol if it wants, but in
52 * order to prevent other issues we must only ever read *data* from
53 * the shared memory area, not offsets or sizes. This applies to
54 * command ring entries as well as the mailbox. Extra code needed for
55 * this may have a 'UAM' comment.
56 */
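
Note: the comment above only describes the kernel side. For orientation, here is
a rough sketch of the userspace half it implies -- a consumer that walks the
command ring and then kicks the kernel through the UIO fd. Illustrative only:
the struct layouts and the tcmu_hdr_* accessors are assumed to come from the
uapi header added by this series, and error handling plus the data-area copies
are omitted.

	#include <stdint.h>
	#include <unistd.h>
	#include <linux/target_core_user.h>

	/* 'map' is the mmap()ed UIO region, 'fd' the open /dev/uioX */
	static void process_ring(int fd, void *map)
	{
		struct tcmu_mailbox *mb = map;
		uint32_t tail = mb->cmd_tail;
		uint32_t kick = 1;

		while (tail != mb->cmd_head) {
			struct tcmu_cmd_entry *ent =
				(void *)((char *)map + mb->cmdr_off + tail);

			if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) {
				/* The CDB lives at map + ent->req.cdb_off;
				 * data is reached via ent->req.iov[], whose
				 * offsets are relative to 'map', never raw
				 * pointers. */
				ent->rsp.scsi_status = 0;	/* GOOD */
			}
			/* TCMU_OP_PAD entries are simply stepped over */
			tail = (tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size;
		}
		mb->cmd_tail = tail;
		/* a write to the UIO fd lands in tcmu_irqcontrol() below,
		 * which reaps the completions */
		write(fd, &kick, sizeof(kick));
	}
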
57
58
59#define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
60
61#define CMDR_SIZE (16 * 4096)
62#define DATA_SIZE (257 * 4096)
63
64#define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
65
66static struct device *tcmu_root_device;
67
68struct tcmu_hba {
69 u32 host_id;
70};
71
72/* User wants all cmds or just some */
73enum passthru_level {
74 TCMU_PASS_ALL = 0,
75 TCMU_PASS_IO,
76 TCMU_PASS_INVALID,
77};
78
79#define TCMU_CONFIG_LEN 256
80
81struct tcmu_dev {
82 struct se_device se_dev;
83
84 char *name;
85 struct se_hba *hba;
86
87#define TCMU_DEV_BIT_OPEN 0
88#define TCMU_DEV_BIT_BROKEN 1
89 unsigned long flags;
90 enum passthru_level pass_level;
91
92 struct uio_info uio_info;
93
94 struct tcmu_mailbox *mb_addr;
95 size_t dev_size;
96 u32 cmdr_size;
97 u32 cmdr_last_cleaned;
98 /* Offset of data ring from start of mb */
99 size_t data_off;
100 size_t data_size;
101 /* Ring head + tail values. */
102 /* Must add data_off and mb_addr to get the address */
103 size_t data_head;
104 size_t data_tail;
105
106 wait_queue_head_t wait_cmdr;
107 /* TODO should this be a mutex? */
108 spinlock_t cmdr_lock;
109
110 struct idr commands;
111 spinlock_t commands_lock;
112
113 struct timer_list timeout;
114
115 char dev_config[TCMU_CONFIG_LEN];
116};
117
118#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
119
120#define CMDR_OFF sizeof(struct tcmu_mailbox)
121
122struct tcmu_cmd {
123 struct se_cmd *se_cmd;
124 struct tcmu_dev *tcmu_dev;
125
126 uint16_t cmd_id;
127
128 /* Can't use se_cmd->data_length when cleaning up expired cmds, because if
129 cmd has been completed then accessing se_cmd is off limits */
130 size_t data_length;
131
132 unsigned long deadline;
133
134#define TCMU_CMD_BIT_EXPIRED 0
135 unsigned long flags;
136};
137
138static struct kmem_cache *tcmu_cmd_cache;
139
140/* multicast group */
141enum tcmu_multicast_groups {
142 TCMU_MCGRP_CONFIG,
143};
144
145static const struct genl_multicast_group tcmu_mcgrps[] = {
146 [TCMU_MCGRP_CONFIG] = { .name = "config", },
147};
148
149/* Our generic netlink family */
150static struct genl_family tcmu_genl_family = {
151 .id = GENL_ID_GENERATE,
152 .hdrsize = 0,
153 .name = "TCM-USER",
154 .version = 1,
155 .maxattr = TCMU_ATTR_MAX,
156 .mcgrps = tcmu_mcgrps,
157 .n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
158};
159
160static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
161{
162 struct se_device *se_dev = se_cmd->se_dev;
163 struct tcmu_dev *udev = TCMU_DEV(se_dev);
164 struct tcmu_cmd *tcmu_cmd;
165 int cmd_id;
166
167 tcmu_cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_KERNEL);
168 if (!tcmu_cmd)
169 return NULL;
170
171 tcmu_cmd->se_cmd = se_cmd;
172 tcmu_cmd->tcmu_dev = udev;
173 tcmu_cmd->data_length = se_cmd->data_length;
174
175 tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
176
177 idr_preload(GFP_KERNEL);
178 spin_lock_irq(&udev->commands_lock);
179 cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0,
180 USHRT_MAX, GFP_NOWAIT);
181 spin_unlock_irq(&udev->commands_lock);
182 idr_preload_end();
183
184 if (cmd_id < 0) {
185 kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
186 return NULL;
187 }
188 tcmu_cmd->cmd_id = cmd_id;
189
190 return tcmu_cmd;
191}
192
193static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
194{
195 unsigned long offset = (unsigned long) vaddr & ~PAGE_MASK;
196
197 size = round_up(size+offset, PAGE_SIZE);
198 vaddr -= offset;
199
200 while (size) {
 201		flush_dcache_page(virt_to_page(vaddr + size - PAGE_SIZE)); /* each page, not just the first */
202 size -= PAGE_SIZE;
203 }
204}
205
206/*
207 * Some ring helper functions. We don't assume size is a power of 2 so
208 * we can't use circ_buf.h.
209 */
210static inline size_t spc_used(size_t head, size_t tail, size_t size)
211{
212 int diff = head - tail;
213
214 if (diff >= 0)
215 return diff;
216 else
217 return size + diff;
218}
219
220static inline size_t spc_free(size_t head, size_t tail, size_t size)
221{
222 /* Keep 1 byte unused or we can't tell full from empty */
223 return (size - spc_used(head, tail, size) - 1);
224}
225
226static inline size_t head_to_end(size_t head, size_t size)
227{
228 return size - head;
229}
230
231#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
232
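
Note (worked example, not from the patch): with size = 8, head = 2 and tail = 6,
spc_used() returns 8 + (2 - 6) = 4 and spc_free() returns 8 - 4 - 1 = 3; the one
byte spc_free() holds back is what keeps head == tail unambiguous as "empty".
UPDATE_HEAD(head, 3, 8) then advances head from 2 to (2 + 3) % 8 = 5, or from 6
it wraps to (6 + 3) % 8 = 1.
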
233/*
 234 * We can't queue a command until we have space available on the cmd ring *and*
235 * space avail on the data ring.
236 *
237 * Called with ring lock held.
238 */
239static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed)
240{
241 struct tcmu_mailbox *mb = udev->mb_addr;
242 size_t space;
243 u32 cmd_head;
244 size_t cmd_needed;
245
246 tcmu_flush_dcache_range(mb, sizeof(*mb));
247
248 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
249
250 /*
251 * If cmd end-of-ring space is too small then we need space for a NOP plus
252 * original cmd - cmds are internally contiguous.
253 */
254 if (head_to_end(cmd_head, udev->cmdr_size) >= cmd_size)
255 cmd_needed = cmd_size;
256 else
257 cmd_needed = cmd_size + head_to_end(cmd_head, udev->cmdr_size);
258
259 space = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size);
260 if (space < cmd_needed) {
261 pr_debug("no cmd space: %u %u %u\n", cmd_head,
262 udev->cmdr_last_cleaned, udev->cmdr_size);
263 return false;
264 }
265
266 space = spc_free(udev->data_head, udev->data_tail, udev->data_size);
267 if (space < data_needed) {
268 pr_debug("no data space: %zu %zu %zu\n", udev->data_head,
269 udev->data_tail, udev->data_size);
270 return false;
271 }
272
273 return true;
274}
275
276static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
277{
278 struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
279 struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
280 size_t base_command_size, command_size;
281 struct tcmu_mailbox *mb;
282 struct tcmu_cmd_entry *entry;
283 int i;
284 struct scatterlist *sg;
285 struct iovec *iov;
286 int iov_cnt = 0;
287 uint32_t cmd_head;
288 uint64_t cdb_off;
289
290 if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
291 return -EINVAL;
292
293 /*
294 * Must be a certain minimum size for response sense info, but
295 * also may be larger if the iov array is large.
296 *
297 * iovs = sgl_nents+1, for end-of-ring case, plus another 1
298 * b/c size == offsetof one-past-element.
299 */
300 base_command_size = max(offsetof(struct tcmu_cmd_entry,
301 req.iov[se_cmd->t_data_nents + 2]),
302 sizeof(struct tcmu_cmd_entry));
303 command_size = base_command_size
304 + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
305
306 WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
307
308 spin_lock_irq(&udev->cmdr_lock);
309
310 mb = udev->mb_addr;
311 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
312 if ((command_size > (udev->cmdr_size / 2))
313 || tcmu_cmd->data_length > (udev->data_size - 1))
314 pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
315 "cmd/data ring buffers\n", command_size, tcmu_cmd->data_length,
316 udev->cmdr_size, udev->data_size);
317
318 while (!is_ring_space_avail(udev, command_size, tcmu_cmd->data_length)) {
319 int ret;
320 DEFINE_WAIT(__wait);
321
322 prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
323
324 pr_debug("sleeping for ring space\n");
325 spin_unlock_irq(&udev->cmdr_lock);
326 ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
327 finish_wait(&udev->wait_cmdr, &__wait);
328 if (!ret) {
329 pr_warn("tcmu: command timed out\n");
330 return -ETIMEDOUT;
331 }
332
333 spin_lock_irq(&udev->cmdr_lock);
334
335 /* We dropped cmdr_lock, cmd_head is stale */
336 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
337 }
338
339 /* Insert a PAD if end-of-ring space is too small */
340 if (head_to_end(cmd_head, udev->cmdr_size) < command_size) {
341 size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);
342
343 entry = (void *) mb + CMDR_OFF + cmd_head;
344 tcmu_flush_dcache_range(entry, sizeof(*entry));
345 tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
346 tcmu_hdr_set_len(&entry->hdr, pad_size);
347
348 UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
349
350 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
351 WARN_ON(cmd_head != 0);
352 }
353
354 entry = (void *) mb + CMDR_OFF + cmd_head;
355 tcmu_flush_dcache_range(entry, sizeof(*entry));
356 tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
357 tcmu_hdr_set_len(&entry->hdr, command_size);
358 entry->cmd_id = tcmu_cmd->cmd_id;
359
360 /*
361 * Fix up iovecs, and handle if allocation in data ring wrapped.
362 */
363 iov = &entry->req.iov[0];
364 for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
365 size_t copy_bytes = min((size_t)sg->length,
366 head_to_end(udev->data_head, udev->data_size));
367 void *from = kmap_atomic(sg_page(sg)) + sg->offset;
368 void *to = (void *) mb + udev->data_off + udev->data_head;
369
370 if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) {
371 memcpy(to, from, copy_bytes);
372 tcmu_flush_dcache_range(to, copy_bytes);
373 }
374
375 /* Even iov_base is relative to mb_addr */
376 iov->iov_len = copy_bytes;
377 iov->iov_base = (void *) udev->data_off + udev->data_head;
378 iov_cnt++;
379 iov++;
380
381 UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
382
383 /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */
384 if (sg->length != copy_bytes) {
385 from += copy_bytes;
386 copy_bytes = sg->length - copy_bytes;
387
388 iov->iov_len = copy_bytes;
389 iov->iov_base = (void *) udev->data_off + udev->data_head;
390
391 if (se_cmd->data_direction == DMA_TO_DEVICE) {
392 to = (void *) mb + udev->data_off + udev->data_head;
393 memcpy(to, from, copy_bytes);
394 tcmu_flush_dcache_range(to, copy_bytes);
395 }
396
397 iov_cnt++;
398 iov++;
399
400 UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
401 }
402
403 kunmap_atomic(from);
404 }
405 entry->req.iov_cnt = iov_cnt;
406
407 /* All offsets relative to mb_addr, not start of entry! */
408 cdb_off = CMDR_OFF + cmd_head + base_command_size;
409 memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
410 entry->req.cdb_off = cdb_off;
411 tcmu_flush_dcache_range(entry, sizeof(*entry));
412
413 UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
414 tcmu_flush_dcache_range(mb, sizeof(*mb));
415
416 spin_unlock_irq(&udev->cmdr_lock);
417
418 /* TODO: only if FLUSH and FUA? */
419 uio_event_notify(&udev->uio_info);
420
421 mod_timer(&udev->timeout,
422 round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
423
424 return 0;
425}
426
427static int tcmu_queue_cmd(struct se_cmd *se_cmd)
428{
429 struct se_device *se_dev = se_cmd->se_dev;
430 struct tcmu_dev *udev = TCMU_DEV(se_dev);
431 struct tcmu_cmd *tcmu_cmd;
432 int ret;
433
434 tcmu_cmd = tcmu_alloc_cmd(se_cmd);
435 if (!tcmu_cmd)
436 return -ENOMEM;
437
438 ret = tcmu_queue_cmd_ring(tcmu_cmd);
439 if (ret < 0) {
440 pr_err("TCMU: Could not queue command\n");
441 spin_lock_irq(&udev->commands_lock);
442 idr_remove(&udev->commands, tcmu_cmd->cmd_id);
443 spin_unlock_irq(&udev->commands_lock);
444
445 kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
446 }
447
448 return ret;
449}
450
451static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
452{
453 struct se_cmd *se_cmd = cmd->se_cmd;
454 struct tcmu_dev *udev = cmd->tcmu_dev;
455
456 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
457 /* cmd has been completed already from timeout, just reclaim data
458 ring space */
459 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
460 return;
461 }
462
463 if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
464 memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
465 se_cmd->scsi_sense_length);
466
467 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
468 }
469 else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
470 struct scatterlist *sg;
471 int i;
472
473 /* It'd be easier to look at entry's iovec again, but UAM */
474 for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
475 size_t copy_bytes;
476 void *to;
477 void *from;
478
479 copy_bytes = min((size_t)sg->length,
480 head_to_end(udev->data_tail, udev->data_size));
481
482 to = kmap_atomic(sg_page(sg)) + sg->offset;
483 WARN_ON(sg->length + sg->offset > PAGE_SIZE);
484 from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
485 tcmu_flush_dcache_range(from, copy_bytes);
486 memcpy(to, from, copy_bytes);
487
488 UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
489
490 /* Uh oh, wrapped the data buffer for this sg's data */
491 if (sg->length != copy_bytes) {
492 from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
493 WARN_ON(udev->data_tail);
494 to += copy_bytes;
495 copy_bytes = sg->length - copy_bytes;
496 tcmu_flush_dcache_range(from, copy_bytes);
497 memcpy(to, from, copy_bytes);
498
499 UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
500 }
501
502 kunmap_atomic(to);
503 }
504
505 } else if (se_cmd->data_direction == DMA_TO_DEVICE) {
506 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
507 } else {
508 pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction);
509 }
510
511 target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
512 cmd->se_cmd = NULL;
513
514 kmem_cache_free(tcmu_cmd_cache, cmd);
515}
516
517static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
518{
519 struct tcmu_mailbox *mb;
520 LIST_HEAD(cpl_cmds);
521 unsigned long flags;
522 int handled = 0;
523
524 if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
525 pr_err("ring broken, not handling completions\n");
526 return 0;
527 }
528
529 spin_lock_irqsave(&udev->cmdr_lock, flags);
530
531 mb = udev->mb_addr;
532 tcmu_flush_dcache_range(mb, sizeof(*mb));
533
534 while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) {
535
536 struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned;
537 struct tcmu_cmd *cmd;
538
539 tcmu_flush_dcache_range(entry, sizeof(*entry));
540
541 if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
542 UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
543 continue;
544 }
545 WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
546
547 spin_lock(&udev->commands_lock);
548 cmd = idr_find(&udev->commands, entry->cmd_id);
549 if (cmd)
550 idr_remove(&udev->commands, cmd->cmd_id);
551 spin_unlock(&udev->commands_lock);
552
553 if (!cmd) {
554 pr_err("cmd_id not found, ring is broken\n");
555 set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
556 break;
557 }
558
559 tcmu_handle_completion(cmd, entry);
560
561 UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
562
563 handled++;
564 }
565
566 if (mb->cmd_tail == mb->cmd_head)
567 del_timer(&udev->timeout); /* no more pending cmds */
568
569 spin_unlock_irqrestore(&udev->cmdr_lock, flags);
570
571 wake_up(&udev->wait_cmdr);
572
573 return handled;
574}
575
576static int tcmu_check_expired_cmd(int id, void *p, void *data)
577{
578 struct tcmu_cmd *cmd = p;
579
580 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
581 return 0;
582
583 if (!time_after(cmd->deadline, jiffies))
584 return 0;
585
586 set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
587 target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION);
588 cmd->se_cmd = NULL;
589
590 kmem_cache_free(tcmu_cmd_cache, cmd);
591
592 return 0;
593}
594
595static void tcmu_device_timedout(unsigned long data)
596{
597 struct tcmu_dev *udev = (struct tcmu_dev *)data;
598 unsigned long flags;
599 int handled;
600
601 handled = tcmu_handle_completions(udev);
602
603 pr_warn("%d completions handled from timeout\n", handled);
604
605 spin_lock_irqsave(&udev->commands_lock, flags);
606 idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
607 spin_unlock_irqrestore(&udev->commands_lock, flags);
608
609 /*
610 * We don't need to wakeup threads on wait_cmdr since they have their
611 * own timeout.
612 */
613}
614
615static int tcmu_attach_hba(struct se_hba *hba, u32 host_id)
616{
617 struct tcmu_hba *tcmu_hba;
618
619 tcmu_hba = kzalloc(sizeof(struct tcmu_hba), GFP_KERNEL);
620 if (!tcmu_hba)
621 return -ENOMEM;
622
623 tcmu_hba->host_id = host_id;
624 hba->hba_ptr = tcmu_hba;
625
626 return 0;
627}
628
629static void tcmu_detach_hba(struct se_hba *hba)
630{
631 kfree(hba->hba_ptr);
632 hba->hba_ptr = NULL;
633}
634
635static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
636{
637 struct tcmu_dev *udev;
638
639 udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
640 if (!udev)
641 return NULL;
642
643 udev->name = kstrdup(name, GFP_KERNEL);
644 if (!udev->name) {
645 kfree(udev);
646 return NULL;
647 }
648
649 udev->hba = hba;
650
651 init_waitqueue_head(&udev->wait_cmdr);
652 spin_lock_init(&udev->cmdr_lock);
653
654 idr_init(&udev->commands);
655 spin_lock_init(&udev->commands_lock);
656
657 setup_timer(&udev->timeout, tcmu_device_timedout,
658 (unsigned long)udev);
659
660 udev->pass_level = TCMU_PASS_ALL;
661
662 return &udev->se_dev;
663}
664
665static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
666{
667 struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info);
668
669 tcmu_handle_completions(tcmu_dev);
670
671 return 0;
672}
673
674/*
675 * mmap code from uio.c. Copied here because we want to hook mmap()
676 * and this stuff must come along.
677 */
678static int tcmu_find_mem_index(struct vm_area_struct *vma)
679{
680 struct tcmu_dev *udev = vma->vm_private_data;
681 struct uio_info *info = &udev->uio_info;
682
683 if (vma->vm_pgoff < MAX_UIO_MAPS) {
684 if (info->mem[vma->vm_pgoff].size == 0)
685 return -1;
686 return (int)vma->vm_pgoff;
687 }
688 return -1;
689}
690
691static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
692{
693 struct tcmu_dev *udev = vma->vm_private_data;
694 struct uio_info *info = &udev->uio_info;
695 struct page *page;
696 unsigned long offset;
697 void *addr;
698
699 int mi = tcmu_find_mem_index(vma);
700 if (mi < 0)
701 return VM_FAULT_SIGBUS;
702
703 /*
704 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
705 * to use mem[N].
706 */
707 offset = (vmf->pgoff - mi) << PAGE_SHIFT;
708
709 addr = (void *)(unsigned long)info->mem[mi].addr + offset;
710 if (info->mem[mi].memtype == UIO_MEM_LOGICAL)
711 page = virt_to_page(addr);
712 else
713 page = vmalloc_to_page(addr);
714 get_page(page);
715 vmf->page = page;
716 return 0;
717}
718
719static const struct vm_operations_struct tcmu_vm_ops = {
720 .fault = tcmu_vma_fault,
721};
722
723static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
724{
725 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
726
727 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
728 vma->vm_ops = &tcmu_vm_ops;
729
730 vma->vm_private_data = udev;
731
732 /* Ensure the mmap is exactly the right size */
733 if (vma_pages(vma) != (TCMU_RING_SIZE >> PAGE_SHIFT))
734 return -EINVAL;
735
736 return 0;
737}
738
739static int tcmu_open(struct uio_info *info, struct inode *inode)
740{
741 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
742
743 /* O_EXCL not supported for char devs, so fake it? */
744 if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
745 return -EBUSY;
746
747 pr_debug("open\n");
748
749 return 0;
750}
751
752static int tcmu_release(struct uio_info *info, struct inode *inode)
753{
754 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
755
756 clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
757
758 pr_debug("close\n");
759
760 return 0;
761}
762
763static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor)
764{
765 struct sk_buff *skb;
766 void *msg_header;
767 int ret = -ENOMEM;
768
769 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
770 if (!skb)
771 return ret;
772
773 msg_header = genlmsg_put(skb, 0, 0, &tcmu_genl_family, 0, cmd);
774 if (!msg_header)
775 goto free_skb;
776
777 ret = nla_put_string(skb, TCMU_ATTR_DEVICE, name);
778 if (ret < 0)
779 goto free_skb;
780
781 ret = nla_put_u32(skb, TCMU_ATTR_MINOR, minor);
782 if (ret < 0)
783 goto free_skb;
784
785 ret = genlmsg_end(skb, msg_header);
786 if (ret < 0)
787 goto free_skb;
788
789 ret = genlmsg_multicast(&tcmu_genl_family, skb, 0,
790 TCMU_MCGRP_CONFIG, GFP_KERNEL);
791
792 /* We don't care if no one is listening */
793 if (ret == -ESRCH)
794 ret = 0;
795
796 return ret;
797free_skb:
798 nlmsg_free(skb);
799 return ret;
800}
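
Note: the events multicast here are intended for a userspace daemon watching the
"config" group of the "TCM-USER" family. A sketch of such a listener with
libnl-3 (illustrative only: the libnl usage is assumed, not taken from this
patch, and attribute parsing plus error handling are omitted):

	#include <netlink/netlink.h>
	#include <netlink/genl/genl.h>
	#include <netlink/genl/ctrl.h>

	static int on_event(struct nl_msg *msg, void *arg)
	{
		/* parse TCMU_ATTR_DEVICE / TCMU_ATTR_MINOR here */
		return NL_OK;
	}

	static void watch_tcmu(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		int grp;

		genl_connect(sk);
		grp = genl_ctrl_resolve_grp(sk, "TCM-USER", "config");
		nl_socket_add_membership(sk, grp);
		nl_socket_disable_seq_check(sk);	/* multicast carries no seq */
		nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_event, NULL);
		for (;;)
			nl_recvmsgs_default(sk);
	}
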
801
802static int tcmu_configure_device(struct se_device *dev)
803{
804 struct tcmu_dev *udev = TCMU_DEV(dev);
805 struct tcmu_hba *hba = udev->hba->hba_ptr;
806 struct uio_info *info;
807 struct tcmu_mailbox *mb;
808 size_t size;
809 size_t used;
810 int ret = 0;
811 char *str;
812
813 info = &udev->uio_info;
814
815 size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
816 udev->dev_config);
817 size += 1; /* for \0 */
818 str = kmalloc(size, GFP_KERNEL);
819 if (!str)
820 return -ENOMEM;
821
822 used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
823
824 if (udev->dev_config[0])
825 snprintf(str + used, size - used, "/%s", udev->dev_config);
826
827 info->name = str;
828
829 udev->mb_addr = vzalloc(TCMU_RING_SIZE);
830 if (!udev->mb_addr) {
831 ret = -ENOMEM;
832 goto err_vzalloc;
833 }
834
835 /* mailbox fits in first part of CMDR space */
836 udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
837 udev->data_off = CMDR_SIZE;
838 udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
839
840 mb = udev->mb_addr;
841 mb->version = 1;
842 mb->cmdr_off = CMDR_OFF;
843 mb->cmdr_size = udev->cmdr_size;
844
845 WARN_ON(!PAGE_ALIGNED(udev->data_off));
846 WARN_ON(udev->data_size % PAGE_SIZE);
847
848 info->version = "1";
849
850 info->mem[0].name = "tcm-user command & data buffer";
851 info->mem[0].addr = (phys_addr_t) udev->mb_addr;
852 info->mem[0].size = TCMU_RING_SIZE;
853 info->mem[0].memtype = UIO_MEM_VIRTUAL;
854
855 info->irqcontrol = tcmu_irqcontrol;
856 info->irq = UIO_IRQ_CUSTOM;
857
858 info->mmap = tcmu_mmap;
859 info->open = tcmu_open;
860 info->release = tcmu_release;
861
862 ret = uio_register_device(tcmu_root_device, info);
863 if (ret)
864 goto err_register;
865
866 /* Other attributes can be configured in userspace */
867 dev->dev_attrib.hw_block_size = 512;
868 dev->dev_attrib.hw_max_sectors = 128;
869 dev->dev_attrib.hw_queue_depth = 128;
870
871 ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
872 udev->uio_info.uio_dev->minor);
873 if (ret)
874 goto err_netlink;
875
876 return 0;
877
878err_netlink:
879 uio_unregister_device(&udev->uio_info);
880err_register:
881 vfree(udev->mb_addr);
882err_vzalloc:
883 kfree(info->name);
884
885 return ret;
886}
887
888static int tcmu_check_pending_cmd(int id, void *p, void *data)
889{
890 struct tcmu_cmd *cmd = p;
891
892 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
893 return 0;
894 return -EINVAL;
895}
896
897static void tcmu_free_device(struct se_device *dev)
898{
899 struct tcmu_dev *udev = TCMU_DEV(dev);
900 int i;
901
902 del_timer_sync(&udev->timeout);
903
904 vfree(udev->mb_addr);
905
906 /* Upper layer should drain all requests before calling this */
907 spin_lock_irq(&udev->commands_lock);
908 i = idr_for_each(&udev->commands, tcmu_check_pending_cmd, NULL);
909 idr_destroy(&udev->commands);
910 spin_unlock_irq(&udev->commands_lock);
911 WARN_ON(i);
912
913 /* Device was configured */
914 if (udev->uio_info.uio_dev) {
915 tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
916 udev->uio_info.uio_dev->minor);
917
918 uio_unregister_device(&udev->uio_info);
919 kfree(udev->uio_info.name);
920 kfree(udev->name);
921 }
922
923 kfree(udev);
924}
925
926enum {
927 Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level,
928};
929
930static match_table_t tokens = {
931 {Opt_dev_config, "dev_config=%s"},
932 {Opt_dev_size, "dev_size=%u"},
933 {Opt_pass_level, "pass_level=%u"},
934 {Opt_err, NULL}
935};
936
937static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
938 const char *page, ssize_t count)
939{
940 struct tcmu_dev *udev = TCMU_DEV(dev);
941 char *orig, *ptr, *opts, *arg_p;
942 substring_t args[MAX_OPT_ARGS];
943 int ret = 0, token;
944 int arg;
945
946 opts = kstrdup(page, GFP_KERNEL);
947 if (!opts)
948 return -ENOMEM;
949
950 orig = opts;
951
952 while ((ptr = strsep(&opts, ",\n")) != NULL) {
953 if (!*ptr)
954 continue;
955
956 token = match_token(ptr, tokens, args);
957 switch (token) {
958 case Opt_dev_config:
959 if (match_strlcpy(udev->dev_config, &args[0],
960 TCMU_CONFIG_LEN) == 0) {
961 ret = -EINVAL;
962 break;
963 }
964 pr_debug("TCMU: Referencing Path: %s\n", udev->dev_config);
965 break;
966 case Opt_dev_size:
967 arg_p = match_strdup(&args[0]);
968 if (!arg_p) {
969 ret = -ENOMEM;
970 break;
971 }
972 ret = kstrtoul(arg_p, 0, (unsigned long *) &udev->dev_size);
973 kfree(arg_p);
974 if (ret < 0)
975 pr_err("kstrtoul() failed for dev_size=\n");
976 break;
977 case Opt_pass_level:
978 match_int(args, &arg);
979 if (arg >= TCMU_PASS_INVALID) {
980 pr_warn("TCMU: Invalid pass_level: %d\n", arg);
981 break;
982 }
983
984 pr_debug("TCMU: Setting pass_level to %d\n", arg);
985 udev->pass_level = arg;
986 break;
987 default:
988 break;
989 }
990 }
991
992 kfree(orig);
993 return (!ret) ? count : ret;
994}
995
996static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
997{
998 struct tcmu_dev *udev = TCMU_DEV(dev);
999 ssize_t bl = 0;
1000
1001 bl = sprintf(b + bl, "Config: %s ",
1002 udev->dev_config[0] ? udev->dev_config : "NULL");
1003 bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n",
1004 udev->dev_size, udev->pass_level);
1005
1006 return bl;
1007}
1008
1009static sector_t tcmu_get_blocks(struct se_device *dev)
1010{
1011 struct tcmu_dev *udev = TCMU_DEV(dev);
1012
1013 return div_u64(udev->dev_size - dev->dev_attrib.block_size,
1014 dev->dev_attrib.block_size);
1015}
1016
1017static sense_reason_t
1018tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents,
1019 enum dma_data_direction data_direction)
1020{
1021 int ret;
1022
1023 ret = tcmu_queue_cmd(se_cmd);
1024
1025 if (ret != 0)
1026 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1027 else
1028 return TCM_NO_SENSE;
1029}
1030
1031static sense_reason_t
1032tcmu_pass_op(struct se_cmd *se_cmd)
1033{
1034 int ret = tcmu_queue_cmd(se_cmd);
1035
1036 if (ret != 0)
1037 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1038 else
1039 return TCM_NO_SENSE;
1040}
1041
1042static struct sbc_ops tcmu_sbc_ops = {
1043 .execute_rw = tcmu_execute_rw,
1044 .execute_sync_cache = tcmu_pass_op,
1045 .execute_write_same = tcmu_pass_op,
1046 .execute_write_same_unmap = tcmu_pass_op,
1047 .execute_unmap = tcmu_pass_op,
1048};
1049
1050static sense_reason_t
1051tcmu_parse_cdb(struct se_cmd *cmd)
1052{
1053 unsigned char *cdb = cmd->t_task_cdb;
1054 struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev);
1055 sense_reason_t ret;
1056
1057 switch (udev->pass_level) {
1058 case TCMU_PASS_ALL:
1059 /* We're just like pscsi, then */
1060 /*
1061 * For REPORT LUNS we always need to emulate the response, for everything
1062 * else, pass it up.
1063 */
1064 switch (cdb[0]) {
1065 case REPORT_LUNS:
1066 cmd->execute_cmd = spc_emulate_report_luns;
1067 break;
1068 case READ_6:
1069 case READ_10:
1070 case READ_12:
1071 case READ_16:
1072 case WRITE_6:
1073 case WRITE_10:
1074 case WRITE_12:
1075 case WRITE_16:
1076 case WRITE_VERIFY:
1077 cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
1078 /* FALLTHROUGH */
1079 default:
1080 cmd->execute_cmd = tcmu_pass_op;
1081 }
1082 ret = TCM_NO_SENSE;
1083 break;
1084 case TCMU_PASS_IO:
1085 ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops);
1086 break;
1087 default:
1088 pr_err("Unknown tcm-user pass level %d\n", udev->pass_level);
1089 ret = TCM_CHECK_CONDITION_ABORT_CMD;
1090 }
1091
1092 return ret;
1093}
1094
1095static struct se_subsystem_api tcmu_template = {
1096 .name = "user",
1097 .inquiry_prod = "USER",
1098 .inquiry_rev = TCMU_VERSION,
1099 .owner = THIS_MODULE,
1100 .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV,
1101 .attach_hba = tcmu_attach_hba,
1102 .detach_hba = tcmu_detach_hba,
1103 .alloc_device = tcmu_alloc_device,
1104 .configure_device = tcmu_configure_device,
1105 .free_device = tcmu_free_device,
1106 .parse_cdb = tcmu_parse_cdb,
1107 .set_configfs_dev_params = tcmu_set_configfs_dev_params,
1108 .show_configfs_dev_params = tcmu_show_configfs_dev_params,
1109 .get_device_type = sbc_get_device_type,
1110 .get_blocks = tcmu_get_blocks,
1111};
1112
1113static int __init tcmu_module_init(void)
1114{
1115 int ret;
1116
1117 BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
1118
1119 tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
1120 sizeof(struct tcmu_cmd),
1121 __alignof__(struct tcmu_cmd),
1122 0, NULL);
1123 if (!tcmu_cmd_cache)
1124 return -ENOMEM;
1125
1126 tcmu_root_device = root_device_register("tcm_user");
1127 if (IS_ERR(tcmu_root_device)) {
1128 ret = PTR_ERR(tcmu_root_device);
1129 goto out_free_cache;
1130 }
1131
1132 ret = genl_register_family(&tcmu_genl_family);
1133 if (ret < 0) {
1134 goto out_unreg_device;
1135 }
1136
1137 ret = transport_subsystem_register(&tcmu_template);
1138 if (ret)
1139 goto out_unreg_genl;
1140
1141 return 0;
1142
1143out_unreg_genl:
1144 genl_unregister_family(&tcmu_genl_family);
1145out_unreg_device:
1146 root_device_unregister(tcmu_root_device);
1147out_free_cache:
1148 kmem_cache_destroy(tcmu_cmd_cache);
1149
1150 return ret;
1151}
1152
1153static void __exit tcmu_module_exit(void)
1154{
1155 transport_subsystem_release(&tcmu_template);
1156 genl_unregister_family(&tcmu_genl_family);
1157 root_device_unregister(tcmu_root_device);
1158 kmem_cache_destroy(tcmu_cmd_cache);
1159}
1160
1161MODULE_DESCRIPTION("TCM USER subsystem plugin");
1162MODULE_AUTHOR("Shaohua Li <shli@kernel.org>");
1163MODULE_AUTHOR("Andy Grover <agrover@redhat.com>");
1164MODULE_LICENSE("GPL");
1165
1166module_init(tcmu_module_init);
1167module_exit(tcmu_module_exit);
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index 21ce50880c79..ccee7e332a4d 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -98,7 +98,7 @@ static void ft_tport_delete(struct ft_tport *tport)
98 ft_sess_delete_all(tport); 98 ft_sess_delete_all(tport);
99 lport = tport->lport; 99 lport = tport->lport;
100 BUG_ON(tport != lport->prov[FC_TYPE_FCP]); 100 BUG_ON(tport != lport->prov[FC_TYPE_FCP]);
101 rcu_assign_pointer(lport->prov[FC_TYPE_FCP], NULL); 101 RCU_INIT_POINTER(lport->prov[FC_TYPE_FCP], NULL);
102 102
103 tpg = tport->tpg; 103 tpg = tport->tpg;
104 if (tpg) { 104 if (tpg) {
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index ef5587fe2c69..f554d25b4399 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -84,6 +84,16 @@ config THERMAL_GOV_STEP_WISE
84 Enable this to manage platform thermals using a simple linear 84 Enable this to manage platform thermals using a simple linear
85 governor. 85 governor.
86 86
87config THERMAL_GOV_BANG_BANG
88 bool "Bang Bang thermal governor"
89 default n
90 help
 91	  Enable this to manage platform thermals using the bang bang governor.
 92
 93	  Say 'Y' here if you want two point temperature regulation for fans
 94	  that are only ever fully on or fully off (no throttling). Some fan
 95	  drivers depend on this governor being enabled (e.g. acerhdf).
96
87config THERMAL_GOV_USER_SPACE 97config THERMAL_GOV_USER_SPACE
88 bool "User_space thermal governor" 98 bool "User_space thermal governor"
89 help 99 help
@@ -207,21 +217,6 @@ config X86_PKG_TEMP_THERMAL
207 two trip points which can be set by user to get notifications via thermal 217 two trip points which can be set by user to get notifications via thermal
208 notification methods. 218 notification methods.
209 219
210config ACPI_INT3403_THERMAL
211 tristate "ACPI INT3403 thermal driver"
212 depends on X86 && ACPI
213 help
214 Newer laptops and tablets that use ACPI may have thermal sensors
215 outside the core CPU/SOC for thermal safety reasons. These
216 temperature sensors are also exposed for the OS to use via the so
217 called INT3403 ACPI object. This driver will, on devices that have
218 such sensors, expose the temperature information from these sensors
219 to userspace via the normal thermal framework. This means that a wide
220 range of applications and GUI widgets can show this information to
221 the user or use this information for making decisions. For example,
222 the Intel Thermal Daemon can use this information to allow the user
223 to select his laptop to run without turning on the fans.
224
225config INTEL_SOC_DTS_THERMAL 220config INTEL_SOC_DTS_THERMAL
226 tristate "Intel SoCs DTS thermal driver" 221 tristate "Intel SoCs DTS thermal driver"
227 depends on X86 && IOSF_MBI 222 depends on X86 && IOSF_MBI
@@ -234,6 +229,30 @@ config INTEL_SOC_DTS_THERMAL
 234	  notification methods. The other trip is a critical trip point, which 229	  notification methods. The other trip is a critical trip point, which
 235	  was set by the driver based on the TJ MAX temperature. 230	  was set by the driver based on the TJ MAX temperature.
235 was set by the driver based on the TJ MAX temperature. 230 was set by the driver based on the TJ MAX temperature.
236 231
232config INT340X_THERMAL
233 tristate "ACPI INT340X thermal drivers"
234 depends on X86 && ACPI
235 select THERMAL_GOV_USER_SPACE
236 select ACPI_THERMAL_REL
237 select ACPI_FAN
238 help
239 Newer laptops and tablets that use ACPI may have thermal sensors and
240 other devices with thermal control capabilities outside the core
241 CPU/SOC, for thermal safety reasons.
242 They are exposed for the OS to use via the INT3400 ACPI device object
243 as the master, and INT3401~INT340B ACPI device objects as the slaves.
244 Enable this to expose the temperature information and cooling ability
245 from these objects to userspace via the normal thermal framework.
246 This means that a wide range of applications and GUI widgets can show
247 the information to the user or use this information for making
248 decisions. For example, the Intel Thermal Daemon can use this
 249	  information to let the user choose to run the laptop without
 250	  turning on the fans.
251
252config ACPI_THERMAL_REL
253 tristate
254 depends on ACPI
255
237menu "Texas Instruments thermal drivers" 256menu "Texas Instruments thermal drivers"
238source "drivers/thermal/ti-soc-thermal/Kconfig" 257source "drivers/thermal/ti-soc-thermal/Kconfig"
239endmenu 258endmenu
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 31e232f84b6b..39c4fe87da2f 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -11,6 +11,7 @@ thermal_sys-$(CONFIG_THERMAL_OF) += of-thermal.o
11 11
12# governors 12# governors
13thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o 13thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o
14thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG) += gov_bang_bang.o
14thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o 15thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o
15thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o 16thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o
16 17
@@ -31,5 +32,5 @@ obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o
31obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o 32obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o
32obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o 33obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o
33obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ 34obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/
34obj-$(CONFIG_ACPI_INT3403_THERMAL) += int3403_thermal.o 35obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
35obj-$(CONFIG_ST_THERMAL) += st/ 36obj-$(CONFIG_ST_THERMAL) += st/
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 944ba2f340c8..6e0a3fbfae86 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -23,6 +23,7 @@
23 */ 23 */
24 24
25#include <linux/thermal.h> 25#include <linux/thermal.h>
26#include <trace/events/thermal.h>
26 27
27#include "thermal_core.h" 28#include "thermal_core.h"
28 29
@@ -34,6 +35,7 @@ static int get_trip_level(struct thermal_zone_device *tz)
34{ 35{
35 int count = 0; 36 int count = 0;
36 unsigned long trip_temp; 37 unsigned long trip_temp;
38 enum thermal_trip_type trip_type;
37 39
38 if (tz->trips == 0 || !tz->ops->get_trip_temp) 40 if (tz->trips == 0 || !tz->ops->get_trip_temp)
39 return 0; 41 return 0;
@@ -43,6 +45,16 @@ static int get_trip_level(struct thermal_zone_device *tz)
43 if (tz->temperature < trip_temp) 45 if (tz->temperature < trip_temp)
44 break; 46 break;
45 } 47 }
48
49 /*
50 * count > 0 only if temperature is greater than first trip
51 * point, in which case, trip_point = count - 1
52 */
53 if (count > 0) {
54 tz->ops->get_trip_type(tz, count - 1, &trip_type);
55 trace_thermal_zone_trip(tz, count - 1, trip_type);
56 }
57
46 return count; 58 return count;
47} 59}
48 60
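
Note (worked example): with trip points at 40, 60 and 80 degrees C and
tz->temperature at 65 degrees, the loop above exits with count == 2, so the new
trace call records trip point 1 (the 60-degree trip) as the last one crossed.
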
diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c
new file mode 100644
index 000000000000..c5dd76b2ee74
--- /dev/null
+++ b/drivers/thermal/gov_bang_bang.c
@@ -0,0 +1,131 @@
1/*
2 * gov_bang_bang.c - A simple thermal throttling governor using hysteresis
3 *
4 * Copyright (C) 2014 Peter Feuerer <peter@piie.net>
5 *
6 * Based on step_wise.c with following Copyrights:
7 * Copyright (C) 2012 Intel Corp
8 * Copyright (C) 2012 Durgadoss R <durgadoss.r@intel.com>
9 *
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, version 2.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU General Public License for more details.
19 *
20 */
21
22#include <linux/thermal.h>
23
24#include "thermal_core.h"
25
26static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
27{
28 long trip_temp;
29 unsigned long trip_hyst;
30 struct thermal_instance *instance;
31
32 tz->ops->get_trip_temp(tz, trip, &trip_temp);
33 tz->ops->get_trip_hyst(tz, trip, &trip_hyst);
34
35 dev_dbg(&tz->device, "Trip%d[temp=%ld]:temp=%d:hyst=%ld\n",
36 trip, trip_temp, tz->temperature,
37 trip_hyst);
38
39 mutex_lock(&tz->lock);
40
41 list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
42 if (instance->trip != trip)
43 continue;
44
45 /* in case fan is in initial state, switch the fan off */
46 if (instance->target == THERMAL_NO_TARGET)
47 instance->target = 0;
48
49 /* in case fan is neither on nor off set the fan to active */
50 if (instance->target != 0 && instance->target != 1) {
51 pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
52 instance->name, instance->target);
53 instance->target = 1;
54 }
55
56 /*
57 * enable fan when temperature exceeds trip_temp and disable
58 * the fan in case it falls below trip_temp minus hysteresis
59 */
60 if (instance->target == 0 && tz->temperature >= trip_temp)
61 instance->target = 1;
62 else if (instance->target == 1 &&
63 tz->temperature < trip_temp - trip_hyst)
64 instance->target = 0;
65
66 dev_dbg(&instance->cdev->device, "target=%d\n",
67 (int)instance->target);
68
69 instance->cdev->updated = false; /* cdev needs update */
70 }
71
72 mutex_unlock(&tz->lock);
73}
74
75/**
76 * bang_bang_control - controls devices associated with the given zone
 77 * @tz: thermal_zone_device
 78 * @trip: the trip point
 79 *
 80 * Regulation Logic: two point regulation, delivering a cooling state
 81 * that depends on the previous state, as shown in this diagram:
82 *
83 * Fan: OFF ON
84 *
85 * |
86 * |
87 * trip_temp: +---->+
88 * | | ^
89 * | | |
90 * | | Temperature
91 * (trip_temp - hyst): +<----+
92 * |
93 * |
94 * |
95 *
96 * * If the fan is not running and temperature exceeds trip_temp, the fan
97 * gets turned on.
98 * * In case the fan is running, temperature must fall below
99 * (trip_temp - hyst) so that the fan gets turned off again.
100 *
101 */
102static int bang_bang_control(struct thermal_zone_device *tz, int trip)
103{
104 struct thermal_instance *instance;
105
106 thermal_zone_trip_update(tz, trip);
107
108 mutex_lock(&tz->lock);
109
110 list_for_each_entry(instance, &tz->thermal_instances, tz_node)
111 thermal_cdev_update(instance->cdev);
112
113 mutex_unlock(&tz->lock);
114
115 return 0;
116}
117
118static struct thermal_governor thermal_gov_bang_bang = {
119 .name = "bang_bang",
120 .throttle = bang_bang_control,
121};
122
123int thermal_gov_bang_bang_register(void)
124{
125 return thermal_register_governor(&thermal_gov_bang_bang);
126}
127
128void thermal_gov_bang_bang_unregister(void)
129{
130 thermal_unregister_governor(&thermal_gov_bang_bang);
131}
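
Note (worked example): with trip_temp = 60000 and trip_hyst = 5000 (values in
millicelsius), a fan that is off switches on once tz->temperature reaches 60000
and switches off again only once it drops below 55000, so the state cannot
chatter while the temperature hovers around the trip point.
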
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 2c516f2eebed..461bf3d033a0 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -19,6 +19,7 @@
19#include <linux/mfd/syscon.h> 19#include <linux/mfd/syscon.h>
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/of.h> 21#include <linux/of.h>
22#include <linux/of_device.h>
22#include <linux/platform_device.h> 23#include <linux/platform_device.h>
23#include <linux/regmap.h> 24#include <linux/regmap.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
@@ -31,6 +32,11 @@
31 32
32#define MISC0 0x0150 33#define MISC0 0x0150
33#define MISC0_REFTOP_SELBIASOFF (1 << 3) 34#define MISC0_REFTOP_SELBIASOFF (1 << 3)
35#define MISC1 0x0160
36#define MISC1_IRQ_TEMPHIGH (1 << 29)
37/* Below LOW and PANIC bits are only for TEMPMON_IMX6SX */
38#define MISC1_IRQ_TEMPLOW (1 << 28)
39#define MISC1_IRQ_TEMPPANIC (1 << 27)
34 40
35#define TEMPSENSE0 0x0180 41#define TEMPSENSE0 0x0180
36#define TEMPSENSE0_ALARM_VALUE_SHIFT 20 42#define TEMPSENSE0_ALARM_VALUE_SHIFT 20
@@ -43,6 +49,12 @@
43 49
44#define TEMPSENSE1 0x0190 50#define TEMPSENSE1 0x0190
45#define TEMPSENSE1_MEASURE_FREQ 0xffff 51#define TEMPSENSE1_MEASURE_FREQ 0xffff
52/* Below TEMPSENSE2 is only for TEMPMON_IMX6SX */
53#define TEMPSENSE2 0x0290
54#define TEMPSENSE2_LOW_VALUE_SHIFT 0
55#define TEMPSENSE2_LOW_VALUE_MASK 0xfff
56#define TEMPSENSE2_PANIC_VALUE_SHIFT 16
57#define TEMPSENSE2_PANIC_VALUE_MASK 0xfff0000
46 58
47#define OCOTP_ANA1 0x04e0 59#define OCOTP_ANA1 0x04e0
48 60
@@ -66,6 +78,21 @@ enum imx_thermal_trip {
66#define FACTOR1 15976 78#define FACTOR1 15976
67#define FACTOR2 4297157 79#define FACTOR2 4297157
68 80
81#define TEMPMON_IMX6Q 1
82#define TEMPMON_IMX6SX 2
83
84struct thermal_soc_data {
85 u32 version;
86};
87
88static struct thermal_soc_data thermal_imx6q_data = {
89 .version = TEMPMON_IMX6Q,
90};
91
92static struct thermal_soc_data thermal_imx6sx_data = {
93 .version = TEMPMON_IMX6SX,
94};
95
69struct imx_thermal_data { 96struct imx_thermal_data {
70 struct thermal_zone_device *tz; 97 struct thermal_zone_device *tz;
71 struct thermal_cooling_device *cdev; 98 struct thermal_cooling_device *cdev;
@@ -79,8 +106,21 @@ struct imx_thermal_data {
79 bool irq_enabled; 106 bool irq_enabled;
80 int irq; 107 int irq;
81 struct clk *thermal_clk; 108 struct clk *thermal_clk;
109 const struct thermal_soc_data *socdata;
82}; 110};
83 111
112static void imx_set_panic_temp(struct imx_thermal_data *data,
113 signed long panic_temp)
114{
115 struct regmap *map = data->tempmon;
116 int critical_value;
117
118 critical_value = (data->c2 - panic_temp) / data->c1;
119 regmap_write(map, TEMPSENSE2 + REG_CLR, TEMPSENSE2_PANIC_VALUE_MASK);
120 regmap_write(map, TEMPSENSE2 + REG_SET, critical_value <<
121 TEMPSENSE2_PANIC_VALUE_SHIFT);
122}
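
Note: the conversion above is just the temperature formula from imx_get_temp()
(see the hunk below: *temp = c2 - n_meas * c1) solved for the measurement
count: n = (c2 - temp) / c1.
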
123
84static void imx_set_alarm_temp(struct imx_thermal_data *data, 124static void imx_set_alarm_temp(struct imx_thermal_data *data,
85 signed long alarm_temp) 125 signed long alarm_temp)
86{ 126{
@@ -142,13 +182,17 @@ static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
142 /* See imx_get_sensor_data() for formula derivation */ 182 /* See imx_get_sensor_data() for formula derivation */
143 *temp = data->c2 - n_meas * data->c1; 183 *temp = data->c2 - n_meas * data->c1;
144 184
145 /* Update alarm value to next higher trip point */ 185 /* Update alarm value to next higher trip point for TEMPMON_IMX6Q */
146 if (data->alarm_temp == data->temp_passive && *temp >= data->temp_passive) 186 if (data->socdata->version == TEMPMON_IMX6Q) {
147 imx_set_alarm_temp(data, data->temp_critical); 187 if (data->alarm_temp == data->temp_passive &&
148 if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) { 188 *temp >= data->temp_passive)
149 imx_set_alarm_temp(data, data->temp_passive); 189 imx_set_alarm_temp(data, data->temp_critical);
150 dev_dbg(&tz->device, "thermal alarm off: T < %lu\n", 190 if (data->alarm_temp == data->temp_critical &&
151 data->alarm_temp / 1000); 191 *temp < data->temp_passive) {
192 imx_set_alarm_temp(data, data->temp_passive);
193 dev_dbg(&tz->device, "thermal alarm off: T < %lu\n",
194 data->alarm_temp / 1000);
195 }
152 } 196 }
153 197
154 if (*temp != data->last_temp) { 198 if (*temp != data->last_temp) {
@@ -398,8 +442,17 @@ static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev)
398 return IRQ_HANDLED; 442 return IRQ_HANDLED;
399} 443}
400 444
445static const struct of_device_id of_imx_thermal_match[] = {
446 { .compatible = "fsl,imx6q-tempmon", .data = &thermal_imx6q_data, },
447 { .compatible = "fsl,imx6sx-tempmon", .data = &thermal_imx6sx_data, },
448 { /* end */ }
449};
450MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
451
401static int imx_thermal_probe(struct platform_device *pdev) 452static int imx_thermal_probe(struct platform_device *pdev)
402{ 453{
454 const struct of_device_id *of_id =
455 of_match_device(of_imx_thermal_match, &pdev->dev);
403 struct imx_thermal_data *data; 456 struct imx_thermal_data *data;
404 struct cpumask clip_cpus; 457 struct cpumask clip_cpus;
405 struct regmap *map; 458 struct regmap *map;
@@ -418,6 +471,20 @@ static int imx_thermal_probe(struct platform_device *pdev)
418 } 471 }
419 data->tempmon = map; 472 data->tempmon = map;
420 473
474 data->socdata = of_id->data;
475
476 /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */
477 if (data->socdata->version == TEMPMON_IMX6SX) {
478 regmap_write(map, MISC1 + REG_CLR, MISC1_IRQ_TEMPHIGH |
479 MISC1_IRQ_TEMPLOW | MISC1_IRQ_TEMPPANIC);
480 /*
481 * reset value of LOW ALARM is incorrect, set it to lowest
482 * value to avoid false trigger of low alarm.
483 */
484 regmap_write(map, TEMPSENSE2 + REG_SET,
485 TEMPSENSE2_LOW_VALUE_MASK);
486 }
487
421 data->irq = platform_get_irq(pdev, 0); 488 data->irq = platform_get_irq(pdev, 0);
422 if (data->irq < 0) 489 if (data->irq < 0)
423 return data->irq; 490 return data->irq;
@@ -489,6 +556,10 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
 	regmap_write(map, TEMPSENSE1 + REG_SET, measure_freq);
 	imx_set_alarm_temp(data, data->temp_passive);
+
+	if (data->socdata->version == TEMPMON_IMX6SX)
+		imx_set_panic_temp(data, data->temp_critical);
+
 	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
 
@@ -550,12 +621,6 @@ static int imx_thermal_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops,
 			 imx_thermal_suspend, imx_thermal_resume);
 
-static const struct of_device_id of_imx_thermal_match[] = {
-	{ .compatible = "fsl,imx6q-tempmon", },
-	{ /* end */ }
-};
-MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
-
 static struct platform_driver imx_thermal = {
 	.driver = {
 		.name = "imx_thermal",
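
A note on the arithmetic in the imx_set_panic_temp() hunk above: the driver's calibration is the linear map temp = c2 - n * c1 applied by imx_get_temp(), so a target temperature is converted back to a raw sensor count as n = (c2 - temp) / c1 before being programmed into TEMPSENSE2. A minimal standalone sketch of that round trip follows; the calibration constants here are invented for illustration, real values come from the SoC's calibration data:

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical calibration pair, not real i.MX6 fuse values */
		long c1 = 100;           /* slope: millicelsius per count */
		long c2 = 100000;        /* offset: millicelsius at count 0 */
		long panic_temp = 95000; /* 95 C target, in millicelsius */

		/* what imx_set_panic_temp() would program into the register */
		long critical_value = (c2 - panic_temp) / c1;

		/* what imx_get_temp() would compute back from that count */
		printf("count=%ld -> temp=%ld mC\n",
		       critical_value, c2 - critical_value * c1);
		return 0;
	}
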
diff --git a/drivers/thermal/int3403_thermal.c b/drivers/thermal/int3403_thermal.c
deleted file mode 100644
index 17554eeb3953..000000000000
--- a/drivers/thermal/int3403_thermal.c
+++ /dev/null
@@ -1,296 +0,0 @@
1/*
2 * ACPI INT3403 thermal driver
3 * Copyright (c) 2013, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/init.h>
18#include <linux/types.h>
19#include <linux/acpi.h>
20#include <linux/thermal.h>
21
22#define INT3403_TYPE_SENSOR 0x03
23#define INT3403_PERF_CHANGED_EVENT 0x80
24#define INT3403_THERMAL_EVENT 0x90
25
26#define DECI_KELVIN_TO_MILLI_CELSIUS(t, off) (((t) - (off)) * 100)
27#define KELVIN_OFFSET 2732
28#define MILLI_CELSIUS_TO_DECI_KELVIN(t, off) (((t) / 100) + (off))
29
30#define ACPI_INT3403_CLASS "int3403"
31#define ACPI_INT3403_FILE_STATE "state"
32
33struct int3403_sensor {
34 struct thermal_zone_device *tzone;
35 unsigned long *thresholds;
36 unsigned long crit_temp;
37 int crit_trip_id;
38 unsigned long psv_temp;
39 int psv_trip_id;
40};
41
42static int sys_get_curr_temp(struct thermal_zone_device *tzone,
43 unsigned long *temp)
44{
45 struct acpi_device *device = tzone->devdata;
46 unsigned long long tmp;
47 acpi_status status;
48
49 status = acpi_evaluate_integer(device->handle, "_TMP", NULL, &tmp);
50 if (ACPI_FAILURE(status))
51 return -EIO;
52
53 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(tmp, KELVIN_OFFSET);
54
55 return 0;
56}
57
58static int sys_get_trip_hyst(struct thermal_zone_device *tzone,
59 int trip, unsigned long *temp)
60{
61 struct acpi_device *device = tzone->devdata;
62 unsigned long long hyst;
63 acpi_status status;
64
65 status = acpi_evaluate_integer(device->handle, "GTSH", NULL, &hyst);
66 if (ACPI_FAILURE(status))
67 return -EIO;
68
69 /*
70 * Thermal hysteresis represents a temperature difference.
71 * Kelvin and Celsius have same degree size. So the
72 * conversion here between tenths of degree Kelvin unit
73 * and Milli-Celsius unit is just to multiply 100.
74 */
75 *temp = hyst * 100;
76
77 return 0;
78}
79
80static int sys_get_trip_temp(struct thermal_zone_device *tzone,
81 int trip, unsigned long *temp)
82{
83 struct acpi_device *device = tzone->devdata;
84 struct int3403_sensor *obj = acpi_driver_data(device);
85
86 if (trip == obj->crit_trip_id)
87 *temp = obj->crit_temp;
88 else if (trip == obj->psv_trip_id)
89 *temp = obj->psv_temp;
90 else {
91 /*
92 * get_trip_temp is a mandatory callback but
93 * PATx method doesn't return any value, so return
94 * cached value, which was last set from user space.
95 */
96 *temp = obj->thresholds[trip];
97 }
98
99 return 0;
100}
101
102static int sys_get_trip_type(struct thermal_zone_device *thermal,
103 int trip, enum thermal_trip_type *type)
104{
105 struct acpi_device *device = thermal->devdata;
106 struct int3403_sensor *obj = acpi_driver_data(device);
107
108 /* Mandatory callback, may not mean much here */
109 if (trip == obj->crit_trip_id)
110 *type = THERMAL_TRIP_CRITICAL;
111 else
112 *type = THERMAL_TRIP_PASSIVE;
113
114 return 0;
115}
116
117int sys_set_trip_temp(struct thermal_zone_device *tzone, int trip,
118 unsigned long temp)
119{
120 struct acpi_device *device = tzone->devdata;
121 acpi_status status;
122 char name[10];
123 int ret = 0;
124 struct int3403_sensor *obj = acpi_driver_data(device);
125
126 snprintf(name, sizeof(name), "PAT%d", trip);
127 if (acpi_has_method(device->handle, name)) {
128 status = acpi_execute_simple_method(device->handle, name,
129 MILLI_CELSIUS_TO_DECI_KELVIN(temp,
130 KELVIN_OFFSET));
131 if (ACPI_FAILURE(status))
132 ret = -EIO;
133 else
134 obj->thresholds[trip] = temp;
135 } else {
136 ret = -EIO;
137 dev_err(&device->dev, "sys_set_trip_temp: method not found\n");
138 }
139
140 return ret;
141}
142
143static struct thermal_zone_device_ops tzone_ops = {
144 .get_temp = sys_get_curr_temp,
145 .get_trip_temp = sys_get_trip_temp,
146 .get_trip_type = sys_get_trip_type,
147 .set_trip_temp = sys_set_trip_temp,
148 .get_trip_hyst = sys_get_trip_hyst,
149};
150
151static void acpi_thermal_notify(struct acpi_device *device, u32 event)
152{
153 struct int3403_sensor *obj;
154
155 if (!device)
156 return;
157
158 obj = acpi_driver_data(device);
159 if (!obj)
160 return;
161
162 switch (event) {
163 case INT3403_PERF_CHANGED_EVENT:
164 break;
165 case INT3403_THERMAL_EVENT:
166 thermal_zone_device_update(obj->tzone);
167 break;
168 default:
169 dev_err(&device->dev, "Unsupported event [0x%x]\n", event);
170 break;
171 }
172}
173
174static int sys_get_trip_crt(struct acpi_device *device, unsigned long *temp)
175{
176 unsigned long long crt;
177 acpi_status status;
178
179 status = acpi_evaluate_integer(device->handle, "_CRT", NULL, &crt);
180 if (ACPI_FAILURE(status))
181 return -EIO;
182
183 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(crt, KELVIN_OFFSET);
184
185 return 0;
186}
187
188static int sys_get_trip_psv(struct acpi_device *device, unsigned long *temp)
189{
190 unsigned long long psv;
191 acpi_status status;
192
193 status = acpi_evaluate_integer(device->handle, "_PSV", NULL, &psv);
194 if (ACPI_FAILURE(status))
195 return -EIO;
196
197 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(psv, KELVIN_OFFSET);
198
199 return 0;
200}
201
202static int acpi_int3403_add(struct acpi_device *device)
203{
204 int result = 0;
205 unsigned long long ptyp;
206 acpi_status status;
207 struct int3403_sensor *obj;
208 unsigned long long trip_cnt;
209 int trip_mask = 0;
210
211 if (!device)
212 return -EINVAL;
213
214 status = acpi_evaluate_integer(device->handle, "PTYP", NULL, &ptyp);
215 if (ACPI_FAILURE(status))
216 return -EINVAL;
217
218 if (ptyp != INT3403_TYPE_SENSOR)
219 return -EINVAL;
220
221 obj = devm_kzalloc(&device->dev, sizeof(*obj), GFP_KERNEL);
222 if (!obj)
223 return -ENOMEM;
224
225 device->driver_data = obj;
226
227 status = acpi_evaluate_integer(device->handle, "PATC", NULL,
228 &trip_cnt);
229 if (ACPI_FAILURE(status))
230 trip_cnt = 0;
231
232 if (trip_cnt) {
233 /* We have to cache, thresholds can't be readback */
234 obj->thresholds = devm_kzalloc(&device->dev,
235 sizeof(*obj->thresholds) * trip_cnt,
236 GFP_KERNEL);
237 if (!obj->thresholds)
238 return -ENOMEM;
239 trip_mask = BIT(trip_cnt) - 1;
240 }
241
242 obj->psv_trip_id = -1;
243 if (!sys_get_trip_psv(device, &obj->psv_temp))
244 obj->psv_trip_id = trip_cnt++;
245
246 obj->crit_trip_id = -1;
247 if (!sys_get_trip_crt(device, &obj->crit_temp))
248 obj->crit_trip_id = trip_cnt++;
249
250 obj->tzone = thermal_zone_device_register(acpi_device_bid(device),
251 trip_cnt, trip_mask, device, &tzone_ops,
252 NULL, 0, 0);
253 if (IS_ERR(obj->tzone)) {
254 result = PTR_ERR(obj->tzone);
255 return result;
256 }
257
258 strcpy(acpi_device_name(device), "INT3403");
259 strcpy(acpi_device_class(device), ACPI_INT3403_CLASS);
260
261 return 0;
262}
263
264static int acpi_int3403_remove(struct acpi_device *device)
265{
266 struct int3403_sensor *obj;
267
268 obj = acpi_driver_data(device);
269 thermal_zone_device_unregister(obj->tzone);
270
271 return 0;
272}
273
274ACPI_MODULE_NAME("int3403");
275static const struct acpi_device_id int3403_device_ids[] = {
276 {"INT3403", 0},
277 {"", 0},
278};
279MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
280
281static struct acpi_driver acpi_int3403_driver = {
282 .name = "INT3403",
283 .class = ACPI_INT3403_CLASS,
284 .ids = int3403_device_ids,
285 .ops = {
286 .add = acpi_int3403_add,
287 .remove = acpi_int3403_remove,
288 .notify = acpi_thermal_notify,
289 },
290};
291
292module_acpi_driver(acpi_int3403_driver);
293
294MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
295MODULE_LICENSE("GPL v2");
296MODULE_DESCRIPTION("ACPI INT3403 thermal driver");
diff --git a/drivers/thermal/int340x_thermal/Makefile b/drivers/thermal/int340x_thermal/Makefile
new file mode 100644
index 000000000000..ffe40bffaf1a
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_INT340X_THERMAL) += int3400_thermal.o
2obj-$(CONFIG_INT340X_THERMAL) += int3402_thermal.o
3obj-$(CONFIG_INT340X_THERMAL) += int3403_thermal.o
4obj-$(CONFIG_ACPI_THERMAL_REL) += acpi_thermal_rel.o
diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
new file mode 100644
index 000000000000..0d8db808f0ae
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
@@ -0,0 +1,400 @@
1/* acpi_thermal_rel.c driver for exporting ACPI thermal relationship
2 *
3 * Copyright (c) 2014 Intel Corp
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 *
9 */
10
11/*
12 * Two functionalities are included:
13 * 1. Export _TRT and _ART via a misc device interface to userspace.
14 * 2. Provide the parsing results to kernel drivers.
15 *
16 */
17#include <linux/init.h>
18#include <linux/export.h>
19#include <linux/module.h>
20#include <linux/device.h>
21#include <linux/platform_device.h>
22#include <linux/io.h>
23#include <linux/acpi.h>
24#include <linux/uaccess.h>
25#include <linux/miscdevice.h>
26#include "acpi_thermal_rel.h"
27
28static acpi_handle acpi_thermal_rel_handle;
29static DEFINE_SPINLOCK(acpi_thermal_rel_chrdev_lock);
30static int acpi_thermal_rel_chrdev_count; /* #times opened */
31static int acpi_thermal_rel_chrdev_exclu; /* already open exclusive? */
32
33static int acpi_thermal_rel_open(struct inode *inode, struct file *file)
34{
35 spin_lock(&acpi_thermal_rel_chrdev_lock);
36 if (acpi_thermal_rel_chrdev_exclu ||
37 (acpi_thermal_rel_chrdev_count && (file->f_flags & O_EXCL))) {
38 spin_unlock(&acpi_thermal_rel_chrdev_lock);
39 return -EBUSY;
40 }
41
42 if (file->f_flags & O_EXCL)
43 acpi_thermal_rel_chrdev_exclu = 1;
44 acpi_thermal_rel_chrdev_count++;
45
46 spin_unlock(&acpi_thermal_rel_chrdev_lock);
47
48 return nonseekable_open(inode, file);
49}
50
51static int acpi_thermal_rel_release(struct inode *inode, struct file *file)
52{
53 spin_lock(&acpi_thermal_rel_chrdev_lock);
54 acpi_thermal_rel_chrdev_count--;
55 acpi_thermal_rel_chrdev_exclu = 0;
56 spin_unlock(&acpi_thermal_rel_chrdev_lock);
57
58 return 0;
59}
60
61/**
62 * acpi_parse_trt - Parse the Thermal Relationship Table _TRT for passive cooling
63 *
64 * @handle: ACPI handle of the device that contains _TRT
65 * @trt_count: the number of valid entries resulting from parsing _TRT
66 * @trtp: pointer to a pointer to an array of trt entries from the parsing result
67 * @create_dev: whether to create platform devices for target and source
68 *
69 */
70int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
71 bool create_dev)
72{
73 acpi_status status;
74 int result = 0;
75 int i;
76 int nr_bad_entries = 0;
77 struct trt *trts;
78 struct acpi_device *adev;
79 union acpi_object *p;
80 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
81 struct acpi_buffer element = { 0, NULL };
82 struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
83
84 if (!acpi_has_method(handle, "_TRT"))
85 return 0;
86
87 status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
88 if (ACPI_FAILURE(status))
89 return -ENODEV;
90
91 p = buffer.pointer;
92 if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
93 pr_err("Invalid _TRT data\n");
94 result = -EFAULT;
95 goto end;
96 }
97
98 *trt_count = p->package.count;
99 trts = kzalloc(*trt_count * sizeof(struct trt), GFP_KERNEL);
100 if (!trts) {
101 result = -ENOMEM;
102 goto end;
103 }
104
105 for (i = 0; i < *trt_count; i++) {
106 struct trt *trt = &trts[i - nr_bad_entries];
107
108 element.length = sizeof(struct trt);
109 element.pointer = trt;
110
111 status = acpi_extract_package(&(p->package.elements[i]),
112 &trt_format, &element);
113 if (ACPI_FAILURE(status)) {
114 nr_bad_entries++;
115 pr_warn("_TRT package %d is invalid, ignored\n", i);
116 continue;
117 }
118 if (!create_dev)
119 continue;
120
121 result = acpi_bus_get_device(trt->source, &adev);
122 if (!result)
123 acpi_create_platform_device(adev);
124 else
125 pr_warn("Failed to get source ACPI device\n");
126
127 result = acpi_bus_get_device(trt->target, &adev);
128 if (!result)
129 acpi_create_platform_device(adev);
130 else
131 pr_warn("Failed to get target ACPI device\n");
132 }
133
134 *trtp = trts;
135 /* don't count bad entries */
136 *trt_count -= nr_bad_entries;
137end:
138 kfree(buffer.pointer);
139 return result;
140}
141EXPORT_SYMBOL(acpi_parse_trt);
142
143/**
144 * acpi_parse_art - Parse the Active Relationship Table _ART
145 *
146 * @handle: ACPI handle of the device that contains _ART
147 * @art_count: the number of valid entries resulting from parsing _ART
148 * @artp: pointer to a pointer to an array of art entries from the parsing result
149 * @create_dev: whether to create platform devices for target and source
150 *
151 */
152int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
153 bool create_dev)
154{
155 acpi_status status;
156 int result = 0;
157 int i;
158 int nr_bad_entries = 0;
159 struct art *arts;
160 struct acpi_device *adev;
161 union acpi_object *p;
162 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
163 struct acpi_buffer element = { 0, NULL };
164 struct acpi_buffer art_format = {
165 sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
166
167 if (!acpi_has_method(handle, "_ART"))
168 return 0;
169
170 status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
171 if (ACPI_FAILURE(status))
172 return -ENODEV;
173
174 p = buffer.pointer;
175 if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
176 pr_err("Invalid _ART data\n");
177 result = -EFAULT;
178 goto end;
179 }
180
181 /* ignore p->package.elements[0], as this is _ART Revision field */
182 *art_count = p->package.count - 1;
183 arts = kzalloc(*art_count * sizeof(struct art), GFP_KERNEL);
184 if (!arts) {
185 result = -ENOMEM;
186 goto end;
187 }
188
189 for (i = 0; i < *art_count; i++) {
190 struct art *art = &arts[i - nr_bad_entries];
191
192 element.length = sizeof(struct art);
193 element.pointer = art;
194
195 status = acpi_extract_package(&(p->package.elements[i + 1]),
196 &art_format, &element);
197 if (ACPI_FAILURE(status)) {
198			pr_warn("_ART package %d is invalid, ignored\n", i);
199 nr_bad_entries++;
200 continue;
201 }
202 if (!create_dev)
203 continue;
204
205 if (art->source) {
206 result = acpi_bus_get_device(art->source, &adev);
207 if (!result)
208 acpi_create_platform_device(adev);
209 else
210 pr_warn("Failed to get source ACPI device\n");
211 }
212 if (art->target) {
213 result = acpi_bus_get_device(art->target, &adev);
214 if (!result)
215 acpi_create_platform_device(adev);
216 else
217				pr_warn("Failed to get target ACPI device\n");
218 }
219 }
220
221 *artp = arts;
222 /* don't count bad entries */
223 *art_count -= nr_bad_entries;
224end:
225 kfree(buffer.pointer);
226 return result;
227}
228EXPORT_SYMBOL(acpi_parse_art);
229
230
231/* get device name from acpi handle */
232static void get_single_name(acpi_handle handle, char *name)
233{
234 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER};
235
236 if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)))
237		pr_warn("Failed to get name from handle\n");
238 else {
239 memcpy(name, buffer.pointer, ACPI_NAME_SIZE);
240 kfree(buffer.pointer);
241 }
242}
243
244static int fill_art(char __user *ubuf)
245{
246 int i;
247 int ret;
248 int count;
249 int art_len;
250 struct art *arts = NULL;
251 union art_object *art_user;
252
253 ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false);
254 if (ret)
255 goto free_art;
256 art_len = count * sizeof(union art_object);
257 art_user = kzalloc(art_len, GFP_KERNEL);
258 if (!art_user) {
259 ret = -ENOMEM;
260 goto free_art;
261 }
262 /* now fill in user art data */
263 for (i = 0; i < count; i++) {
264 /* userspace art needs device name instead of acpi reference */
265 get_single_name(arts[i].source, art_user[i].source_device);
266 get_single_name(arts[i].target, art_user[i].target_device);
267		/* copy the remaining integer data in addition to source and target */
268 memcpy(&art_user[i].weight, &arts[i].weight,
269 sizeof(u64) * (ACPI_NR_ART_ELEMENTS - 2));
270 }
271
272 if (copy_to_user(ubuf, art_user, art_len))
273 ret = -EFAULT;
274 kfree(art_user);
275free_art:
276 kfree(arts);
277 return ret;
278}
279
280static int fill_trt(char __user *ubuf)
281{
282 int i;
283 int ret;
284 int count;
285 int trt_len;
286 struct trt *trts = NULL;
287 union trt_object *trt_user;
288
289 ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false);
290 if (ret)
291 goto free_trt;
292 trt_len = count * sizeof(union trt_object);
293 trt_user = kzalloc(trt_len, GFP_KERNEL);
294 if (!trt_user) {
295 ret = -ENOMEM;
296 goto free_trt;
297 }
298 /* now fill in user trt data */
299 for (i = 0; i < count; i++) {
300 /* userspace trt needs device name instead of acpi reference */
301 get_single_name(trts[i].source, trt_user[i].source_device);
302 get_single_name(trts[i].target, trt_user[i].target_device);
303 trt_user[i].sample_period = trts[i].sample_period;
304 trt_user[i].influence = trts[i].influence;
305 }
306
307 if (copy_to_user(ubuf, trt_user, trt_len))
308 ret = -EFAULT;
309 kfree(trt_user);
310free_trt:
311 kfree(trts);
312 return ret;
313}
314
315static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd,
316 unsigned long __arg)
317{
318 int ret = 0;
319 unsigned long length = 0;
320 unsigned long count = 0;
321 char __user *arg = (void __user *)__arg;
322 struct trt *trts;
323 struct art *arts;
324
325 switch (cmd) {
326 case ACPI_THERMAL_GET_TRT_COUNT:
327 ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count,
328 &trts, false);
329 kfree(trts);
330 if (!ret)
331 return put_user(count, (unsigned long __user *)__arg);
332 return ret;
333 case ACPI_THERMAL_GET_TRT_LEN:
334 ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count,
335 &trts, false);
336 kfree(trts);
337 length = count * sizeof(union trt_object);
338 if (!ret)
339 return put_user(length, (unsigned long __user *)__arg);
340 return ret;
341 case ACPI_THERMAL_GET_TRT:
342 return fill_trt(arg);
343 case ACPI_THERMAL_GET_ART_COUNT:
344 ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count,
345 &arts, false);
346 kfree(arts);
347 if (!ret)
348 return put_user(count, (unsigned long __user *)__arg);
349 return ret;
350 case ACPI_THERMAL_GET_ART_LEN:
351 ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count,
352 &arts, false);
353 kfree(arts);
354 length = count * sizeof(union art_object);
355 if (!ret)
356 return put_user(length, (unsigned long __user *)__arg);
357 return ret;
358
359 case ACPI_THERMAL_GET_ART:
360 return fill_art(arg);
361
362 default:
363 return -ENOTTY;
364 }
365}
366
367static const struct file_operations acpi_thermal_rel_fops = {
368 .owner = THIS_MODULE,
369 .open = acpi_thermal_rel_open,
370 .release = acpi_thermal_rel_release,
371 .unlocked_ioctl = acpi_thermal_rel_ioctl,
372 .llseek = no_llseek,
373};
374
375static struct miscdevice acpi_thermal_rel_misc_device = {
376 .minor = MISC_DYNAMIC_MINOR,
377	.name = "acpi_thermal_rel",
378	.fops = &acpi_thermal_rel_fops
379};
380
381int acpi_thermal_rel_misc_device_add(acpi_handle handle)
382{
383 acpi_thermal_rel_handle = handle;
384
385 return misc_register(&acpi_thermal_rel_misc_device);
386}
387EXPORT_SYMBOL(acpi_thermal_rel_misc_device_add);
388
389int acpi_thermal_rel_misc_device_remove(acpi_handle handle)
390{
391 misc_deregister(&acpi_thermal_rel_misc_device);
392
393 return 0;
394}
395EXPORT_SYMBOL(acpi_thermal_rel_misc_device_remove);
396
397MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
398MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
399MODULE_DESCRIPTION("Intel acpi thermal rel misc dev driver");
400MODULE_LICENSE("GPL v2");
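
For readers decoding the two acpi_extract_package() calls above: in an ACPICA format string, each 'R' consumes one package element as an ACPI reference and each 'N' consumes one element as an integer, so the strings line up with the structures declared in acpi_thermal_rel.h (next hunk) as sketched below.

	/*
	 * "RRNNNNNN" (_TRT)      -> struct trt:
	 *     R R                    source, target           (2 references)
	 *     N N N N N N            influence, sample_period,
	 *                            reserved1..reserved4     (6 integers)
	 *
	 * "RRNNNNNNNNNNN" (_ART) -> struct art:
	 *     R R                    source, target           (2 references)
	 *     N x 11                 weight, ac0_max..ac9_max (11 integers)
	 */
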
diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.h b/drivers/thermal/int340x_thermal/acpi_thermal_rel.h
new file mode 100644
index 000000000000..f00700bc9d79
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.h
@@ -0,0 +1,84 @@
1#ifndef __ACPI_ACPI_THERMAL_H
2#define __ACPI_ACPI_THERMAL_H
3
4#include <asm/ioctl.h>
5
6#define ACPI_THERMAL_MAGIC 's'
7
8#define ACPI_THERMAL_GET_TRT_LEN _IOR(ACPI_THERMAL_MAGIC, 1, unsigned long)
9#define ACPI_THERMAL_GET_ART_LEN _IOR(ACPI_THERMAL_MAGIC, 2, unsigned long)
10#define ACPI_THERMAL_GET_TRT_COUNT _IOR(ACPI_THERMAL_MAGIC, 3, unsigned long)
11#define ACPI_THERMAL_GET_ART_COUNT _IOR(ACPI_THERMAL_MAGIC, 4, unsigned long)
12
13#define ACPI_THERMAL_GET_TRT _IOR(ACPI_THERMAL_MAGIC, 5, unsigned long)
14#define ACPI_THERMAL_GET_ART _IOR(ACPI_THERMAL_MAGIC, 6, unsigned long)
15
16struct art {
17 acpi_handle source;
18 acpi_handle target;
19 u64 weight;
20 u64 ac0_max;
21 u64 ac1_max;
22 u64 ac2_max;
23 u64 ac3_max;
24 u64 ac4_max;
25 u64 ac5_max;
26 u64 ac6_max;
27 u64 ac7_max;
28 u64 ac8_max;
29 u64 ac9_max;
30} __packed;
31
32struct trt {
33 acpi_handle source;
34 acpi_handle target;
35 u64 influence;
36 u64 sample_period;
37	u64 reserved1;
38	u64 reserved2;
39	u64 reserved3;
40	u64 reserved4;
41} __packed;
42
43#define ACPI_NR_ART_ELEMENTS 13
44/* for userspace */
45union art_object {
46 struct {
47 char source_device[8]; /* ACPI single name */
48 char target_device[8]; /* ACPI single name */
49 u64 weight;
50 u64 ac0_max_level;
51 u64 ac1_max_level;
52 u64 ac2_max_level;
53 u64 ac3_max_level;
54 u64 ac4_max_level;
55 u64 ac5_max_level;
56 u64 ac6_max_level;
57 u64 ac7_max_level;
58 u64 ac8_max_level;
59 u64 ac9_max_level;
60 };
61 u64 __data[ACPI_NR_ART_ELEMENTS];
62};
63
64union trt_object {
65 struct {
66 char source_device[8]; /* ACPI single name */
67 char target_device[8]; /* ACPI single name */
68 u64 influence;
69 u64 sample_period;
70 u64 reserved[4];
71 };
72 u64 __data[8];
73};
74
75#ifdef __KERNEL__
76int acpi_thermal_rel_misc_device_add(acpi_handle handle);
77int acpi_thermal_rel_misc_device_remove(acpi_handle handle);
78int acpi_parse_art(acpi_handle handle, int *art_count, struct art **arts,
79 bool create_dev);
80int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trts,
81 bool create_dev);
82#endif
83
84#endif /* __ACPI_ACPI_THERMAL_H */
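
Together with the misc device registered in acpi_thermal_rel.c, the header above defines a small userspace ABI: query the entry count and the buffer length, then fetch the whole table. Below is a consumer sketch, not part of the patch; it assumes the device node appears as /dev/acpi_thermal_rel (the usual naming for misc devices) and maps the kernel-only type names before including the header. Error handling is omitted for brevity.

	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/types.h>

	typedef __u64 u64;                   /* the header uses kernel type names */
	typedef void *acpi_handle;           /* opaque outside the kernel */
	#define __packed __attribute__((packed))
	#include "acpi_thermal_rel.h"

	int main(void)
	{
		unsigned long count = 0, len = 0, i;
		union trt_object *trt;
		int fd;

		fd = open("/dev/acpi_thermal_rel", O_RDONLY);
		if (fd < 0)
			return 1;

		ioctl(fd, ACPI_THERMAL_GET_TRT_COUNT, &count);
		ioctl(fd, ACPI_THERMAL_GET_TRT_LEN, &len); /* count * sizeof(union trt_object) */

		trt = malloc(len);
		ioctl(fd, ACPI_THERMAL_GET_TRT, trt);      /* fill_trt() copies the table out */

		/* device names are 8 bytes and not NUL-terminated, hence %.8s */
		for (i = 0; i < count; i++)
			printf("%.8s -> %.8s influence=%llu period=%llu\n",
			       trt[i].source_device, trt[i].target_device,
			       (unsigned long long)trt[i].influence,
			       (unsigned long long)trt[i].sample_period);

		free(trt);
		close(fd);
		return 0;
	}
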
diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
new file mode 100644
index 000000000000..edc1cce117ba
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c
@@ -0,0 +1,271 @@
1/*
2 * INT3400 thermal driver
3 *
4 * Copyright (C) 2014, Intel Corporation
5 * Authors: Zhang Rui <rui.zhang@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/platform_device.h>
15#include <linux/acpi.h>
16#include <linux/thermal.h>
17#include "acpi_thermal_rel.h"
18
19enum int3400_thermal_uuid {
20 INT3400_THERMAL_PASSIVE_1,
21 INT3400_THERMAL_PASSIVE_2,
22 INT3400_THERMAL_ACTIVE,
23 INT3400_THERMAL_CRITICAL,
24 INT3400_THERMAL_COOLING_MODE,
25 INT3400_THERMAL_MAXIMUM_UUID,
26};
27
28static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
29 "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
30 "9E04115A-AE87-4D1C-9500-0F3E340BFE75",
31 "3A95C389-E4B8-4629-A526-C52C88626BAE",
32 "97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
33 "16CAF1B7-DD38-40ed-B1C1-1B8A1913D531",
34};
35
36struct int3400_thermal_priv {
37 struct acpi_device *adev;
38 struct thermal_zone_device *thermal;
39 int mode;
40 int art_count;
41 struct art *arts;
42 int trt_count;
43 struct trt *trts;
44 u8 uuid_bitmap;
45 int rel_misc_dev_res;
46};
47
48static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
49{
50 struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL};
51 union acpi_object *obja, *objb;
52 int i, j;
53 int result = 0;
54 acpi_status status;
55
56 status = acpi_evaluate_object(priv->adev->handle, "IDSP", NULL, &buf);
57 if (ACPI_FAILURE(status))
58 return -ENODEV;
59
60 obja = (union acpi_object *)buf.pointer;
61 if (obja->type != ACPI_TYPE_PACKAGE) {
62 result = -EINVAL;
63 goto end;
64 }
65
66 for (i = 0; i < obja->package.count; i++) {
67 objb = &obja->package.elements[i];
68 if (objb->type != ACPI_TYPE_BUFFER) {
69 result = -EINVAL;
70 goto end;
71 }
72
73 /* UUID must be 16 bytes */
74 if (objb->buffer.length != 16) {
75 result = -EINVAL;
76 goto end;
77 }
78
79 for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
80 u8 uuid[16];
81
82 acpi_str_to_uuid(int3400_thermal_uuids[j], uuid);
83			if (!memcmp(uuid, objb->buffer.pointer, 16)) {
84 priv->uuid_bitmap |= (1 << j);
85 break;
86 }
87 }
88 }
89
90end:
91 kfree(buf.pointer);
92 return result;
93}
94
95static int int3400_thermal_run_osc(acpi_handle handle,
96 enum int3400_thermal_uuid uuid, bool enable)
97{
98 u32 ret, buf[2];
99 acpi_status status;
100 int result = 0;
101 struct acpi_osc_context context = {
102 .uuid_str = int3400_thermal_uuids[uuid],
103 .rev = 1,
104 .cap.length = 8,
105 };
106
107 buf[OSC_QUERY_DWORD] = 0;
108 buf[OSC_SUPPORT_DWORD] = enable;
109
110 context.cap.pointer = buf;
111
112 status = acpi_run_osc(handle, &context);
113 if (ACPI_SUCCESS(status)) {
114 ret = *((u32 *)(context.ret.pointer + 4));
115 if (ret != enable)
116 result = -EPERM;
117 } else
118 result = -EPERM;
119
120 kfree(context.ret.pointer);
121 return result;
122}
123
124static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
125 unsigned long *temp)
126{
127	*temp = 20 * 1000; /* fake temperature sensor, fixed at 20 C */
128 return 0;
129}
130
131static int int3400_thermal_get_mode(struct thermal_zone_device *thermal,
132 enum thermal_device_mode *mode)
133{
134 struct int3400_thermal_priv *priv = thermal->devdata;
135
136 if (!priv)
137 return -EINVAL;
138
139 *mode = priv->mode;
140
141 return 0;
142}
143
144static int int3400_thermal_set_mode(struct thermal_zone_device *thermal,
145 enum thermal_device_mode mode)
146{
147 struct int3400_thermal_priv *priv = thermal->devdata;
148 bool enable;
149 int result = 0;
150
151 if (!priv)
152 return -EINVAL;
153
154 if (mode == THERMAL_DEVICE_ENABLED)
155 enable = true;
156 else if (mode == THERMAL_DEVICE_DISABLED)
157 enable = false;
158 else
159 return -EINVAL;
160
161 if (enable != priv->mode) {
162 priv->mode = enable;
163 /* currently, only PASSIVE COOLING is supported */
164 result = int3400_thermal_run_osc(priv->adev->handle,
165 INT3400_THERMAL_PASSIVE_1, enable);
166 }
167 return result;
168}
169
170static struct thermal_zone_device_ops int3400_thermal_ops = {
171 .get_temp = int3400_thermal_get_temp,
172};
173
174static struct thermal_zone_params int3400_thermal_params = {
175 .governor_name = "user_space",
176 .no_hwmon = true,
177};
178
179static int int3400_thermal_probe(struct platform_device *pdev)
180{
181 struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
182 struct int3400_thermal_priv *priv;
183 int result;
184
185 if (!adev)
186 return -ENODEV;
187
188 priv = kzalloc(sizeof(struct int3400_thermal_priv), GFP_KERNEL);
189 if (!priv)
190 return -ENOMEM;
191
192 priv->adev = adev;
193
194 result = int3400_thermal_get_uuids(priv);
195 if (result)
196 goto free_priv;
197
198 result = acpi_parse_art(priv->adev->handle, &priv->art_count,
199 &priv->arts, true);
200 if (result)
201 goto free_priv;
202
203
204 result = acpi_parse_trt(priv->adev->handle, &priv->trt_count,
205 &priv->trts, true);
206 if (result)
207 goto free_art;
208
209 platform_set_drvdata(pdev, priv);
210
211 if (priv->uuid_bitmap & 1 << INT3400_THERMAL_PASSIVE_1) {
212 int3400_thermal_ops.get_mode = int3400_thermal_get_mode;
213 int3400_thermal_ops.set_mode = int3400_thermal_set_mode;
214 }
215 priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0,
216 priv, &int3400_thermal_ops,
217 &int3400_thermal_params, 0, 0);
218 if (IS_ERR(priv->thermal)) {
219 result = PTR_ERR(priv->thermal);
220 goto free_trt;
221 }
222
223 priv->rel_misc_dev_res = acpi_thermal_rel_misc_device_add(
224 priv->adev->handle);
225
226 return 0;
227free_trt:
228 kfree(priv->trts);
229free_art:
230 kfree(priv->arts);
231free_priv:
232 kfree(priv);
233 return result;
234}
235
236static int int3400_thermal_remove(struct platform_device *pdev)
237{
238 struct int3400_thermal_priv *priv = platform_get_drvdata(pdev);
239
240 if (!priv->rel_misc_dev_res)
241 acpi_thermal_rel_misc_device_remove(priv->adev->handle);
242
243 thermal_zone_device_unregister(priv->thermal);
244 kfree(priv->trts);
245 kfree(priv->arts);
246 kfree(priv);
247 return 0;
248}
249
250static const struct acpi_device_id int3400_thermal_match[] = {
251 {"INT3400", 0},
252 {}
253};
254
255MODULE_DEVICE_TABLE(acpi, int3400_thermal_match);
256
257static struct platform_driver int3400_thermal_driver = {
258 .probe = int3400_thermal_probe,
259 .remove = int3400_thermal_remove,
260 .driver = {
261 .name = "int3400 thermal",
262 .owner = THIS_MODULE,
263 .acpi_match_table = ACPI_PTR(int3400_thermal_match),
264 },
265};
266
267module_platform_driver(int3400_thermal_driver);
268
269MODULE_DESCRIPTION("INT3400 Thermal driver");
270MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
271MODULE_LICENSE("GPL");
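
int3400_thermal_run_osc() above follows the generic ACPI _OSC convention: the capability buffer is two 32-bit DWORDs (hence cap.length = 8), and the platform's answer is read back from the second DWORD of the returned buffer, which is what the (context.ret.pointer + 4) dereference does. A layout sketch for illustration only, not driver code:

	#include <stdint.h>

	/* the 8-byte _OSC capability buffer passed via acpi_osc_context */
	struct osc_caps {
		uint32_t query_status; /* DWORD 0: query flag in, status bits out */
		uint32_t support;      /* DWORD 1: 1 = enable the policy UUID */
	};

	/*
	 * The driver compares the returned DWORD 1 against the requested
	 * enable/disable value and treats any mismatch as -EPERM.
	 */
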
diff --git a/drivers/thermal/int340x_thermal/int3402_thermal.c b/drivers/thermal/int340x_thermal/int3402_thermal.c
new file mode 100644
index 000000000000..a5d08c14ba24
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3402_thermal.c
@@ -0,0 +1,242 @@
1/*
2 * INT3402 thermal driver for memory temperature reporting
3 *
4 * Copyright (C) 2014, Intel Corporation
5 * Authors: Aaron Lu <aaron.lu@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/platform_device.h>
15#include <linux/acpi.h>
16#include <linux/thermal.h>
17
18#define ACPI_ACTIVE_COOLING_MAX_NR 10
19
20struct active_trip {
21 unsigned long temp;
22 int id;
23 bool valid;
24};
25
26struct int3402_thermal_data {
27 unsigned long *aux_trips;
28 int aux_trip_nr;
29 unsigned long psv_temp;
30 int psv_trip_id;
31 unsigned long crt_temp;
32 int crt_trip_id;
33 unsigned long hot_temp;
34 int hot_trip_id;
35 struct active_trip act_trips[ACPI_ACTIVE_COOLING_MAX_NR];
36 acpi_handle *handle;
37};
38
39static int int3402_thermal_get_zone_temp(struct thermal_zone_device *zone,
40 unsigned long *temp)
41{
42 struct int3402_thermal_data *d = zone->devdata;
43 unsigned long long tmp;
44 acpi_status status;
45
46 status = acpi_evaluate_integer(d->handle, "_TMP", NULL, &tmp);
47 if (ACPI_FAILURE(status))
48 return -ENODEV;
49
50 /* _TMP returns the temperature in tenths of degrees Kelvin */
51 *temp = DECI_KELVIN_TO_MILLICELSIUS(tmp);
52
53 return 0;
54}
55
56static int int3402_thermal_get_trip_temp(struct thermal_zone_device *zone,
57 int trip, unsigned long *temp)
58{
59 struct int3402_thermal_data *d = zone->devdata;
60 int i;
61
62 if (trip < d->aux_trip_nr)
63 *temp = d->aux_trips[trip];
64 else if (trip == d->crt_trip_id)
65 *temp = d->crt_temp;
66 else if (trip == d->psv_trip_id)
67 *temp = d->psv_temp;
68 else if (trip == d->hot_trip_id)
69 *temp = d->hot_temp;
70 else {
71 for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
72 if (d->act_trips[i].valid &&
73 d->act_trips[i].id == trip) {
74 *temp = d->act_trips[i].temp;
75 break;
76 }
77 }
78 if (i == ACPI_ACTIVE_COOLING_MAX_NR)
79 return -EINVAL;
80 }
81 return 0;
82}
83
84static int int3402_thermal_get_trip_type(struct thermal_zone_device *zone,
85 int trip, enum thermal_trip_type *type)
86{
87 struct int3402_thermal_data *d = zone->devdata;
88 int i;
89
90 if (trip < d->aux_trip_nr)
91 *type = THERMAL_TRIP_PASSIVE;
92 else if (trip == d->crt_trip_id)
93 *type = THERMAL_TRIP_CRITICAL;
94 else if (trip == d->hot_trip_id)
95 *type = THERMAL_TRIP_HOT;
96 else if (trip == d->psv_trip_id)
97 *type = THERMAL_TRIP_PASSIVE;
98 else {
99 for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
100 if (d->act_trips[i].valid &&
101 d->act_trips[i].id == trip) {
102 *type = THERMAL_TRIP_ACTIVE;
103 break;
104 }
105 }
106 if (i == ACPI_ACTIVE_COOLING_MAX_NR)
107 return -EINVAL;
108 }
109 return 0;
110}
111
112static int int3402_thermal_set_trip_temp(struct thermal_zone_device *zone, int trip,
113 unsigned long temp)
114{
115 struct int3402_thermal_data *d = zone->devdata;
116 acpi_status status;
117 char name[10];
118
119 snprintf(name, sizeof(name), "PAT%d", trip);
120 status = acpi_execute_simple_method(d->handle, name,
121 MILLICELSIUS_TO_DECI_KELVIN(temp));
122 if (ACPI_FAILURE(status))
123 return -EIO;
124
125 d->aux_trips[trip] = temp;
126 return 0;
127}
128
129static struct thermal_zone_device_ops int3402_thermal_zone_ops = {
130 .get_temp = int3402_thermal_get_zone_temp,
131 .get_trip_temp = int3402_thermal_get_trip_temp,
132 .get_trip_type = int3402_thermal_get_trip_type,
133 .set_trip_temp = int3402_thermal_set_trip_temp,
134};
135
136static struct thermal_zone_params int3402_thermal_params = {
137 .governor_name = "user_space",
138 .no_hwmon = true,
139};
140
141static int int3402_thermal_get_temp(acpi_handle handle, char *name,
142 unsigned long *temp)
143{
144 unsigned long long r;
145 acpi_status status;
146
147 status = acpi_evaluate_integer(handle, name, NULL, &r);
148 if (ACPI_FAILURE(status))
149 return -EIO;
150
151 *temp = DECI_KELVIN_TO_MILLICELSIUS(r);
152 return 0;
153}
154
155static int int3402_thermal_probe(struct platform_device *pdev)
156{
157 struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
158 struct int3402_thermal_data *d;
159 struct thermal_zone_device *zone;
160 acpi_status status;
161 unsigned long long trip_cnt;
162 int trip_mask = 0, i;
163
164 if (!acpi_has_method(adev->handle, "_TMP"))
165 return -ENODEV;
166
167 d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
168 if (!d)
169 return -ENOMEM;
170
171 status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt);
172 if (ACPI_FAILURE(status))
173 trip_cnt = 0;
174 else {
175 d->aux_trips = devm_kzalloc(&pdev->dev,
176 sizeof(*d->aux_trips) * trip_cnt, GFP_KERNEL);
177 if (!d->aux_trips)
178 return -ENOMEM;
179		trip_mask = BIT(trip_cnt) - 1;
180 d->handle = adev->handle;
181 d->aux_trip_nr = trip_cnt;
182 }
183
184 d->crt_trip_id = -1;
185 if (!int3402_thermal_get_temp(adev->handle, "_CRT", &d->crt_temp))
186 d->crt_trip_id = trip_cnt++;
187 d->hot_trip_id = -1;
188 if (!int3402_thermal_get_temp(adev->handle, "_HOT", &d->hot_temp))
189 d->hot_trip_id = trip_cnt++;
190 d->psv_trip_id = -1;
191 if (!int3402_thermal_get_temp(adev->handle, "_PSV", &d->psv_temp))
192 d->psv_trip_id = trip_cnt++;
193 for (i = 0; i < ACPI_ACTIVE_COOLING_MAX_NR; i++) {
194 char name[5] = { '_', 'A', 'C', '0' + i, '\0' };
195 if (int3402_thermal_get_temp(adev->handle, name,
196 &d->act_trips[i].temp))
197 break;
198 d->act_trips[i].id = trip_cnt++;
199 d->act_trips[i].valid = true;
200 }
201
202 zone = thermal_zone_device_register(acpi_device_bid(adev), trip_cnt,
203 trip_mask, d,
204 &int3402_thermal_zone_ops,
205 &int3402_thermal_params,
206 0, 0);
207 if (IS_ERR(zone))
208 return PTR_ERR(zone);
209 platform_set_drvdata(pdev, zone);
210
211 return 0;
212}
213
214static int int3402_thermal_remove(struct platform_device *pdev)
215{
216 struct thermal_zone_device *zone = platform_get_drvdata(pdev);
217
218 thermal_zone_device_unregister(zone);
219 return 0;
220}
221
222static const struct acpi_device_id int3402_thermal_match[] = {
223 {"INT3402", 0},
224 {}
225};
226
227MODULE_DEVICE_TABLE(acpi, int3402_thermal_match);
228
229static struct platform_driver int3402_thermal_driver = {
230 .probe = int3402_thermal_probe,
231 .remove = int3402_thermal_remove,
232 .driver = {
233 .name = "int3402 thermal",
234 .owner = THIS_MODULE,
235 .acpi_match_table = int3402_thermal_match,
236 },
237};
238
239module_platform_driver(int3402_thermal_driver);
240
241MODULE_DESCRIPTION("INT3402 Thermal driver");
242MODULE_LICENSE("GPL");
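
A worked example of the unit handling above: _TMP-style ACPI methods report tenths of a degree Kelvin, and the driver converts to millicelsius around an offset of 2732 deci-Kelvin (273.2 K) for 0 C. Standalone sketch with local copies of the conversion macros (in the kernel they come from shared headers):

	#include <stdio.h>

	#define DECI_KELVIN_TO_MILLICELSIUS(t) (((t) - 2732) * 100)
	#define MILLICELSIUS_TO_DECI_KELVIN(t) (((t) / 100) + 2732)

	int main(void)
	{
		long dk = 3032; /* e.g. a _TMP reading: 303.2 K */
		long mc = DECI_KELVIN_TO_MILLICELSIUS(dk);

		printf("%ld dK = %ld mC\n", dk, mc); /* 3032 dK = 30000 mC = 30 C */
		printf("round trip: %ld dK\n", MILLICELSIUS_TO_DECI_KELVIN(mc));
		return 0;
	}
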
diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
new file mode 100644
index 000000000000..d20dba986f0f
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -0,0 +1,477 @@
1/*
2 * ACPI INT3403 thermal driver
3 * Copyright (c) 2013, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/init.h>
18#include <linux/types.h>
19#include <linux/acpi.h>
20#include <linux/thermal.h>
21#include <linux/platform_device.h>
22
23#define INT3403_TYPE_SENSOR 0x03
24#define INT3403_TYPE_CHARGER 0x0B
25#define INT3403_TYPE_BATTERY 0x0C
26#define INT3403_PERF_CHANGED_EVENT 0x80
27#define INT3403_THERMAL_EVENT 0x90
28
29#define DECI_KELVIN_TO_MILLI_CELSIUS(t, off) (((t) - (off)) * 100)
30#define KELVIN_OFFSET 2732
31#define MILLI_CELSIUS_TO_DECI_KELVIN(t, off) (((t) / 100) + (off))
32
33struct int3403_sensor {
34 struct thermal_zone_device *tzone;
35 unsigned long *thresholds;
36 unsigned long crit_temp;
37 int crit_trip_id;
38 unsigned long psv_temp;
39 int psv_trip_id;
40
41};
42
43struct int3403_performance_state {
44 u64 performance;
45 u64 power;
46 u64 latency;
47 u64 linear;
48 u64 control;
49	u64 raw_performance;
50 char *raw_unit;
51 int reserved;
52};
53
54struct int3403_cdev {
55 struct thermal_cooling_device *cdev;
56 unsigned long max_state;
57};
58
59struct int3403_priv {
60 struct platform_device *pdev;
61 struct acpi_device *adev;
62 unsigned long long type;
63 void *priv;
64};
65
66static int sys_get_curr_temp(struct thermal_zone_device *tzone,
67 unsigned long *temp)
68{
69 struct int3403_priv *priv = tzone->devdata;
70 struct acpi_device *device = priv->adev;
71 unsigned long long tmp;
72 acpi_status status;
73
74 status = acpi_evaluate_integer(device->handle, "_TMP", NULL, &tmp);
75 if (ACPI_FAILURE(status))
76 return -EIO;
77
78 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(tmp, KELVIN_OFFSET);
79
80 return 0;
81}
82
83static int sys_get_trip_hyst(struct thermal_zone_device *tzone,
84 int trip, unsigned long *temp)
85{
86 struct int3403_priv *priv = tzone->devdata;
87 struct acpi_device *device = priv->adev;
88 unsigned long long hyst;
89 acpi_status status;
90
91 status = acpi_evaluate_integer(device->handle, "GTSH", NULL, &hyst);
92 if (ACPI_FAILURE(status))
93 return -EIO;
94
95 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(hyst, KELVIN_OFFSET);
96
97 return 0;
98}
99
100static int sys_get_trip_temp(struct thermal_zone_device *tzone,
101 int trip, unsigned long *temp)
102{
103 struct int3403_priv *priv = tzone->devdata;
104 struct int3403_sensor *obj = priv->priv;
105
106 if (priv->type != INT3403_TYPE_SENSOR || !obj)
107 return -EINVAL;
108
109 if (trip == obj->crit_trip_id)
110 *temp = obj->crit_temp;
111 else if (trip == obj->psv_trip_id)
112 *temp = obj->psv_temp;
113 else {
114 /*
115 * get_trip_temp is a mandatory callback but
116 * PATx method doesn't return any value, so return
117 * cached value, which was last set from user space
118 */
119 *temp = obj->thresholds[trip];
120 }
121
122 return 0;
123}
124
125static int sys_get_trip_type(struct thermal_zone_device *thermal,
126 int trip, enum thermal_trip_type *type)
127{
128 struct int3403_priv *priv = thermal->devdata;
129 struct int3403_sensor *obj = priv->priv;
130
131 /* Mandatory callback, may not mean much here */
132 if (trip == obj->crit_trip_id)
133 *type = THERMAL_TRIP_CRITICAL;
134 else
135 *type = THERMAL_TRIP_PASSIVE;
136
137 return 0;
138}
139
140int sys_set_trip_temp(struct thermal_zone_device *tzone, int trip,
141 unsigned long temp)
142{
143 struct int3403_priv *priv = tzone->devdata;
144 struct acpi_device *device = priv->adev;
145 struct int3403_sensor *obj = priv->priv;
146 acpi_status status;
147 char name[10];
148 int ret = 0;
149
150 snprintf(name, sizeof(name), "PAT%d", trip);
151 if (acpi_has_method(device->handle, name)) {
152 status = acpi_execute_simple_method(device->handle, name,
153 MILLI_CELSIUS_TO_DECI_KELVIN(temp,
154 KELVIN_OFFSET));
155 if (ACPI_FAILURE(status))
156 ret = -EIO;
157 else
158 obj->thresholds[trip] = temp;
159 } else {
160 ret = -EIO;
161 dev_err(&device->dev, "sys_set_trip_temp: method not found\n");
162 }
163
164 return ret;
165}
166
167static struct thermal_zone_device_ops tzone_ops = {
168 .get_temp = sys_get_curr_temp,
169 .get_trip_temp = sys_get_trip_temp,
170 .get_trip_type = sys_get_trip_type,
171 .set_trip_temp = sys_set_trip_temp,
172 .get_trip_hyst = sys_get_trip_hyst,
173};
174
175static struct thermal_zone_params int3403_thermal_params = {
176 .governor_name = "user_space",
177 .no_hwmon = true,
178};
179
180static void int3403_notify(acpi_handle handle,
181 u32 event, void *data)
182{
183 struct int3403_priv *priv = data;
184 struct int3403_sensor *obj;
185
186 if (!priv)
187 return;
188
189 obj = priv->priv;
190 if (priv->type != INT3403_TYPE_SENSOR || !obj)
191 return;
192
193 switch (event) {
194 case INT3403_PERF_CHANGED_EVENT:
195 break;
196 case INT3403_THERMAL_EVENT:
197 thermal_zone_device_update(obj->tzone);
198 break;
199 default:
200 dev_err(&priv->pdev->dev, "Unsupported event [0x%x]\n", event);
201 break;
202 }
203}
204
205static int sys_get_trip_crt(struct acpi_device *device, unsigned long *temp)
206{
207 unsigned long long crt;
208 acpi_status status;
209
210 status = acpi_evaluate_integer(device->handle, "_CRT", NULL, &crt);
211 if (ACPI_FAILURE(status))
212 return -EIO;
213
214 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(crt, KELVIN_OFFSET);
215
216 return 0;
217}
218
219static int sys_get_trip_psv(struct acpi_device *device, unsigned long *temp)
220{
221 unsigned long long psv;
222 acpi_status status;
223
224 status = acpi_evaluate_integer(device->handle, "_PSV", NULL, &psv);
225 if (ACPI_FAILURE(status))
226 return -EIO;
227
228 *temp = DECI_KELVIN_TO_MILLI_CELSIUS(psv, KELVIN_OFFSET);
229
230 return 0;
231}
232
233static int int3403_sensor_add(struct int3403_priv *priv)
234{
235 int result = 0;
236 acpi_status status;
237 struct int3403_sensor *obj;
238 unsigned long long trip_cnt;
239 int trip_mask = 0;
240
241 obj = devm_kzalloc(&priv->pdev->dev, sizeof(*obj), GFP_KERNEL);
242 if (!obj)
243 return -ENOMEM;
244
245 priv->priv = obj;
246
247 status = acpi_evaluate_integer(priv->adev->handle, "PATC", NULL,
248 &trip_cnt);
249 if (ACPI_FAILURE(status))
250 trip_cnt = 0;
251
252 if (trip_cnt) {
253		/* We have to cache these, thresholds can't be read back */
254 obj->thresholds = devm_kzalloc(&priv->pdev->dev,
255 sizeof(*obj->thresholds) * trip_cnt,
256 GFP_KERNEL);
257 if (!obj->thresholds) {
258 result = -ENOMEM;
259 goto err_free_obj;
260 }
261 trip_mask = BIT(trip_cnt) - 1;
262 }
263
264 obj->psv_trip_id = -1;
265 if (!sys_get_trip_psv(priv->adev, &obj->psv_temp))
266 obj->psv_trip_id = trip_cnt++;
267
268 obj->crit_trip_id = -1;
269 if (!sys_get_trip_crt(priv->adev, &obj->crit_temp))
270 obj->crit_trip_id = trip_cnt++;
271
272 obj->tzone = thermal_zone_device_register(acpi_device_bid(priv->adev),
273 trip_cnt, trip_mask, priv, &tzone_ops,
274 &int3403_thermal_params, 0, 0);
275 if (IS_ERR(obj->tzone)) {
276 result = PTR_ERR(obj->tzone);
277 obj->tzone = NULL;
278 goto err_free_obj;
279 }
280
281 result = acpi_install_notify_handler(priv->adev->handle,
282 ACPI_DEVICE_NOTIFY, int3403_notify,
283 (void *)priv);
284 if (result)
285 goto err_free_obj;
286
287 return 0;
288
289 err_free_obj:
290 if (obj->tzone)
291 thermal_zone_device_unregister(obj->tzone);
292 return result;
293}
294
295static int int3403_sensor_remove(struct int3403_priv *priv)
296{
297 struct int3403_sensor *obj = priv->priv;
298
299 thermal_zone_device_unregister(obj->tzone);
300 return 0;
301}
302
303/* INT3403 Cooling devices */
304static int int3403_get_max_state(struct thermal_cooling_device *cdev,
305 unsigned long *state)
306{
307 struct int3403_priv *priv = cdev->devdata;
308 struct int3403_cdev *obj = priv->priv;
309
310 *state = obj->max_state;
311 return 0;
312}
313
314static int int3403_get_cur_state(struct thermal_cooling_device *cdev,
315 unsigned long *state)
316{
317 struct int3403_priv *priv = cdev->devdata;
318 unsigned long long level;
319 acpi_status status;
320
321 status = acpi_evaluate_integer(priv->adev->handle, "PPPC", NULL, &level);
322 if (ACPI_SUCCESS(status)) {
323 *state = level;
324 return 0;
325 } else
326 return -EINVAL;
327}
328
329static int
330int3403_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
331{
332 struct int3403_priv *priv = cdev->devdata;
333 acpi_status status;
334
335 status = acpi_execute_simple_method(priv->adev->handle, "SPPC", state);
336 if (ACPI_SUCCESS(status))
337 return 0;
338 else
339 return -EINVAL;
340}
341
342static const struct thermal_cooling_device_ops int3403_cooling_ops = {
343 .get_max_state = int3403_get_max_state,
344 .get_cur_state = int3403_get_cur_state,
345 .set_cur_state = int3403_set_cur_state,
346};
347
348static int int3403_cdev_add(struct int3403_priv *priv)
349{
350 int result = 0;
351 acpi_status status;
352 struct int3403_cdev *obj;
353 struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
354 union acpi_object *p;
355
356 obj = devm_kzalloc(&priv->pdev->dev, sizeof(*obj), GFP_KERNEL);
357 if (!obj)
358 return -ENOMEM;
359
360 status = acpi_evaluate_object(priv->adev->handle, "PPSS", NULL, &buf);
361 if (ACPI_FAILURE(status))
362 return -ENODEV;
363
364 p = buf.pointer;
365 if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
366 printk(KERN_WARNING "Invalid PPSS data\n");
367 return -EFAULT;
368 }
369
370 obj->max_state = p->package.count - 1;
371 obj->cdev =
372 thermal_cooling_device_register(acpi_device_bid(priv->adev),
373 priv, &int3403_cooling_ops);
374 if (IS_ERR(obj->cdev))
375 result = PTR_ERR(obj->cdev);
376
377 priv->priv = obj;
378
379 /* TODO: add ACPI notification support */
380
381 return result;
382}
383
384static int int3403_cdev_remove(struct int3403_priv *priv)
385{
386 struct int3403_cdev *obj = priv->priv;
387
388 thermal_cooling_device_unregister(obj->cdev);
389 return 0;
390}
391
392static int int3403_add(struct platform_device *pdev)
393{
394 struct int3403_priv *priv;
395 int result = 0;
396 acpi_status status;
397
398 priv = devm_kzalloc(&pdev->dev, sizeof(struct int3403_priv),
399 GFP_KERNEL);
400 if (!priv)
401 return -ENOMEM;
402
403 priv->pdev = pdev;
404 priv->adev = ACPI_COMPANION(&(pdev->dev));
405 if (!priv->adev) {
406 result = -EINVAL;
407 goto err;
408 }
409
410 status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
411 NULL, &priv->type);
412 if (ACPI_FAILURE(status)) {
413 result = -EINVAL;
414 goto err;
415 }
416
417 platform_set_drvdata(pdev, priv);
418 switch (priv->type) {
419 case INT3403_TYPE_SENSOR:
420 result = int3403_sensor_add(priv);
421 break;
422 case INT3403_TYPE_CHARGER:
423 case INT3403_TYPE_BATTERY:
424 result = int3403_cdev_add(priv);
425 break;
426 default:
427 result = -EINVAL;
428 }
429
430 if (result)
431 goto err;
432 return result;
433
434err:
435 return result;
436}
437
438static int int3403_remove(struct platform_device *pdev)
439{
440 struct int3403_priv *priv = platform_get_drvdata(pdev);
441
442 switch (priv->type) {
443 case INT3403_TYPE_SENSOR:
444 int3403_sensor_remove(priv);
445 break;
446 case INT3403_TYPE_CHARGER:
447 case INT3403_TYPE_BATTERY:
448 int3403_cdev_remove(priv);
449 break;
450 default:
451 break;
452 }
453
454 return 0;
455}
456
457static const struct acpi_device_id int3403_device_ids[] = {
458 {"INT3403", 0},
459 {"", 0},
460};
461MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
462
463static struct platform_driver int3403_driver = {
464 .probe = int3403_add,
465 .remove = int3403_remove,
466 .driver = {
467 .name = "int3403 thermal",
468 .owner = THIS_MODULE,
469 .acpi_match_table = int3403_device_ids,
470 },
471};
472
473module_platform_driver(int3403_driver);
474
475MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
476MODULE_LICENSE("GPL v2");
477MODULE_DESCRIPTION("ACPI INT3403 thermal driver");
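
The trip bookkeeping in int3403_sensor_add() above deserves spelling out: PATC reports how many programmable auxiliary trips the sensor has, those occupy trip IDs 0..N-1 and are writable (mask BIT(N) - 1), and the _PSV and _CRT trips are appended after them as read-only IDs. A standalone sketch of the arithmetic, not driver code:

	#include <stdio.h>

	#define BIT(n) (1UL << (n))

	int main(void)
	{
		unsigned long trip_cnt = 3;                  /* e.g. PATC = 3 */
		unsigned long trip_mask = BIT(trip_cnt) - 1; /* 0x7: trips 0-2 writable */
		int psv_trip_id = trip_cnt++;                /* trip 3, read-only */
		int crit_trip_id = trip_cnt++;               /* trip 4, read-only */

		printf("mask=%#lx psv=%d crit=%d total=%lu\n",
		       trip_mask, psv_trip_id, crit_trip_id, trip_cnt);
		return 0;
	}
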
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 4b2b999b7611..f8eb625b8400 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -401,6 +401,10 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
 		struct of_phandle_args sensor_specs;
 		int ret, id;
 
+		/* Check whether child is enabled or not */
+		if (!of_device_is_available(child))
+			continue;
+
 		/* For now, thermal framework supports only 1 sensor per zone */
 		ret = of_parse_phandle_with_args(child, "thermal-sensors",
 						 "#thermal-sensor-cells",
@@ -771,6 +775,10 @@ int __init of_parse_thermal_zones(void)
 		struct thermal_zone_device *zone;
 		struct thermal_zone_params *tzp;
 
+		/* Check whether child is enabled or not */
+		if (!of_device_is_available(child))
+			continue;
+
 		tz = thermal_of_build_thermal_zone(child);
 		if (IS_ERR(tz)) {
 			pr_err("failed to build thermal zone %s: %ld\n",
@@ -838,6 +846,10 @@ void of_thermal_destroy_zones(void)
 	for_each_child_of_node(np, child) {
 		struct thermal_zone_device *zone;
 
+		/* Check whether child is enabled or not */
+		if (!of_device_is_available(child))
+			continue;
+
 		zone = thermal_zone_get_zone_by_name(child->name);
 		if (IS_ERR(zone))
 			continue;
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index f251521baaa2..fdd1f523a1ed 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/thermal.h>
+#include <trace/events/thermal.h>
 
 #include "thermal_core.h"
 
@@ -76,7 +77,7 @@ static unsigned long get_target_state(struct thermal_instance *instance,
 		next_target = instance->upper;
 		break;
 	case THERMAL_TREND_DROPPING:
-		if (cur_state == instance->lower) {
+		if (cur_state <= instance->lower) {
 			if (!throttle)
 				next_target = THERMAL_NO_TARGET;
 		} else {
@@ -129,8 +130,10 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 
 	trend = get_tz_trend(tz, trip);
 
-	if (tz->temperature >= trip_temp)
+	if (tz->temperature >= trip_temp) {
 		throttle = true;
+		trace_thermal_zone_trip(tz, trip, trip_type);
+	}
 
 	dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n",
 		trip, trip_type, trip_temp, trend, throttle);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 1e23f4f8d2c2..9bf10aa6069b 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -38,6 +38,9 @@
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/thermal.h>
+
 #include "thermal_core.h"
 #include "thermal_hwmon.h"
 
@@ -368,6 +371,8 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 	if (tz->temperature < trip_temp)
 		return;
 
+	trace_thermal_zone_trip(tz, trip, trip_type);
+
 	if (tz->ops->notify)
 		tz->ops->notify(tz, trip, trip_type);
 
@@ -463,6 +468,7 @@ static void update_temperature(struct thermal_zone_device *tz)
 	tz->temperature = temp;
 	mutex_unlock(&tz->lock);
 
+	trace_thermal_temperature(tz);
 	dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
 		tz->last_temperature, tz->temperature);
 }
@@ -1287,6 +1293,7 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
 	mutex_unlock(&cdev->lock);
 	cdev->ops->set_cur_state(cdev, target);
 	cdev->updated = true;
+	trace_cdev_update(cdev, target);
 	dev_dbg(&cdev->device, "set to state %lu\n", target);
 }
 EXPORT_SYMBOL(thermal_cdev_update);
@@ -1790,6 +1797,10 @@ static int __init thermal_register_governors(void)
 	if (result)
 		return result;
 
+	result = thermal_gov_bang_bang_register();
+	if (result)
+		return result;
+
 	return thermal_gov_user_space_register();
 }
 
@@ -1797,6 +1808,7 @@ static void thermal_unregister_governors(void)
 {
 	thermal_gov_step_wise_unregister();
 	thermal_gov_fair_share_unregister();
+	thermal_gov_bang_bang_unregister();
 	thermal_gov_user_space_unregister();
 }
 
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 3db339fb636f..d15d243de27a 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -69,6 +69,14 @@ static inline int thermal_gov_fair_share_register(void) { return 0; }
 static inline void thermal_gov_fair_share_unregister(void) {}
 #endif /* CONFIG_THERMAL_GOV_FAIR_SHARE */
 
+#ifdef CONFIG_THERMAL_GOV_BANG_BANG
+int thermal_gov_bang_bang_register(void);
+void thermal_gov_bang_bang_unregister(void);
+#else
+static inline int thermal_gov_bang_bang_register(void) { return 0; }
+static inline void thermal_gov_bang_bang_unregister(void) {}
+#endif /* CONFIG_THERMAL_GOV_BANG_BANG */
+
 #ifdef CONFIG_THERMAL_GOV_USER_SPACE
 int thermal_gov_user_space_register(void);
 void thermal_gov_user_space_unregister(void);
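The bang-bang block above follows the header's existing stub pattern: when the governor is configured out, registration collapses to an inline no-op, so thermal_core.c can call it unconditionally. A minimal sketch of the same idiom for a hypothetical governor (CONFIG_THERMAL_GOV_EXAMPLE and the function names are invented for illustration):

/* Hypothetical governor guarded the same way as bang_bang above. */
#ifdef CONFIG_THERMAL_GOV_EXAMPLE
int thermal_gov_example_register(void);
void thermal_gov_example_unregister(void);
#else
static inline int thermal_gov_example_register(void) { return 0; }
static inline void thermal_gov_example_unregister(void) {}
#endif /* CONFIG_THERMAL_GOV_EXAMPLE */

/* Callers need no #ifdef of their own; the stub keeps this compilable. */
static int register_all_governors(void)
{
	return thermal_gov_example_register();
}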
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index a673e5b6a2e0..60fa6278fbce 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -28,18 +28,6 @@
 
 #define UIO_MAX_DEVICES		(1U << MINORBITS)
 
-struct uio_device {
-	struct module		*owner;
-	struct device		*dev;
-	int			minor;
-	atomic_t		event;
-	struct fasync_struct	*async_queue;
-	wait_queue_head_t	wait;
-	struct uio_info		*info;
-	struct kobject		*map_dir;
-	struct kobject		*portio_dir;
-};
-
 static int uio_major;
 static struct cdev *uio_cdev;
 static DEFINE_IDR(uio_idr);
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index e3d5bf0a5021..d0107d424ee4 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -87,6 +87,15 @@ config DA9055_WATCHDOG
 	  This driver can also be built as a module.  If so, the module
 	  will be called da9055_wdt.
 
+config DA9063_WATCHDOG
+	tristate "Dialog DA9063 Watchdog"
+	depends on MFD_DA9063
+	select WATCHDOG_CORE
+	help
+	  Support for the watchdog in the DA9063 PMIC.
+
+	  This driver can be built as a module. The module name is da9063_wdt.
+
 config GPIO_WATCHDOG
 	tristate "Watchdog device controlled through GPIO-line"
 	depends on OF_GPIO
@@ -123,6 +132,7 @@ config WM8350_WATCHDOG
 
 config XILINX_WATCHDOG
 	tristate "Xilinx Watchdog timer"
+	depends on HAS_IOMEM
 	select WATCHDOG_CORE
 	help
 	  Watchdog driver for the xps_timebase_wdt ip core.
@@ -157,6 +167,14 @@ config AT91SAM9X_WATCHDOG
 	  Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will
 	  reboot your system when the timeout is reached.
 
+config CADENCE_WATCHDOG
+	tristate "Cadence Watchdog Timer"
+	depends on ARM
+	select WATCHDOG_CORE
+	help
+	  Say Y here if you want to include support for the watchdog
+	  timer in the Xilinx Zynq.
+
 config 21285_WATCHDOG
 	tristate "DC21285 watchdog"
 	depends on FOOTBRIDGE
@@ -319,6 +337,17 @@ config ORION_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called orion_wdt.
 
+config RN5T618_WATCHDOG
+	tristate "Ricoh RN5T618 watchdog"
+	depends on MFD_RN5T618
+	select WATCHDOG_CORE
+	help
+	  If you say yes here you get support for the watchdog on the
+	  Ricoh RN5T618 PMIC.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rn5t618_wdt.
+
 config SUNXI_WATCHDOG
 	tristate "Allwinner SoCs watchdog support"
 	depends on ARCH_SUNXI
@@ -444,7 +473,7 @@ config SIRFSOC_WATCHDOG
 
 config TEGRA_WATCHDOG
 	tristate "Tegra watchdog"
-	depends on ARCH_TEGRA || COMPILE_TEST
+	depends on (ARCH_TEGRA || COMPILE_TEST) && HAS_IOMEM
 	select WATCHDOG_CORE
 	help
 	  Say Y here to include support for the watchdog timer
@@ -453,6 +482,29 @@ config TEGRA_WATCHDOG
 	  To compile this driver as a module, choose M here: the
 	  module will be called tegra_wdt.
 
+config QCOM_WDT
+	tristate "QCOM watchdog"
+	depends on HAS_IOMEM
+	depends on ARCH_QCOM
+	select WATCHDOG_CORE
+	help
+	  Say Y here to include Watchdog timer support for the watchdog found
+	  on QCOM chipsets. Currently supported targets are the MSM8960,
+	  APQ8064, and IPQ8064.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called qcom_wdt.
+
+config MESON_WATCHDOG
+	tristate "Amlogic Meson SoCs watchdog support"
+	depends on ARCH_MESON
+	select WATCHDOG_CORE
+	help
+	  Say Y here to include support for the watchdog timer
+	  in Amlogic Meson SoCs.
+	  To compile this driver as a module, choose M here: the
+	  module will be called meson_wdt.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index de1701470c14..c569ec8f8a76 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_USBPCWATCHDOG) += pcwd_usb.o
 obj-$(CONFIG_ARM_SP805_WATCHDOG) += sp805_wdt.o
 obj-$(CONFIG_AT91RM9200_WATCHDOG) += at91rm9200_wdt.o
 obj-$(CONFIG_AT91SAM9X_WATCHDOG) += at91sam9_wdt.o
+obj-$(CONFIG_CADENCE_WATCHDOG) += cadence_wdt.o
 obj-$(CONFIG_OMAP_WATCHDOG) += omap_wdt.o
 obj-$(CONFIG_TWL4030_WATCHDOG) += twl4030_wdt.o
 obj-$(CONFIG_21285_WATCHDOG) += wdt285.o
@@ -47,6 +48,7 @@ obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o
 obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o
 obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o
 obj-$(CONFIG_SUNXI_WATCHDOG) += sunxi_wdt.o
+obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o
 obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
 obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
 obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
@@ -57,8 +59,10 @@ obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o
 obj-$(CONFIG_BCM2835_WDT) += bcm2835_wdt.o
 obj-$(CONFIG_MOXART_WDT) += moxart_wdt.o
 obj-$(CONFIG_SIRFSOC_WATCHDOG) += sirfsoc_wdt.o
+obj-$(CONFIG_QCOM_WDT) += qcom-wdt.o
 obj-$(CONFIG_BCM_KONA_WDT) += bcm_kona_wdt.o
 obj-$(CONFIG_TEGRA_WATCHDOG) += tegra_wdt.o
+obj-$(CONFIG_MESON_WATCHDOG) += meson_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
@@ -173,6 +177,7 @@ obj-$(CONFIG_XEN_WDT) += xen_wdt.o
 # Architecture Independent
 obj-$(CONFIG_DA9052_WATCHDOG) += da9052_wdt.o
 obj-$(CONFIG_DA9055_WATCHDOG) += da9055_wdt.o
+obj-$(CONFIG_DA9063_WATCHDOG) += da9063_wdt.o
 obj-$(CONFIG_GPIO_WATCHDOG) += gpio_wdt.o
 obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o
 obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o
diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index 08a785398eac..e96b09b135c8 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c
@@ -30,8 +30,6 @@
  * occur, and the final time the board will reset.
  */
 
-u32 booke_wdt_enabled;
-u32 booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT;
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
 #define WDTP(x)		((((x)&0x3)<<30)|(((x)&0x3c)<<15))
@@ -41,27 +39,10 @@ u32 booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT;
 #define WDTP_MASK	(TCR_WP_MASK)
 #endif
 
-/* Checks wdt=x and wdt_period=xx command-line option */
-notrace int __init early_parse_wdt(char *p)
-{
-	if (p && strncmp(p, "0", 1) != 0)
-		booke_wdt_enabled = 1;
-
-	return 0;
-}
-early_param("wdt", early_parse_wdt);
-
-int __init early_parse_wdt_period(char *p)
-{
-	unsigned long ret;
-	if (p) {
-		if (!kstrtol(p, 0, &ret))
-			booke_wdt_period = ret;
-	}
-
-	return 0;
-}
-early_param("wdt_period", early_parse_wdt_period);
+static bool booke_wdt_enabled;
+module_param(booke_wdt_enabled, bool, 0);
+static int booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT;
+module_param(booke_wdt_period, int, 0);
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
 
@@ -259,5 +240,6 @@ static int __init booke_wdt_init(void)
 module_init(booke_wdt_init);
 module_exit(booke_wdt_exit);
 
+MODULE_ALIAS("booke_wdt");
 MODULE_DESCRIPTION("PowerPC Book-E watchdog driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
new file mode 100644
index 000000000000..5927c0a98a74
--- /dev/null
+++ b/drivers/watchdog/cadence_wdt.c
@@ -0,0 +1,516 @@
1/*
2 * Cadence WDT driver - Used by Xilinx Zynq
3 *
4 * Copyright (C) 2010 - 2014 Xilinx, Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/clk.h>
13#include <linux/init.h>
14#include <linux/interrupt.h>
15#include <linux/io.h>
16#include <linux/irq.h>
17#include <linux/kernel.h>
18#include <linux/module.h>
19#include <linux/of.h>
20#include <linux/platform_device.h>
21#include <linux/reboot.h>
22#include <linux/watchdog.h>
23
24#define CDNS_WDT_DEFAULT_TIMEOUT 10
25/* Supports 1 - 516 sec */
26#define CDNS_WDT_MIN_TIMEOUT 1
27#define CDNS_WDT_MAX_TIMEOUT 516
28
29/* Restart key */
30#define CDNS_WDT_RESTART_KEY 0x00001999
31
32/* Counter register access key */
33#define CDNS_WDT_REGISTER_ACCESS_KEY 0x00920000
34
35/* Counter value divisor */
36#define CDNS_WDT_COUNTER_VALUE_DIVISOR 0x1000
37
38/* Clock prescaler value and selection */
39#define CDNS_WDT_PRESCALE_64 64
40#define CDNS_WDT_PRESCALE_512 512
41#define CDNS_WDT_PRESCALE_4096 4096
42#define CDNS_WDT_PRESCALE_SELECT_64 1
43#define CDNS_WDT_PRESCALE_SELECT_512 2
44#define CDNS_WDT_PRESCALE_SELECT_4096 3
45
46/* Input clock frequency */
47#define CDNS_WDT_CLK_10MHZ 10000000
48#define CDNS_WDT_CLK_75MHZ 75000000
49
50/* Counter maximum value */
51#define CDNS_WDT_COUNTER_MAX 0xFFF
52
53static int wdt_timeout = CDNS_WDT_DEFAULT_TIMEOUT;
54static int nowayout = WATCHDOG_NOWAYOUT;
55
56module_param(wdt_timeout, int, 0);
57MODULE_PARM_DESC(wdt_timeout,
58 "Watchdog time in seconds. (default="
59 __MODULE_STRING(CDNS_WDT_DEFAULT_TIMEOUT) ")");
60
61module_param(nowayout, int, 0);
62MODULE_PARM_DESC(nowayout,
63 "Watchdog cannot be stopped once started (default="
64 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
65
66/**
67 * struct cdns_wdt - Watchdog device structure
68 * @regs: baseaddress of device
69 * @rst: reset flag
70 * @clk: struct clk * of a clock source
71 * @prescaler: for saving prescaler value
72 * @ctrl_clksel: counter clock prescaler selection
73 * @io_lock: spinlock for IO register access
74 * @cdns_wdt_device: watchdog device structure
75 * @cdns_wdt_notifier: notifier structure
76 *
77 * Structure containing parameters specific to cadence watchdog.
78 */
79struct cdns_wdt {
80 void __iomem *regs;
81 bool rst;
82 struct clk *clk;
83 u32 prescaler;
84 u32 ctrl_clksel;
85 spinlock_t io_lock;
86 struct watchdog_device cdns_wdt_device;
87 struct notifier_block cdns_wdt_notifier;
88};
89
90/* Write access to Registers */
91static inline void cdns_wdt_writereg(struct cdns_wdt *wdt, u32 offset, u32 val)
92{
93 writel_relaxed(val, wdt->regs + offset);
94}
95
96/*************************Register Map**************************************/
97
98/* Register Offsets for the WDT */
99#define CDNS_WDT_ZMR_OFFSET 0x0 /* Zero Mode Register */
100#define CDNS_WDT_CCR_OFFSET 0x4 /* Counter Control Register */
101#define CDNS_WDT_RESTART_OFFSET 0x8 /* Restart Register */
102#define CDNS_WDT_SR_OFFSET 0xC /* Status Register */
103
104/*
105 * Zero Mode Register - This register controls how the time out is indicated
106 * and also contains the access code to allow writes to the register (0xABC).
107 */
108#define CDNS_WDT_ZMR_WDEN_MASK 0x00000001 /* Enable the WDT */
109#define CDNS_WDT_ZMR_RSTEN_MASK 0x00000002 /* Enable the reset output */
110#define CDNS_WDT_ZMR_IRQEN_MASK 0x00000004 /* Enable IRQ output */
111#define CDNS_WDT_ZMR_RSTLEN_16 0x00000030 /* Reset pulse of 16 pclk cycles */
112#define CDNS_WDT_ZMR_ZKEY_VAL 0x00ABC000 /* Access key, 0xABC << 12 */
113/*
114 * Counter Control register - This register controls how fast the timer runs
115 * and the reset value and also contains the access code to allow writes to
116 * the register.
117 */
118#define CDNS_WDT_CCR_CRV_MASK 0x00003FFC /* Counter reset value */
119
120/**
121 * cdns_wdt_stop - Stop the watchdog.
122 *
123 * @wdd: watchdog device
124 *
125 * Read the contents of the ZMR register, clear the WDEN bit
126 * in the register and set the access key for successful write.
127 *
128 * Return: always 0
129 */
130static int cdns_wdt_stop(struct watchdog_device *wdd)
131{
132 struct cdns_wdt *wdt = watchdog_get_drvdata(wdd);
133
134 spin_lock(&wdt->io_lock);
135 cdns_wdt_writereg(wdt, CDNS_WDT_ZMR_OFFSET,
136 CDNS_WDT_ZMR_ZKEY_VAL & (~CDNS_WDT_ZMR_WDEN_MASK));
137 spin_unlock(&wdt->io_lock);
138
139 return 0;
140}
141
142/**
143 * cdns_wdt_reload - Reload the watchdog timer (i.e. pat the watchdog).
144 *
145 * @wdd: watchdog device
146 *
147 * Write the restart key value (0x00001999) to the restart register.
148 *
149 * Return: always 0
150 */
151static int cdns_wdt_reload(struct watchdog_device *wdd)
152{
153 struct cdns_wdt *wdt = watchdog_get_drvdata(wdd);
154
155 spin_lock(&wdt->io_lock);
156 cdns_wdt_writereg(wdt, CDNS_WDT_RESTART_OFFSET,
157 CDNS_WDT_RESTART_KEY);
158 spin_unlock(&wdt->io_lock);
159
160 return 0;
161}
162
163/**
164 * cdns_wdt_start - Enable and start the watchdog.
165 *
166 * @wdd: watchdog device
167 *
168 * The counter value is calculated according to the formula:
169 * calculated count = (timeout * clock) / prescaler + 1.
170 * The calculated count is divided by 0x1000 to obtain the field value
171 * to write to counter control register.
172 * Clears the contents of prescaler and counter reset value. Sets the
173 * prescaler to 4096 and the calculated count and access key
174 * to write to CCR Register.
175 * Sets the WDT enable (WDEN bit) and either the reset signal (RSTEN bit)
176 * or the interrupt signal (IRQEN bit), together with the reset pulse width
177 * and the access key, and writes the result to the ZMR register.
178 *
179 * Return: always 0
180 */
181static int cdns_wdt_start(struct watchdog_device *wdd)
182{
183 struct cdns_wdt *wdt = watchdog_get_drvdata(wdd);
184 unsigned int data = 0;
185 unsigned short count;
186 unsigned long clock_f = clk_get_rate(wdt->clk);
187
188 /*
189 * Counter value divisor to obtain the value of
190 * counter reset to be written to control register.
191 */
192 count = (wdd->timeout * (clock_f / wdt->prescaler)) /
193 CDNS_WDT_COUNTER_VALUE_DIVISOR + 1;
194
195 if (count > CDNS_WDT_COUNTER_MAX)
196 count = CDNS_WDT_COUNTER_MAX;
197
198 spin_lock(&wdt->io_lock);
199 cdns_wdt_writereg(wdt, CDNS_WDT_ZMR_OFFSET,
200 CDNS_WDT_ZMR_ZKEY_VAL);
201
202 count = (count << 2) & CDNS_WDT_CCR_CRV_MASK;
203
204	/* Write the counter access key first to be able to write to the register */
205 data = count | CDNS_WDT_REGISTER_ACCESS_KEY | wdt->ctrl_clksel;
206 cdns_wdt_writereg(wdt, CDNS_WDT_CCR_OFFSET, data);
207 data = CDNS_WDT_ZMR_WDEN_MASK | CDNS_WDT_ZMR_RSTLEN_16 |
208 CDNS_WDT_ZMR_ZKEY_VAL;
209
210 /* Reset on timeout if specified in device tree. */
211 if (wdt->rst) {
212 data |= CDNS_WDT_ZMR_RSTEN_MASK;
213 data &= ~CDNS_WDT_ZMR_IRQEN_MASK;
214 } else {
215 data &= ~CDNS_WDT_ZMR_RSTEN_MASK;
216 data |= CDNS_WDT_ZMR_IRQEN_MASK;
217 }
218 cdns_wdt_writereg(wdt, CDNS_WDT_ZMR_OFFSET, data);
219 cdns_wdt_writereg(wdt, CDNS_WDT_RESTART_OFFSET,
220 CDNS_WDT_RESTART_KEY);
221 spin_unlock(&wdt->io_lock);
222
223 return 0;
224}
225
226/**
227 * cdns_wdt_settimeout - Set a new timeout value for the watchdog device.
228 *
229 * @wdd: watchdog device
230 * @new_time: new timeout value that needs to be set
231 * Return: 0 on success
232 *
233 * Update the watchdog_device timeout with new value which is used when
234 * cdns_wdt_start is called.
235 */
236static int cdns_wdt_settimeout(struct watchdog_device *wdd,
237 unsigned int new_time)
238{
239 wdd->timeout = new_time;
240
241 return cdns_wdt_start(wdd);
242}
243
244/**
245 * cdns_wdt_irq_handler - Notifies of watchdog timeout.
246 *
247 * @irq: interrupt number
248 * @dev_id: pointer to a platform device structure
249 * Return: IRQ_HANDLED
250 *
251 * The handler is invoked when the watchdog times out and a
252 * reset on timeout has not been enabled.
253 */
254static irqreturn_t cdns_wdt_irq_handler(int irq, void *dev_id)
255{
256 struct platform_device *pdev = dev_id;
257
258 dev_info(&pdev->dev,
259 "Watchdog timed out. Internal reset not enabled\n");
260
261 return IRQ_HANDLED;
262}
263
264/*
265 * Info structure used to indicate the features supported by the device
266 * to the upper layers. This is defined in watchdog.h header file.
267 */
268static struct watchdog_info cdns_wdt_info = {
269 .identity = "cdns_wdt watchdog",
270 .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
271 WDIOF_MAGICCLOSE,
272};
273
274/* Watchdog Core Ops */
275static struct watchdog_ops cdns_wdt_ops = {
276 .owner = THIS_MODULE,
277 .start = cdns_wdt_start,
278 .stop = cdns_wdt_stop,
279 .ping = cdns_wdt_reload,
280 .set_timeout = cdns_wdt_settimeout,
281};
282
283/**
284 * cdns_wdt_notify_sys - Notifier for reboot or shutdown.
285 *
286 * @this: handle to notifier block
287 * @code: turn off indicator
288 * @unused: unused
289 * Return: NOTIFY_DONE
290 *
291 * This notifier is invoked on system reboot or shutdown. The WDT must be
292 * disabled before the system goes down, since it might otherwise trigger
293 * a reset during the next boot.
294 */
295static int cdns_wdt_notify_sys(struct notifier_block *this, unsigned long code,
296 void *unused)
297{
298 struct cdns_wdt *wdt = container_of(this, struct cdns_wdt,
299 cdns_wdt_notifier);
300 if (code == SYS_DOWN || code == SYS_HALT)
301 cdns_wdt_stop(&wdt->cdns_wdt_device);
302
303 return NOTIFY_DONE;
304}
305
306/************************Platform Operations*****************************/
307/**
308 * cdns_wdt_probe - Probe call for the device.
309 *
310 * @pdev: handle to the platform device structure.
311 * Return: 0 on success, negative error otherwise.
312 *
313 * It does all the memory allocation and registration for the device.
314 */
315static int cdns_wdt_probe(struct platform_device *pdev)
316{
317 struct resource *res;
318 int ret, irq;
319 unsigned long clock_f;
320 struct cdns_wdt *wdt;
321 struct watchdog_device *cdns_wdt_device;
322
323 wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
324 if (!wdt)
325 return -ENOMEM;
326
327 cdns_wdt_device = &wdt->cdns_wdt_device;
328 cdns_wdt_device->info = &cdns_wdt_info;
329 cdns_wdt_device->ops = &cdns_wdt_ops;
330 cdns_wdt_device->timeout = CDNS_WDT_DEFAULT_TIMEOUT;
331 cdns_wdt_device->min_timeout = CDNS_WDT_MIN_TIMEOUT;
332 cdns_wdt_device->max_timeout = CDNS_WDT_MAX_TIMEOUT;
333
334 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
335 wdt->regs = devm_ioremap_resource(&pdev->dev, res);
336 if (IS_ERR(wdt->regs))
337 return PTR_ERR(wdt->regs);
338
339 /* Register the interrupt */
340 wdt->rst = of_property_read_bool(pdev->dev.of_node, "reset-on-timeout");
341 irq = platform_get_irq(pdev, 0);
342 if (!wdt->rst && irq >= 0) {
343 ret = devm_request_irq(&pdev->dev, irq, cdns_wdt_irq_handler, 0,
344 pdev->name, pdev);
345 if (ret) {
346 dev_err(&pdev->dev,
347 "cannot register interrupt handler err=%d\n",
348 ret);
349 return ret;
350 }
351 }
352
353 /* Initialize the members of cdns_wdt structure */
354 cdns_wdt_device->parent = &pdev->dev;
355
356 ret = watchdog_init_timeout(cdns_wdt_device, wdt_timeout, &pdev->dev);
357 if (ret) {
358 dev_err(&pdev->dev, "unable to set timeout value\n");
359 return ret;
360 }
361
362 watchdog_set_nowayout(cdns_wdt_device, nowayout);
363 watchdog_set_drvdata(cdns_wdt_device, wdt);
364
365 wdt->clk = devm_clk_get(&pdev->dev, NULL);
366 if (IS_ERR(wdt->clk)) {
367 dev_err(&pdev->dev, "input clock not found\n");
368 ret = PTR_ERR(wdt->clk);
369 return ret;
370 }
371
372 ret = clk_prepare_enable(wdt->clk);
373 if (ret) {
374 dev_err(&pdev->dev, "unable to enable clock\n");
375 return ret;
376 }
377
378 clock_f = clk_get_rate(wdt->clk);
379 if (clock_f <= CDNS_WDT_CLK_75MHZ) {
380 wdt->prescaler = CDNS_WDT_PRESCALE_512;
381 wdt->ctrl_clksel = CDNS_WDT_PRESCALE_SELECT_512;
382 } else {
383 wdt->prescaler = CDNS_WDT_PRESCALE_4096;
384 wdt->ctrl_clksel = CDNS_WDT_PRESCALE_SELECT_4096;
385 }
386
387 spin_lock_init(&wdt->io_lock);
388
389 wdt->cdns_wdt_notifier.notifier_call = &cdns_wdt_notify_sys;
390 ret = register_reboot_notifier(&wdt->cdns_wdt_notifier);
391 if (ret != 0) {
392 dev_err(&pdev->dev, "cannot register reboot notifier err=%d)\n",
393 ret);
394 goto err_clk_disable;
395 }
396
397 ret = watchdog_register_device(cdns_wdt_device);
398 if (ret) {
399 dev_err(&pdev->dev, "Failed to register wdt device\n");
400 goto err_clk_disable;
401 }
402 platform_set_drvdata(pdev, wdt);
403
404 dev_dbg(&pdev->dev, "Xilinx Watchdog Timer at %p with timeout %ds%s\n",
405 wdt->regs, cdns_wdt_device->timeout,
406 nowayout ? ", nowayout" : "");
407
408 return 0;
409
410err_clk_disable:
411 clk_disable_unprepare(wdt->clk);
412
413 return ret;
414}
415
416/**
417 * cdns_wdt_remove - Remove call for the device.
418 *
419 * @pdev: handle to the platform device structure.
420 * Return: 0 on success, otherwise negative error.
421 *
422 * Unregister the device after releasing the resources.
423 */
424static int cdns_wdt_remove(struct platform_device *pdev)
425{
426 struct cdns_wdt *wdt = platform_get_drvdata(pdev);
427
428 cdns_wdt_stop(&wdt->cdns_wdt_device);
429 watchdog_unregister_device(&wdt->cdns_wdt_device);
430 unregister_reboot_notifier(&wdt->cdns_wdt_notifier);
431 clk_disable_unprepare(wdt->clk);
432
433 return 0;
434}
435
436/**
437 * cdns_wdt_shutdown - Stop the device.
438 *
439 * @pdev: handle to the platform structure.
440 *
441 */
442static void cdns_wdt_shutdown(struct platform_device *pdev)
443{
444 struct cdns_wdt *wdt = platform_get_drvdata(pdev);
445
446 cdns_wdt_stop(&wdt->cdns_wdt_device);
447 clk_disable_unprepare(wdt->clk);
448}
449
450/**
451 * cdns_wdt_suspend - Stop the device.
452 *
453 * @dev: handle to the device structure.
454 * Return: 0 always.
455 */
456static int __maybe_unused cdns_wdt_suspend(struct device *dev)
457{
458 struct platform_device *pdev = container_of(dev,
459 struct platform_device, dev);
460 struct cdns_wdt *wdt = platform_get_drvdata(pdev);
461
462 cdns_wdt_stop(&wdt->cdns_wdt_device);
463 clk_disable_unprepare(wdt->clk);
464
465 return 0;
466}
467
468/**
469 * cdns_wdt_resume - Resume the device.
470 *
471 * @dev: handle to the device structure.
472 * Return: 0 on success, errno otherwise.
473 */
474static int __maybe_unused cdns_wdt_resume(struct device *dev)
475{
476 int ret;
477 struct platform_device *pdev = container_of(dev,
478 struct platform_device, dev);
479 struct cdns_wdt *wdt = platform_get_drvdata(pdev);
480
481 ret = clk_prepare_enable(wdt->clk);
482 if (ret) {
483 dev_err(dev, "unable to enable clock\n");
484 return ret;
485 }
486 cdns_wdt_start(&wdt->cdns_wdt_device);
487
488 return 0;
489}
490
491static SIMPLE_DEV_PM_OPS(cdns_wdt_pm_ops, cdns_wdt_suspend, cdns_wdt_resume);
492
493static struct of_device_id cdns_wdt_of_match[] = {
494 { .compatible = "cdns,wdt-r1p2", },
495 { /* end of table */ }
496};
497MODULE_DEVICE_TABLE(of, cdns_wdt_of_match);
498
499/* Driver Structure */
500static struct platform_driver cdns_wdt_driver = {
501 .probe = cdns_wdt_probe,
502 .remove = cdns_wdt_remove,
503 .shutdown = cdns_wdt_shutdown,
504 .driver = {
505 .name = "cdns-wdt",
506 .owner = THIS_MODULE,
507 .of_match_table = cdns_wdt_of_match,
508 .pm = &cdns_wdt_pm_ops,
509 },
510};
511
512module_platform_driver(cdns_wdt_driver);
513
514MODULE_AUTHOR("Xilinx, Inc.");
515MODULE_DESCRIPTION("Watchdog driver for Cadence WDT");
516MODULE_LICENSE("GPL");
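As a cross-check of the cdns_wdt_start() arithmetic above, here is a small hosted-C sketch of the CCR count calculation, assuming a 100 MHz input clock (above the 75 MHz threshold, so the 4096 prescaler is selected) and the default 10 second timeout:

#include <stdio.h>

int main(void)
{
	unsigned long clock_f = 100000000;	/* assumed input clock rate */
	unsigned int prescaler = 4096;		/* CDNS_WDT_PRESCALE_4096 */
	unsigned int timeout = 10;		/* seconds */

	/* count = (timeout * clock / prescaler) / 0x1000 + 1, capped at 0xFFF */
	unsigned int count = timeout * (clock_f / prescaler) / 0x1000 + 1;
	if (count > 0xFFF)
		count = 0xFFF;

	/* The field sits at bits [13:2] of CCR, hence the shift and mask. */
	printf("count=%u, CCR field=0x%x\n", count, (count << 2) & 0x3FFC);
	return 0;	/* prints count=60, CCR field=0xf0 */
}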
diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c
new file mode 100644
index 000000000000..2cd6b2c2dd2a
--- /dev/null
+++ b/drivers/watchdog/da9063_wdt.c
@@ -0,0 +1,191 @@
1/*
2 * Watchdog driver for DA9063 PMICs.
3 *
4 * Copyright(c) 2012 Dialog Semiconductor Ltd.
5 *
6 * Author: Mariusz Wojtasik <mariusz.wojtasik@diasemi.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/watchdog.h>
17#include <linux/platform_device.h>
18#include <linux/uaccess.h>
19#include <linux/slab.h>
20#include <linux/delay.h>
21#include <linux/mfd/da9063/registers.h>
22#include <linux/mfd/da9063/core.h>
23#include <linux/regmap.h>
24
25/*
26 * Watchdog selector to timeout in seconds.
27 * 0: WDT disabled;
28 * others: timeout = 2048 ms * 2^(TWDSCALE-1).
29 */
30static const unsigned int wdt_timeout[] = { 0, 2, 4, 8, 16, 32, 65, 131 };
31#define DA9063_TWDSCALE_DISABLE 0
32#define DA9063_TWDSCALE_MIN 1
33#define DA9063_TWDSCALE_MAX (ARRAY_SIZE(wdt_timeout) - 1)
34#define DA9063_WDT_MIN_TIMEOUT wdt_timeout[DA9063_TWDSCALE_MIN]
35#define DA9063_WDT_MAX_TIMEOUT wdt_timeout[DA9063_TWDSCALE_MAX]
36#define DA9063_WDG_TIMEOUT wdt_timeout[3]
37
38struct da9063_watchdog {
39 struct da9063 *da9063;
40 struct watchdog_device wdtdev;
41};
42
43static unsigned int da9063_wdt_timeout_to_sel(unsigned int secs)
44{
45 unsigned int i;
46
47 for (i = DA9063_TWDSCALE_MIN; i <= DA9063_TWDSCALE_MAX; i++) {
48 if (wdt_timeout[i] >= secs)
49 return i;
50 }
51
52 return DA9063_TWDSCALE_MAX;
53}
54
55static int _da9063_wdt_set_timeout(struct da9063 *da9063, unsigned int regval)
56{
57 return regmap_update_bits(da9063->regmap, DA9063_REG_CONTROL_D,
58 DA9063_TWDSCALE_MASK, regval);
59}
60
61static int da9063_wdt_start(struct watchdog_device *wdd)
62{
63 struct da9063_watchdog *wdt = watchdog_get_drvdata(wdd);
64 unsigned int selector;
65 int ret;
66
67 selector = da9063_wdt_timeout_to_sel(wdt->wdtdev.timeout);
68 ret = _da9063_wdt_set_timeout(wdt->da9063, selector);
69 if (ret)
70 dev_err(wdt->da9063->dev, "Watchdog failed to start (err = %d)\n",
71 ret);
72
73 return ret;
74}
75
76static int da9063_wdt_stop(struct watchdog_device *wdd)
77{
78 struct da9063_watchdog *wdt = watchdog_get_drvdata(wdd);
79 int ret;
80
81 ret = regmap_update_bits(wdt->da9063->regmap, DA9063_REG_CONTROL_D,
82 DA9063_TWDSCALE_MASK, DA9063_TWDSCALE_DISABLE);
83 if (ret)
84 dev_alert(wdt->da9063->dev, "Watchdog failed to stop (err = %d)\n",
85 ret);
86
87 return ret;
88}
89
90static int da9063_wdt_ping(struct watchdog_device *wdd)
91{
92 struct da9063_watchdog *wdt = watchdog_get_drvdata(wdd);
93 int ret;
94
95 ret = regmap_write(wdt->da9063->regmap, DA9063_REG_CONTROL_F,
96 DA9063_WATCHDOG);
97 if (ret)
98 dev_alert(wdt->da9063->dev, "Failed to ping the watchdog (err = %d)\n",
99 ret);
100
101 return ret;
102}
103
104static int da9063_wdt_set_timeout(struct watchdog_device *wdd,
105 unsigned int timeout)
106{
107 struct da9063_watchdog *wdt = watchdog_get_drvdata(wdd);
108 unsigned int selector;
109 int ret;
110
111 selector = da9063_wdt_timeout_to_sel(timeout);
112 ret = _da9063_wdt_set_timeout(wdt->da9063, selector);
113 if (ret)
114 dev_err(wdt->da9063->dev, "Failed to set watchdog timeout (err = %d)\n",
115 ret);
116 else
117 wdd->timeout = wdt_timeout[selector];
118
119 return ret;
120}
121
122static const struct watchdog_info da9063_watchdog_info = {
123 .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
124 .identity = "DA9063 Watchdog",
125};
126
127static const struct watchdog_ops da9063_watchdog_ops = {
128 .owner = THIS_MODULE,
129 .start = da9063_wdt_start,
130 .stop = da9063_wdt_stop,
131 .ping = da9063_wdt_ping,
132 .set_timeout = da9063_wdt_set_timeout,
133};
134
135static int da9063_wdt_probe(struct platform_device *pdev)
136{
137 int ret;
138 struct da9063 *da9063;
139 struct da9063_watchdog *wdt;
140
141 if (!pdev->dev.parent)
142 return -EINVAL;
143
144 da9063 = dev_get_drvdata(pdev->dev.parent);
145 if (!da9063)
146 return -EINVAL;
147
148 wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
149 if (!wdt)
150 return -ENOMEM;
151
152 wdt->da9063 = da9063;
153
154 wdt->wdtdev.info = &da9063_watchdog_info;
155 wdt->wdtdev.ops = &da9063_watchdog_ops;
156 wdt->wdtdev.min_timeout = DA9063_WDT_MIN_TIMEOUT;
157 wdt->wdtdev.max_timeout = DA9063_WDT_MAX_TIMEOUT;
158 wdt->wdtdev.timeout = DA9063_WDG_TIMEOUT;
159
160 wdt->wdtdev.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
161
162 watchdog_set_drvdata(&wdt->wdtdev, wdt);
163 dev_set_drvdata(&pdev->dev, wdt);
164
165 ret = watchdog_register_device(&wdt->wdtdev);
166
167 return ret;
168}
169
170static int da9063_wdt_remove(struct platform_device *pdev)
171{
172 struct da9063_watchdog *wdt = dev_get_drvdata(&pdev->dev);
173
174 watchdog_unregister_device(&wdt->wdtdev);
175
176 return 0;
177}
178
179static struct platform_driver da9063_wdt_driver = {
180 .probe = da9063_wdt_probe,
181 .remove = da9063_wdt_remove,
182 .driver = {
183 .name = DA9063_DRVNAME_WATCHDOG,
184 },
185};
186module_platform_driver(da9063_wdt_driver);
187
188MODULE_AUTHOR("Mariusz Wojtasik <mariusz.wojtasik@diasemi.com>");
189MODULE_DESCRIPTION("Watchdog driver for Dialog DA9063");
190MODULE_LICENSE("GPL");
191MODULE_ALIAS("platform:" DA9063_DRVNAME_WATCHDOG);
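The wdt_timeout[] table above is just the datasheet formula rounded down to whole seconds; a quick hosted-C check of timeout = 2048 ms * 2^(TWDSCALE - 1):

#include <stdio.h>

int main(void)
{
	/* TWDSCALE 1..7 should reproduce 2, 4, 8, 16, 32, 65, 131 seconds. */
	for (unsigned int sel = 1; sel <= 7; sel++) {
		unsigned int ms = 2048u << (sel - 1);
		printf("TWDSCALE=%u -> %u ms (%u s)\n", sel, ms, ms / 1000);
	}
	return 0;
}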
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
index 9f210299de24..9e577a64ec9e 100644
--- a/drivers/watchdog/dw_wdt.c
+++ b/drivers/watchdog/dw_wdt.c
@@ -21,6 +21,7 @@
 
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/fs.h>
@@ -29,9 +30,11 @@
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/pm.h>
 #include <linux/platform_device.h>
+#include <linux/reboot.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/uaccess.h>
@@ -40,6 +43,7 @@
 #define WDOG_CONTROL_REG_OFFSET		0x00
 #define WDOG_CONTROL_REG_WDT_EN_MASK	0x01
 #define WDOG_TIMEOUT_RANGE_REG_OFFSET	0x04
+#define WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT 4
 #define WDOG_CURRENT_COUNT_REG_OFFSET	0x08
 #define WDOG_COUNTER_RESTART_REG_OFFSET	0x0c
 #define WDOG_COUNTER_RESTART_KICK_VALUE	0x76
@@ -62,6 +66,7 @@ static struct {
 	unsigned long		next_heartbeat;
 	struct timer_list	timer;
 	int			expect_close;
+	struct notifier_block	restart_handler;
 } dw_wdt;
 
 static inline int dw_wdt_is_enabled(void)
@@ -106,7 +111,8 @@ static int dw_wdt_set_top(unsigned top_s)
 	}
 
 	/* Set the new value in the watchdog. */
-	writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+	writel(top_val | top_val << WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT,
+	       dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
 
 	dw_wdt_set_next_heartbeat();
 
@@ -119,6 +125,26 @@ static void dw_wdt_keepalive(void)
 		WDOG_COUNTER_RESTART_REG_OFFSET);
 }
 
+static int dw_wdt_restart_handle(struct notifier_block *this,
+				 unsigned long mode, void *cmd)
+{
+	u32 val;
+
+	writel(0, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+	val = readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+	if (val & WDOG_CONTROL_REG_WDT_EN_MASK)
+		writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs +
+		       WDOG_COUNTER_RESTART_REG_OFFSET);
+	else
+		writel(WDOG_CONTROL_REG_WDT_EN_MASK,
+		       dw_wdt.regs + WDOG_CONTROL_REG_OFFSET);
+
+	/* wait for reset to assert... */
+	mdelay(500);
+
+	return NOTIFY_DONE;
+}
+
 static void dw_wdt_ping(unsigned long data)
 {
 	if (time_before(jiffies, dw_wdt.next_heartbeat) ||
@@ -314,6 +340,12 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_disable_clk;
 
+	dw_wdt.restart_handler.notifier_call = dw_wdt_restart_handle;
+	dw_wdt.restart_handler.priority = 128;
+	ret = register_restart_handler(&dw_wdt.restart_handler);
+	if (ret)
+		pr_warn("cannot register restart handler\n");
+
 	dw_wdt_set_next_heartbeat();
 	setup_timer(&dw_wdt.timer, dw_wdt_ping, 0);
 	mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT);
@@ -328,6 +360,8 @@ out_disable_clk:
 
 static int dw_wdt_drv_remove(struct platform_device *pdev)
 {
+	unregister_restart_handler(&dw_wdt.restart_handler);
+
 	misc_deregister(&dw_wdt_miscdev);
 
 	clk_disable_unprepare(dw_wdt.clk);
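The new restart handler programs the smallest timeout range (TOP = 0) and then kicks or enables the counter, so the reset fires almost immediately. On this IP the TOP field is commonly documented as selecting a period of 2^(16 + TOP) input-clock cycles; under that assumption, a rough feel for the numbers:

#include <stdio.h>

int main(void)
{
	unsigned long rate = 25000000;	/* assumed 25 MHz WDT input clock */

	/* TOP = 0 gives 2^16 cycles, i.e. a few milliseconds at 25 MHz. */
	for (unsigned int top = 0; top <= 15; top++) {
		unsigned long long cycles = 1ULL << (16 + top);
		printf("TOP=%2u -> %.4f s\n", top, (double)cycles / rate);
	}
	return 0;
}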
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 68c3d379ffa8..7e12f88bb4a6 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -22,14 +22,17 @@
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/notifier.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
+#include <linux/reboot.h>
 #include <linux/regmap.h>
 #include <linux/timer.h>
 #include <linux/watchdog.h>
@@ -59,6 +62,7 @@ struct imx2_wdt_device {
 	struct regmap *regmap;
 	struct timer_list timer;	/* Pings the watchdog when closed */
 	struct watchdog_device wdog;
+	struct notifier_block restart_handler;
 };
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
@@ -77,6 +81,31 @@ static const struct watchdog_info imx2_wdt_info = {
 	.options	= WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE,
 };
 
+static int imx2_restart_handler(struct notifier_block *this, unsigned long mode,
+				void *cmd)
+{
+	unsigned int wcr_enable = IMX2_WDT_WCR_WDE;
+	struct imx2_wdt_device *wdev = container_of(this,
+						    struct imx2_wdt_device,
+						    restart_handler);
+	/* Assert SRS signal */
+	regmap_write(wdev->regmap, 0, wcr_enable);
+	/*
+	 * Due to imx6q errata ERR004346 (WDOG: WDOG SRS bit requires to be
+	 * written twice), we add another two writes to ensure there are at
+	 * least two writes within the same 32kHz clock period. We skip the
+	 * SoC-model check here, since the extra writes are no great burden
+	 * for other platforms.
+	 */
+	regmap_write(wdev->regmap, 0, wcr_enable);
+	regmap_write(wdev->regmap, 0, wcr_enable);
+
+	/* wait for reset to assert... */
+	mdelay(500);
+
+	return NOTIFY_DONE;
+}
+
 static inline void imx2_wdt_setup(struct watchdog_device *wdog)
 {
 	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
@@ -191,12 +220,10 @@ static struct regmap_config imx2_wdt_regmap_config = {
 
 static int __init imx2_wdt_probe(struct platform_device *pdev)
 {
-	struct device_node *np = pdev->dev.of_node;
 	struct imx2_wdt_device *wdev;
 	struct watchdog_device *wdog;
 	struct resource *res;
 	void __iomem *base;
-	bool big_endian;
 	int ret;
 	u32 val;
 
@@ -204,10 +231,6 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
 	if (!wdev)
 		return -ENOMEM;
 
-	big_endian = of_property_read_bool(np, "big-endian");
-	if (big_endian)
-		imx2_wdt_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))
@@ -257,6 +280,12 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	wdev->restart_handler.notifier_call = imx2_restart_handler;
+	wdev->restart_handler.priority = 128;
+	ret = register_restart_handler(&wdev->restart_handler);
+	if (ret)
+		dev_err(&pdev->dev, "cannot register restart handler\n");
+
 	dev_info(&pdev->dev, "timeout %d sec (nowayout=%d)\n",
 		 wdog->timeout, nowayout);
 
@@ -268,6 +297,8 @@ static int __exit imx2_wdt_remove(struct platform_device *pdev)
 	struct watchdog_device *wdog = platform_get_drvdata(pdev);
 	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
+	unregister_restart_handler(&wdev->restart_handler);
+
 	watchdog_unregister_device(wdog);
 
 	if (imx2_wdt_is_running(wdev)) {
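dw_wdt and imx2_wdt above, and the meson and qcom drivers below, all adopt the same restart-handler API: embed a notifier_block in the driver state, pick a priority (128 marks a default mechanism, 0 a last resort, as qcom-wdt notes), and register it. A stripped-down sketch of the shared pattern, with the device-specific register pokes elided:

#include <linux/notifier.h>
#include <linux/reboot.h>

struct example_wdt {
	void __iomem *base;
	struct notifier_block restart_handler;
};

static int example_restart_handle(struct notifier_block *this,
				  unsigned long mode, void *cmd)
{
	struct example_wdt *wdt =
		container_of(this, struct example_wdt, restart_handler);

	/* Device-specific writes that force an immediate reset go here. */
	(void)wdt;
	return NOTIFY_DONE;
}

static int example_register_restart(struct example_wdt *wdt)
{
	wdt->restart_handler.notifier_call = example_restart_handle;
	wdt->restart_handler.priority = 128;	/* default-priority mechanism */
	return register_restart_handler(&wdt->restart_handler);
}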
diff --git a/drivers/watchdog/meson_wdt.c b/drivers/watchdog/meson_wdt.c
new file mode 100644
index 000000000000..ef6a298e8c45
--- /dev/null
+++ b/drivers/watchdog/meson_wdt.c
@@ -0,0 +1,236 @@
1/*
2 * Meson Watchdog Driver
3 *
4 * Copyright (c) 2014 Carlo Caione
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/clk.h>
13#include <linux/delay.h>
14#include <linux/err.h>
15#include <linux/init.h>
16#include <linux/io.h>
17#include <linux/kernel.h>
18#include <linux/module.h>
19#include <linux/moduleparam.h>
20#include <linux/notifier.h>
21#include <linux/of.h>
22#include <linux/platform_device.h>
23#include <linux/reboot.h>
24#include <linux/types.h>
25#include <linux/watchdog.h>
26
27#define DRV_NAME "meson_wdt"
28
29#define MESON_WDT_TC 0x00
30#define MESON_WDT_TC_EN BIT(22)
31#define MESON_WDT_TC_TM_MASK 0x3fffff
32#define MESON_WDT_DC_RESET (3 << 24)
33
34#define MESON_WDT_RESET 0x04
35
36#define MESON_WDT_TIMEOUT 30
37#define MESON_WDT_MIN_TIMEOUT 1
38#define MESON_WDT_MAX_TIMEOUT (MESON_WDT_TC_TM_MASK / 100000)
39
40#define MESON_SEC_TO_TC(s) ((s) * 100000)
41
42static bool nowayout = WATCHDOG_NOWAYOUT;
43static unsigned int timeout = MESON_WDT_TIMEOUT;
44
45struct meson_wdt_dev {
46 struct watchdog_device wdt_dev;
47 void __iomem *wdt_base;
48 struct notifier_block restart_handler;
49};
50
51static int meson_restart_handle(struct notifier_block *this, unsigned long mode,
52 void *cmd)
53{
54 u32 tc_reboot = MESON_WDT_DC_RESET | MESON_WDT_TC_EN;
55 struct meson_wdt_dev *meson_wdt = container_of(this,
56 struct meson_wdt_dev,
57 restart_handler);
58
59 while (1) {
60 writel(tc_reboot, meson_wdt->wdt_base + MESON_WDT_TC);
61 mdelay(5);
62 }
63
64 return NOTIFY_DONE;
65}
66
67static int meson_wdt_ping(struct watchdog_device *wdt_dev)
68{
69 struct meson_wdt_dev *meson_wdt = watchdog_get_drvdata(wdt_dev);
70
71 writel(0, meson_wdt->wdt_base + MESON_WDT_RESET);
72
73 return 0;
74}
75
76static void meson_wdt_change_timeout(struct watchdog_device *wdt_dev,
77 unsigned int timeout)
78{
79 struct meson_wdt_dev *meson_wdt = watchdog_get_drvdata(wdt_dev);
80 u32 reg;
81
82 reg = readl(meson_wdt->wdt_base + MESON_WDT_TC);
83 reg &= ~MESON_WDT_TC_TM_MASK;
84 reg |= MESON_SEC_TO_TC(timeout);
85 writel(reg, meson_wdt->wdt_base + MESON_WDT_TC);
86}
87
88static int meson_wdt_set_timeout(struct watchdog_device *wdt_dev,
89 unsigned int timeout)
90{
91 wdt_dev->timeout = timeout;
92
93 meson_wdt_change_timeout(wdt_dev, timeout);
94 meson_wdt_ping(wdt_dev);
95
96 return 0;
97}
98
99static int meson_wdt_stop(struct watchdog_device *wdt_dev)
100{
101 struct meson_wdt_dev *meson_wdt = watchdog_get_drvdata(wdt_dev);
102 u32 reg;
103
104 reg = readl(meson_wdt->wdt_base + MESON_WDT_TC);
105 reg &= ~MESON_WDT_TC_EN;
106 writel(reg, meson_wdt->wdt_base + MESON_WDT_TC);
107
108 return 0;
109}
110
111static int meson_wdt_start(struct watchdog_device *wdt_dev)
112{
113 struct meson_wdt_dev *meson_wdt = watchdog_get_drvdata(wdt_dev);
114 u32 reg;
115
116 meson_wdt_change_timeout(wdt_dev, meson_wdt->wdt_dev.timeout);
117 meson_wdt_ping(wdt_dev);
118
119 reg = readl(meson_wdt->wdt_base + MESON_WDT_TC);
120 reg |= MESON_WDT_TC_EN;
121 writel(reg, meson_wdt->wdt_base + MESON_WDT_TC);
122
123 return 0;
124}
125
126static const struct watchdog_info meson_wdt_info = {
127 .identity = DRV_NAME,
128 .options = WDIOF_SETTIMEOUT |
129 WDIOF_KEEPALIVEPING |
130 WDIOF_MAGICCLOSE,
131};
132
133static const struct watchdog_ops meson_wdt_ops = {
134 .owner = THIS_MODULE,
135 .start = meson_wdt_start,
136 .stop = meson_wdt_stop,
137 .ping = meson_wdt_ping,
138 .set_timeout = meson_wdt_set_timeout,
139};
140
141static int meson_wdt_probe(struct platform_device *pdev)
142{
143 struct resource *res;
144 struct meson_wdt_dev *meson_wdt;
145 int err;
146
147 meson_wdt = devm_kzalloc(&pdev->dev, sizeof(*meson_wdt), GFP_KERNEL);
148 if (!meson_wdt)
149 return -ENOMEM;
150
151 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
152 meson_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res);
153 if (IS_ERR(meson_wdt->wdt_base))
154 return PTR_ERR(meson_wdt->wdt_base);
155
156 meson_wdt->wdt_dev.parent = &pdev->dev;
157 meson_wdt->wdt_dev.info = &meson_wdt_info;
158 meson_wdt->wdt_dev.ops = &meson_wdt_ops;
159 meson_wdt->wdt_dev.timeout = MESON_WDT_TIMEOUT;
160 meson_wdt->wdt_dev.max_timeout = MESON_WDT_MAX_TIMEOUT;
161 meson_wdt->wdt_dev.min_timeout = MESON_WDT_MIN_TIMEOUT;
162
163 watchdog_set_drvdata(&meson_wdt->wdt_dev, meson_wdt);
164
165 watchdog_init_timeout(&meson_wdt->wdt_dev, timeout, &pdev->dev);
166 watchdog_set_nowayout(&meson_wdt->wdt_dev, nowayout);
167
168 meson_wdt_stop(&meson_wdt->wdt_dev);
169
170 err = watchdog_register_device(&meson_wdt->wdt_dev);
171 if (err)
172 return err;
173
174 platform_set_drvdata(pdev, meson_wdt);
175
176 meson_wdt->restart_handler.notifier_call = meson_restart_handle;
177 meson_wdt->restart_handler.priority = 128;
178 err = register_restart_handler(&meson_wdt->restart_handler);
179 if (err)
180 dev_err(&pdev->dev,
181 "cannot register restart handler (err=%d)\n", err);
182
183 dev_info(&pdev->dev, "Watchdog enabled (timeout=%d sec, nowayout=%d)",
184 meson_wdt->wdt_dev.timeout, nowayout);
185
186 return 0;
187}
188
189static int meson_wdt_remove(struct platform_device *pdev)
190{
191 struct meson_wdt_dev *meson_wdt = platform_get_drvdata(pdev);
192
193 unregister_restart_handler(&meson_wdt->restart_handler);
194
195 watchdog_unregister_device(&meson_wdt->wdt_dev);
196
197 return 0;
198}
199
200static void meson_wdt_shutdown(struct platform_device *pdev)
201{
202 struct meson_wdt_dev *meson_wdt = platform_get_drvdata(pdev);
203
204 meson_wdt_stop(&meson_wdt->wdt_dev);
205}
206
207static const struct of_device_id meson_wdt_dt_ids[] = {
208 { .compatible = "amlogic,meson6-wdt" },
209 { /* sentinel */ }
210};
211MODULE_DEVICE_TABLE(of, meson_wdt_dt_ids);
212
213static struct platform_driver meson_wdt_driver = {
214 .probe = meson_wdt_probe,
215 .remove = meson_wdt_remove,
216 .shutdown = meson_wdt_shutdown,
217 .driver = {
218 .owner = THIS_MODULE,
219 .name = DRV_NAME,
220 .of_match_table = meson_wdt_dt_ids,
221 },
222};
223
224module_platform_driver(meson_wdt_driver);
225
226module_param(timeout, uint, 0);
227MODULE_PARM_DESC(timeout, "Watchdog heartbeat in seconds");
228
229module_param(nowayout, bool, 0);
230MODULE_PARM_DESC(nowayout,
231 "Watchdog cannot be stopped once started (default="
232 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
233
234MODULE_LICENSE("GPL");
235MODULE_AUTHOR("Carlo Caione <carlo@caione.org>");
236MODULE_DESCRIPTION("Meson Watchdog Timer Driver");
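MESON_SEC_TO_TC() above implies the timeout field counts at 100 kHz, which also explains the derived maximum: the 22-bit TM field caps out at 0x3fffff ticks. A quick hosted-C check:

#include <stdio.h>

int main(void)
{
	unsigned int tm_mask = 0x3fffff;	/* MESON_WDT_TC_TM_MASK */
	unsigned int rate = 100000;		/* ticks/s, per MESON_SEC_TO_TC */

	printf("max timeout = %u s\n", tm_mask / rate);		/* 41 s */
	printf("default 30 s -> TC field 0x%x\n", 30 * rate);	/* fits the mask */
	return 0;
}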
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
index 1e6e28df5d7b..b2e1b4cbbdc1 100644
--- a/drivers/watchdog/of_xilinx_wdt.c
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -236,7 +236,6 @@ static struct platform_driver xwdt_driver = {
 	.probe       = xwdt_probe,
 	.remove      = xwdt_remove,
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = WATCHDOG_NAME,
 		.of_match_table = xwdt_of_match,
 	},
diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c
new file mode 100644
index 000000000000..aa85618c4d03
--- /dev/null
+++ b/drivers/watchdog/qcom-wdt.c
@@ -0,0 +1,224 @@
1/* Copyright (c) 2014, The Linux Foundation. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13#include <linux/clk.h>
14#include <linux/delay.h>
15#include <linux/io.h>
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/of.h>
19#include <linux/platform_device.h>
20#include <linux/reboot.h>
21#include <linux/watchdog.h>
22
23#define WDT_RST 0x0
24#define WDT_EN 0x8
25#define WDT_BITE_TIME 0x24
26
27struct qcom_wdt {
28 struct watchdog_device wdd;
29 struct clk *clk;
30 unsigned long rate;
31 struct notifier_block restart_nb;
32 void __iomem *base;
33};
34
35static inline
36struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd)
37{
38 return container_of(wdd, struct qcom_wdt, wdd);
39}
40
41static int qcom_wdt_start(struct watchdog_device *wdd)
42{
43 struct qcom_wdt *wdt = to_qcom_wdt(wdd);
44
45 writel(0, wdt->base + WDT_EN);
46 writel(1, wdt->base + WDT_RST);
47 writel(wdd->timeout * wdt->rate, wdt->base + WDT_BITE_TIME);
48 writel(1, wdt->base + WDT_EN);
49 return 0;
50}
51
52static int qcom_wdt_stop(struct watchdog_device *wdd)
53{
54 struct qcom_wdt *wdt = to_qcom_wdt(wdd);
55
56 writel(0, wdt->base + WDT_EN);
57 return 0;
58}
59
60static int qcom_wdt_ping(struct watchdog_device *wdd)
61{
62 struct qcom_wdt *wdt = to_qcom_wdt(wdd);
63
64 writel(1, wdt->base + WDT_RST);
65 return 0;
66}
67
68static int qcom_wdt_set_timeout(struct watchdog_device *wdd,
69 unsigned int timeout)
70{
71 wdd->timeout = timeout;
72 return qcom_wdt_start(wdd);
73}
74
75static const struct watchdog_ops qcom_wdt_ops = {
76 .start = qcom_wdt_start,
77 .stop = qcom_wdt_stop,
78 .ping = qcom_wdt_ping,
79 .set_timeout = qcom_wdt_set_timeout,
80 .owner = THIS_MODULE,
81};
82
83static const struct watchdog_info qcom_wdt_info = {
84 .options = WDIOF_KEEPALIVEPING
85 | WDIOF_MAGICCLOSE
86 | WDIOF_SETTIMEOUT,
87 .identity = KBUILD_MODNAME,
88};
89
90static int qcom_wdt_restart(struct notifier_block *nb, unsigned long action,
91 void *data)
92{
93 struct qcom_wdt *wdt = container_of(nb, struct qcom_wdt, restart_nb);
94 u32 timeout;
95
96 /*
97 * Trigger watchdog bite:
98 * Setup BITE_TIME to be 128ms, and enable WDT.
99 */
100 timeout = 128 * wdt->rate / 1000;
101
102 writel(0, wdt->base + WDT_EN);
103 writel(1, wdt->base + WDT_RST);
104 writel(timeout, wdt->base + WDT_BITE_TIME);
105 writel(1, wdt->base + WDT_EN);
106
107 /*
108 * Actually make sure the above sequence hits hardware before sleeping.
109 */
110 wmb();
111
112 msleep(150);
113 return NOTIFY_DONE;
114}
115
116static int qcom_wdt_probe(struct platform_device *pdev)
117{
118 struct qcom_wdt *wdt;
119 struct resource *res;
120 int ret;
121
122 wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
123 if (!wdt)
124 return -ENOMEM;
125
126 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
127 wdt->base = devm_ioremap_resource(&pdev->dev, res);
128 if (IS_ERR(wdt->base))
129 return PTR_ERR(wdt->base);
130
131 wdt->clk = devm_clk_get(&pdev->dev, NULL);
132 if (IS_ERR(wdt->clk)) {
133 dev_err(&pdev->dev, "failed to get input clock\n");
134 return PTR_ERR(wdt->clk);
135 }
136
137 ret = clk_prepare_enable(wdt->clk);
138 if (ret) {
139 dev_err(&pdev->dev, "failed to setup clock\n");
140 return ret;
141 }
142
143 /*
144 * We use the clock rate to calculate the max timeout, so ensure it's
145 * not zero to avoid a divide-by-zero exception.
146 *
147	 * WATCHDOG_CORE assumes units of seconds; if the WDT is clocked such
148	 * that it would bite before a second elapses, its usefulness is
149	 * limited. Bail if this is the case.
150 */
151 wdt->rate = clk_get_rate(wdt->clk);
152 if (wdt->rate == 0 ||
153 wdt->rate > 0x10000000U) {
154 dev_err(&pdev->dev, "invalid clock rate\n");
155 ret = -EINVAL;
156 goto err_clk_unprepare;
157 }
158
159 wdt->wdd.dev = &pdev->dev;
160 wdt->wdd.info = &qcom_wdt_info;
161 wdt->wdd.ops = &qcom_wdt_ops;
162 wdt->wdd.min_timeout = 1;
163 wdt->wdd.max_timeout = 0x10000000U / wdt->rate;
164
165 /*
166 * If 'timeout-sec' unspecified in devicetree, assume a 30 second
167 * default, unless the max timeout is less than 30 seconds, then use
168 * the max instead.
169 */
170 wdt->wdd.timeout = min(wdt->wdd.max_timeout, 30U);
171 watchdog_init_timeout(&wdt->wdd, 0, &pdev->dev);
172
173 ret = watchdog_register_device(&wdt->wdd);
174 if (ret) {
175 dev_err(&pdev->dev, "failed to register watchdog\n");
176 goto err_clk_unprepare;
177 }
178
179 /*
180 * WDT restart notifier has priority 0 (use as a last resort)
181 */
182 wdt->restart_nb.notifier_call = qcom_wdt_restart;
183 ret = register_restart_handler(&wdt->restart_nb);
184 if (ret)
185 dev_err(&pdev->dev, "failed to setup restart handler\n");
186
187 platform_set_drvdata(pdev, wdt);
188 return 0;
189
190err_clk_unprepare:
191 clk_disable_unprepare(wdt->clk);
192 return ret;
193}
194
195static int qcom_wdt_remove(struct platform_device *pdev)
196{
197 struct qcom_wdt *wdt = platform_get_drvdata(pdev);
198
199 unregister_restart_handler(&wdt->restart_nb);
200 watchdog_unregister_device(&wdt->wdd);
201 clk_disable_unprepare(wdt->clk);
202 return 0;
203}
204
205static const struct of_device_id qcom_wdt_of_table[] = {
206 { .compatible = "qcom,kpss-wdt-msm8960", },
207 { .compatible = "qcom,kpss-wdt-apq8064", },
208 { .compatible = "qcom,kpss-wdt-ipq8064", },
209 { },
210};
211MODULE_DEVICE_TABLE(of, qcom_wdt_of_table);
212
213static struct platform_driver qcom_watchdog_driver = {
214 .probe = qcom_wdt_probe,
215 .remove = qcom_wdt_remove,
216 .driver = {
217 .name = KBUILD_MODNAME,
218 .of_match_table = qcom_wdt_of_table,
219 },
220};
221module_platform_driver(qcom_watchdog_driver);
222
223MODULE_DESCRIPTION("QCOM KPSS Watchdog Driver");
224MODULE_LICENSE("GPL v2");
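The two magic numbers in qcom-wdt fall straight out of its tick arithmetic: the restart path wants a 128 ms bite, and probe caps max_timeout so the tick count stays below 0x10000000. A worked hosted-C example, assuming a 32768 Hz sleep clock (the real rate comes from the DT-supplied clock):

#include <stdio.h>

int main(void)
{
	unsigned long rate = 32768;	/* assumed WDT clock rate */

	/* Restart handler: BITE_TIME programmed for roughly 128 ms. */
	printf("bite ticks = %lu\n", 128 * rate / 1000);	/* 4194 */

	/* Probe: largest whole-second timeout below the 0x10000000 tick cap. */
	printf("max_timeout = %lu s\n", 0x10000000UL / rate);	/* 8192 */
	return 0;
}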
diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c
new file mode 100644
index 000000000000..d1c12278cb6a
--- /dev/null
+++ b/drivers/watchdog/rn5t618_wdt.c
@@ -0,0 +1,198 @@
1/*
2 * Watchdog driver for Ricoh RN5T618 PMIC
3 *
4 * Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 *
10 * You should have received a copy of the GNU General Public License
11 * along with this program. If not, see <http://www.gnu.org/licenses/>.
12 */
13
14#include <linux/device.h>
15#include <linux/mfd/rn5t618.h>
16#include <linux/module.h>
17#include <linux/platform_device.h>
18#include <linux/watchdog.h>
19
20#define DRIVER_NAME "rn5t618-wdt"
21
22static bool nowayout = WATCHDOG_NOWAYOUT;
23static unsigned int timeout;
24
25module_param(timeout, uint, 0);
26MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds");
27
28module_param(nowayout, bool, 0);
29MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
30 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
31
32struct rn5t618_wdt {
33 struct watchdog_device wdt_dev;
34 struct rn5t618 *rn5t618;
35};
36
37/*
38 * This array encodes the values of WDOGTIM field for the supported
39 * watchdog expiration times. If the watchdog is not accessed before
40 * the timer expiration, the PMU generates an interrupt, and if the CPU
41 * doesn't clear it within one second the system is restarted.
42 */
43static const struct {
44 u8 reg_val;
45 unsigned int time;
46} rn5t618_wdt_map[] = {
47 { 0, 1 },
48 { 1, 8 },
49 { 2, 32 },
50 { 3, 128 },
51};
52
53static int rn5t618_wdt_set_timeout(struct watchdog_device *wdt_dev,
54 unsigned int t)
55{
56 struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
57 int ret, i;
58
59 for (i = 0; i < ARRAY_SIZE(rn5t618_wdt_map); i++) {
60 if (rn5t618_wdt_map[i].time + 1 >= t)
61 break;
62 }
63
64 if (i == ARRAY_SIZE(rn5t618_wdt_map))
65 return -EINVAL;
66
67 ret = regmap_update_bits(wdt->rn5t618->regmap, RN5T618_WATCHDOG,
68 RN5T618_WATCHDOG_WDOGTIM_M,
69 rn5t618_wdt_map[i].reg_val);
70 if (!ret)
71 wdt_dev->timeout = rn5t618_wdt_map[i].time;
72
73 return ret;
74}
75
76static int rn5t618_wdt_start(struct watchdog_device *wdt_dev)
77{
78 struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
79 int ret;
80
81 ret = rn5t618_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
82 if (ret)
83 return ret;
84
85 /* enable repower-on */
86 ret = regmap_update_bits(wdt->rn5t618->regmap, RN5T618_REPCNT,
87 RN5T618_REPCNT_REPWRON,
88 RN5T618_REPCNT_REPWRON);
89 if (ret)
90 return ret;
91
92 /* enable watchdog */
93 ret = regmap_update_bits(wdt->rn5t618->regmap, RN5T618_WATCHDOG,
94 RN5T618_WATCHDOG_WDOGEN,
95 RN5T618_WATCHDOG_WDOGEN);
96 if (ret)
97 return ret;
98
99 /* enable watchdog interrupt */
100 return regmap_update_bits(wdt->rn5t618->regmap, RN5T618_PWRIREN,
101 RN5T618_PWRIRQ_IR_WDOG,
102 RN5T618_PWRIRQ_IR_WDOG);
103}
104
105static int rn5t618_wdt_stop(struct watchdog_device *wdt_dev)
106{
107 struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
108
109 return regmap_update_bits(wdt->rn5t618->regmap, RN5T618_WATCHDOG,
110 RN5T618_WATCHDOG_WDOGEN, 0);
111}
112
113static int rn5t618_wdt_ping(struct watchdog_device *wdt_dev)
114{
115 struct rn5t618_wdt *wdt = watchdog_get_drvdata(wdt_dev);
116 unsigned int val;
117 int ret;
118
119 /* The counter is restarted after an R/W access to the watchdog register */
120 ret = regmap_read(wdt->rn5t618->regmap, RN5T618_WATCHDOG, &val);
121 if (ret)
122 return ret;
123
124 ret = regmap_write(wdt->rn5t618->regmap, RN5T618_WATCHDOG, val);
125 if (ret)
126 return ret;
127
128 /* Clear pending watchdog interrupt */
129 return regmap_update_bits(wdt->rn5t618->regmap, RN5T618_PWRIRQ,
130 RN5T618_PWRIRQ_IR_WDOG, 0);
131}
132
133static struct watchdog_info rn5t618_wdt_info = {
134 .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE |
135 WDIOF_KEEPALIVEPING,
136 .identity = DRIVER_NAME,
137};
138
139static struct watchdog_ops rn5t618_wdt_ops = {
140 .owner = THIS_MODULE,
141 .start = rn5t618_wdt_start,
142 .stop = rn5t618_wdt_stop,
143 .ping = rn5t618_wdt_ping,
144 .set_timeout = rn5t618_wdt_set_timeout,
145};
146
147static int rn5t618_wdt_probe(struct platform_device *pdev)
148{
149 struct rn5t618 *rn5t618 = dev_get_drvdata(pdev->dev.parent);
150 struct rn5t618_wdt *wdt;
151 int min_timeout, max_timeout;
152
153 wdt = devm_kzalloc(&pdev->dev, sizeof(struct rn5t618_wdt), GFP_KERNEL);
154 if (!wdt)
155 return -ENOMEM;
156
157 min_timeout = rn5t618_wdt_map[0].time;
158 max_timeout = rn5t618_wdt_map[ARRAY_SIZE(rn5t618_wdt_map) - 1].time;
159
160 wdt->rn5t618 = rn5t618;
161 wdt->wdt_dev.info = &rn5t618_wdt_info;
162 wdt->wdt_dev.ops = &rn5t618_wdt_ops;
163 wdt->wdt_dev.min_timeout = min_timeout;
164 wdt->wdt_dev.max_timeout = max_timeout;
165 wdt->wdt_dev.timeout = max_timeout;
166 wdt->wdt_dev.parent = &pdev->dev;
167
168 watchdog_set_drvdata(&wdt->wdt_dev, wdt);
169 watchdog_init_timeout(&wdt->wdt_dev, timeout, &pdev->dev);
170 watchdog_set_nowayout(&wdt->wdt_dev, nowayout);
171
172 platform_set_drvdata(pdev, wdt);
173
174 return watchdog_register_device(&wdt->wdt_dev);
175}
176
177static int rn5t618_wdt_remove(struct platform_device *pdev)
178{
179 struct rn5t618_wdt *wdt = platform_get_drvdata(pdev);
180
181 watchdog_unregister_device(&wdt->wdt_dev);
182
183 return 0;
184}
185
186static struct platform_driver rn5t618_wdt_driver = {
187 .probe = rn5t618_wdt_probe,
188 .remove = rn5t618_wdt_remove,
189 .driver = {
190 .name = DRIVER_NAME,
191 },
192};
193
194module_platform_driver(rn5t618_wdt_driver);
195
196MODULE_AUTHOR("Beniamino Galvani <b.galvani@gmail.com>");
197MODULE_DESCRIPTION("RN5T618 watchdog driver");
198MODULE_LICENSE("GPL v2");
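
Editor's note: the timeout lookup above rounds a requested value up to the next supported hardware period; the +1 reflects the extra second the PMIC waits for the CPU to clear the interrupt before resetting. A standalone plain-C sketch of that selection logic (hypothetical pick() helper; compiles with gcc):

#include <stdio.h>

static const struct {
	unsigned char reg_val;
	unsigned int time;
} map[] = {
	{ 0, 1 }, { 1, 8 }, { 2, 32 }, { 3, 128 },
};

/* returns the effective timeout in seconds, or -1 if unsupported */
static int pick(unsigned int t, unsigned char *reg_val)
{
	unsigned int i;

	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
		if (map[i].time + 1 >= t) {
			*reg_val = map[i].reg_val;
			return map[i].time;
		}
	}
	return -1;	/* longer than the hardware supports: -EINVAL */
}

int main(void)
{
	unsigned int t;
	unsigned char v;
	int got;

	for (t = 1; t <= 130; t += 43) {
		got = pick(t, &v);
		if (got < 0)
			printf("want %3us -> unsupported\n", t);
		else
			printf("want %3us -> reg_val %u (%ds)\n", t,
			       (unsigned int)v, got);
	}
	return 0;
}
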
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 7c6ccd071baf..8532c3e2aea7 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -41,6 +41,8 @@
41#include <linux/of.h> 41#include <linux/of.h>
42#include <linux/mfd/syscon.h> 42#include <linux/mfd/syscon.h>
43#include <linux/regmap.h> 43#include <linux/regmap.h>
44#include <linux/reboot.h>
45#include <linux/delay.h>
44 46
45#define S3C2410_WTCON 0x00 47#define S3C2410_WTCON 0x00
46#define S3C2410_WTDAT 0x04 48#define S3C2410_WTDAT 0x04
@@ -128,6 +130,7 @@ struct s3c2410_wdt {
128 unsigned long wtdat_save; 130 unsigned long wtdat_save;
129 struct watchdog_device wdt_device; 131 struct watchdog_device wdt_device;
130 struct notifier_block freq_transition; 132 struct notifier_block freq_transition;
133 struct notifier_block restart_handler;
131 struct s3c2410_wdt_variant *drv_data; 134 struct s3c2410_wdt_variant *drv_data;
132 struct regmap *pmureg; 135 struct regmap *pmureg;
133}; 136};
@@ -155,6 +158,15 @@ static const struct s3c2410_wdt_variant drv_data_exynos5420 = {
155 .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT, 158 .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
156}; 159};
157 160
161static const struct s3c2410_wdt_variant drv_data_exynos7 = {
162 .disable_reg = EXYNOS5_WDT_DISABLE_REG_OFFSET,
163 .mask_reset_reg = EXYNOS5_WDT_MASK_RESET_REG_OFFSET,
164 .mask_bit = 0,
165 .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
166 .rst_stat_bit = 23, /* A57 WDTRESET */
167 .quirks = QUIRK_HAS_PMU_CONFIG | QUIRK_HAS_RST_STAT,
168};
169
158static const struct of_device_id s3c2410_wdt_match[] = { 170static const struct of_device_id s3c2410_wdt_match[] = {
159 { .compatible = "samsung,s3c2410-wdt", 171 { .compatible = "samsung,s3c2410-wdt",
160 .data = &drv_data_s3c2410 }, 172 .data = &drv_data_s3c2410 },
@@ -162,6 +174,8 @@ static const struct of_device_id s3c2410_wdt_match[] = {
162 .data = &drv_data_exynos5250 }, 174 .data = &drv_data_exynos5250 },
163 { .compatible = "samsung,exynos5420-wdt", 175 { .compatible = "samsung,exynos5420-wdt",
164 .data = &drv_data_exynos5420 }, 176 .data = &drv_data_exynos5420 },
177 { .compatible = "samsung,exynos7-wdt",
178 .data = &drv_data_exynos7 },
165 {}, 179 {},
166}; 180};
167MODULE_DEVICE_TABLE(of, s3c2410_wdt_match); 181MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
@@ -438,6 +452,31 @@ static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt)
438} 452}
439#endif 453#endif
440 454
455static int s3c2410wdt_restart(struct notifier_block *this,
456 unsigned long mode, void *cmd)
457{
458 struct s3c2410_wdt *wdt = container_of(this, struct s3c2410_wdt,
459 restart_handler);
460 void __iomem *wdt_base = wdt->reg_base;
461
462 /* disable watchdog, to be safe */
463 writel(0, wdt_base + S3C2410_WTCON);
464
465 /* put initial values into count and data */
466 writel(0x80, wdt_base + S3C2410_WTCNT);
467 writel(0x80, wdt_base + S3C2410_WTDAT);
468
469 /* set the watchdog to go and reset... */
470 writel(S3C2410_WTCON_ENABLE | S3C2410_WTCON_DIV16 |
471 S3C2410_WTCON_RSTEN | S3C2410_WTCON_PRESCALE(0x20),
472 wdt_base + S3C2410_WTCON);
473
474 /* wait for reset to assert... */
475 mdelay(500);
476
477 return NOTIFY_DONE;
478}
479
441static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt) 480static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
442{ 481{
443 unsigned int rst_stat; 482 unsigned int rst_stat;
@@ -592,6 +631,12 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
592 631
593 platform_set_drvdata(pdev, wdt); 632 platform_set_drvdata(pdev, wdt);
594 633
634 wdt->restart_handler.notifier_call = s3c2410wdt_restart;
635 wdt->restart_handler.priority = 128;
636 ret = register_restart_handler(&wdt->restart_handler);
637 if (ret)
638 pr_err("cannot register restart handler, %d\n", ret);
639
595 /* print out a statement of readiness */ 640 /* print out a statement of readiness */
596 641
597 wtcon = readl(wdt->reg_base + S3C2410_WTCON); 642 wtcon = readl(wdt->reg_base + S3C2410_WTCON);
@@ -621,6 +666,8 @@ static int s3c2410wdt_remove(struct platform_device *dev)
621 int ret; 666 int ret;
622 struct s3c2410_wdt *wdt = platform_get_drvdata(dev); 667 struct s3c2410_wdt *wdt = platform_get_drvdata(dev);
623 668
669 unregister_restart_handler(&wdt->restart_handler);
670
624 ret = s3c2410wdt_mask_and_disable_reset(wdt, true); 671 ret = s3c2410wdt_mask_and_disable_reset(wdt, true);
625 if (ret < 0) 672 if (ret < 0)
626 return ret; 673 return ret;
diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c
index 3804d5e9baea..a62b1b6decf4 100644
--- a/drivers/watchdog/stmp3xxx_rtc_wdt.c
+++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c
@@ -94,9 +94,33 @@ static int stmp3xxx_wdt_remove(struct platform_device *pdev)
94 return 0; 94 return 0;
95} 95}
96 96
97static int __maybe_unused stmp3xxx_wdt_suspend(struct device *dev)
98{
99 struct watchdog_device *wdd = &stmp3xxx_wdd;
100
101 if (watchdog_active(wdd))
102 return wdt_stop(wdd);
103
104 return 0;
105}
106
107static int __maybe_unused stmp3xxx_wdt_resume(struct device *dev)
108{
109 struct watchdog_device *wdd = &stmp3xxx_wdd;
110
111 if (watchdog_active(wdd))
112 return wdt_start(wdd);
113
114 return 0;
115}
116
117static SIMPLE_DEV_PM_OPS(stmp3xxx_wdt_pm_ops,
118 stmp3xxx_wdt_suspend, stmp3xxx_wdt_resume);
119
97static struct platform_driver stmp3xxx_wdt_driver = { 120static struct platform_driver stmp3xxx_wdt_driver = {
98 .driver = { 121 .driver = {
99 .name = "stmp3xxx_rtc_wdt", 122 .name = "stmp3xxx_rtc_wdt",
123 .pm = &stmp3xxx_wdt_pm_ops,
100 }, 124 },
101 .probe = stmp3xxx_wdt_probe, 125 .probe = stmp3xxx_wdt_probe,
102 .remove = stmp3xxx_wdt_remove, 126 .remove = stmp3xxx_wdt_remove,
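
Editor's note: the stmp3xxx change is the standard dev_pm_ops idiom: SIMPLE_DEV_PM_OPS() compiles to an empty ops table when CONFIG_PM_SLEEP is off, and __maybe_unused keeps the callbacks warning-free without #ifdef blocks, while watchdog_active() ensures only a running watchdog is stopped and restarted. A skeletal sketch with hypothetical bar_* names:

#include <linux/device.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

static int __maybe_unused bar_suspend(struct device *dev)
{
	/* quiesce the hardware; only referenced when PM sleep is enabled */
	return 0;
}

static int __maybe_unused bar_resume(struct device *dev)
{
	/* restore hardware state */
	return 0;
}

static SIMPLE_DEV_PM_OPS(bar_pm_ops, bar_suspend, bar_resume);

static struct platform_driver bar_driver = {
	.driver = {
		.name	= "bar",
		.pm	= &bar_pm_ops,
	},
};
module_platform_driver(bar_driver);

MODULE_LICENSE("GPL");
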
diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c
index 480bb557f353..b62301e74e5f 100644
--- a/drivers/watchdog/sunxi_wdt.c
+++ b/drivers/watchdog/sunxi_wdt.c
@@ -23,6 +23,7 @@
23#include <linux/moduleparam.h> 23#include <linux/moduleparam.h>
24#include <linux/notifier.h> 24#include <linux/notifier.h>
25#include <linux/of.h> 25#include <linux/of.h>
26#include <linux/of_device.h>
26#include <linux/platform_device.h> 27#include <linux/platform_device.h>
27#include <linux/reboot.h> 28#include <linux/reboot.h>
28#include <linux/types.h> 29#include <linux/types.h>
@@ -30,15 +31,11 @@
30 31
31#define WDT_MAX_TIMEOUT 16 32#define WDT_MAX_TIMEOUT 16
32#define WDT_MIN_TIMEOUT 1 33#define WDT_MIN_TIMEOUT 1
33#define WDT_MODE_TIMEOUT(n) ((n) << 3) 34#define WDT_TIMEOUT_MASK 0x0F
34#define WDT_TIMEOUT_MASK WDT_MODE_TIMEOUT(0x0F)
35 35
36#define WDT_CTRL 0x00
37#define WDT_CTRL_RELOAD ((1 << 0) | (0x0a57 << 1)) 36#define WDT_CTRL_RELOAD ((1 << 0) | (0x0a57 << 1))
38 37
39#define WDT_MODE 0x04
40#define WDT_MODE_EN (1 << 0) 38#define WDT_MODE_EN (1 << 0)
41#define WDT_MODE_RST_EN (1 << 1)
42 39
43#define DRV_NAME "sunxi-wdt" 40#define DRV_NAME "sunxi-wdt"
44#define DRV_VERSION "1.0" 41#define DRV_VERSION "1.0"
@@ -46,15 +43,29 @@
46static bool nowayout = WATCHDOG_NOWAYOUT; 43static bool nowayout = WATCHDOG_NOWAYOUT;
47static unsigned int timeout = WDT_MAX_TIMEOUT; 44static unsigned int timeout = WDT_MAX_TIMEOUT;
48 45
46/*
47 * This structure stores the register offsets for different variants
48 * of Allwinner's watchdog hardware.
49 */
50struct sunxi_wdt_reg {
51 u8 wdt_ctrl;
52 u8 wdt_cfg;
53 u8 wdt_mode;
54 u8 wdt_timeout_shift;
55 u8 wdt_reset_mask;
56 u8 wdt_reset_val;
57};
58
49struct sunxi_wdt_dev { 59struct sunxi_wdt_dev {
50 struct watchdog_device wdt_dev; 60 struct watchdog_device wdt_dev;
51 void __iomem *wdt_base; 61 void __iomem *wdt_base;
62 const struct sunxi_wdt_reg *wdt_regs;
52 struct notifier_block restart_handler; 63 struct notifier_block restart_handler;
53}; 64};
54 65
55/* 66/*
56 * wdt_timeout_map maps the watchdog timer interval value in seconds to 67 * wdt_timeout_map maps the watchdog timer interval value in seconds to
57 * the value of the register WDT_MODE bit 3:6 68 * the value of the register WDT_MODE at bits .wdt_timeout_shift to .wdt_timeout_shift + 3
58 * 69 *
59 * [timeout seconds] = register value 70 * [timeout seconds] = register value
60 * 71 *
@@ -82,19 +93,32 @@ static int sunxi_restart_handle(struct notifier_block *this, unsigned long mode,
82 struct sunxi_wdt_dev, 93 struct sunxi_wdt_dev,
83 restart_handler); 94 restart_handler);
84 void __iomem *wdt_base = sunxi_wdt->wdt_base; 95 void __iomem *wdt_base = sunxi_wdt->wdt_base;
96 const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
97 u32 val;
98
99 /* Set system reset function */
100 val = readl(wdt_base + regs->wdt_cfg);
101 val &= ~(regs->wdt_reset_mask);
102 val |= regs->wdt_reset_val;
103 writel(val, wdt_base + regs->wdt_cfg);
85 104
86 /* Enable timer and set reset bit in the watchdog */ 105 /* Set lowest timeout and enable watchdog */
87 writel(WDT_MODE_EN | WDT_MODE_RST_EN, wdt_base + WDT_MODE); 106 val = readl(wdt_base + regs->wdt_mode);
107 val &= ~(WDT_TIMEOUT_MASK << regs->wdt_timeout_shift);
108 val |= WDT_MODE_EN;
109 writel(val, wdt_base + regs->wdt_mode);
88 110
89 /* 111 /*
90 * Restart the watchdog. The default (and lowest) interval 112 * Restart the watchdog. The default (and lowest) interval
91 * value for the watchdog is 0.5s. 113 * value for the watchdog is 0.5s.
92 */ 114 */
93 writel(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL); 115 writel(WDT_CTRL_RELOAD, wdt_base + regs->wdt_ctrl);
94 116
95 while (1) { 117 while (1) {
96 mdelay(5); 118 mdelay(5);
97 writel(WDT_MODE_EN | WDT_MODE_RST_EN, wdt_base + WDT_MODE); 119 val = readl(wdt_base + regs->wdt_mode);
120 val |= WDT_MODE_EN;
121 writel(val, wdt_base + regs->wdt_mode);
98 } 122 }
99 return NOTIFY_DONE; 123 return NOTIFY_DONE;
100} 124}
@@ -103,8 +127,9 @@ static int sunxi_wdt_ping(struct watchdog_device *wdt_dev)
103{ 127{
104 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); 128 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
105 void __iomem *wdt_base = sunxi_wdt->wdt_base; 129 void __iomem *wdt_base = sunxi_wdt->wdt_base;
130 const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
106 131
107 iowrite32(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL); 132 writel(WDT_CTRL_RELOAD, wdt_base + regs->wdt_ctrl);
108 133
109 return 0; 134 return 0;
110} 135}
@@ -114,6 +139,7 @@ static int sunxi_wdt_set_timeout(struct watchdog_device *wdt_dev,
114{ 139{
115 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); 140 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
116 void __iomem *wdt_base = sunxi_wdt->wdt_base; 141 void __iomem *wdt_base = sunxi_wdt->wdt_base;
142 const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
117 u32 reg; 143 u32 reg;
118 144
119 if (wdt_timeout_map[timeout] == 0) 145 if (wdt_timeout_map[timeout] == 0)
@@ -121,10 +147,10 @@ static int sunxi_wdt_set_timeout(struct watchdog_device *wdt_dev,
121 147
122 sunxi_wdt->wdt_dev.timeout = timeout; 148 sunxi_wdt->wdt_dev.timeout = timeout;
123 149
124 reg = ioread32(wdt_base + WDT_MODE); 150 reg = readl(wdt_base + regs->wdt_mode);
125 reg &= ~WDT_TIMEOUT_MASK; 151 reg &= ~(WDT_TIMEOUT_MASK << regs->wdt_timeout_shift);
126 reg |= WDT_MODE_TIMEOUT(wdt_timeout_map[timeout]); 152 reg |= wdt_timeout_map[timeout] << regs->wdt_timeout_shift;
127 iowrite32(reg, wdt_base + WDT_MODE); 153 writel(reg, wdt_base + regs->wdt_mode);
128 154
129 sunxi_wdt_ping(wdt_dev); 155 sunxi_wdt_ping(wdt_dev);
130 156
@@ -135,8 +161,9 @@ static int sunxi_wdt_stop(struct watchdog_device *wdt_dev)
135{ 161{
136 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); 162 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
137 void __iomem *wdt_base = sunxi_wdt->wdt_base; 163 void __iomem *wdt_base = sunxi_wdt->wdt_base;
164 const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
138 165
139 iowrite32(0, wdt_base + WDT_MODE); 166 writel(0, wdt_base + regs->wdt_mode);
140 167
141 return 0; 168 return 0;
142} 169}
@@ -146,6 +173,7 @@ static int sunxi_wdt_start(struct watchdog_device *wdt_dev)
146 u32 reg; 173 u32 reg;
147 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev); 174 struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
148 void __iomem *wdt_base = sunxi_wdt->wdt_base; 175 void __iomem *wdt_base = sunxi_wdt->wdt_base;
176 const struct sunxi_wdt_reg *regs = sunxi_wdt->wdt_regs;
149 int ret; 177 int ret;
150 178
151 ret = sunxi_wdt_set_timeout(&sunxi_wdt->wdt_dev, 179 ret = sunxi_wdt_set_timeout(&sunxi_wdt->wdt_dev,
@@ -153,9 +181,16 @@ static int sunxi_wdt_start(struct watchdog_device *wdt_dev)
153 if (ret < 0) 181 if (ret < 0)
154 return ret; 182 return ret;
155 183
156 reg = ioread32(wdt_base + WDT_MODE); 184 /* Set system reset function */
157 reg |= (WDT_MODE_RST_EN | WDT_MODE_EN); 185 reg = readl(wdt_base + regs->wdt_cfg);
158 iowrite32(reg, wdt_base + WDT_MODE); 186 reg &= ~(regs->wdt_reset_mask);
187 reg |= regs->wdt_reset_val;
188 writel(reg, wdt_base + regs->wdt_cfg);
189
190 /* Enable watchdog */
191 reg = readl(wdt_base + regs->wdt_mode);
192 reg |= WDT_MODE_EN;
193 writel(reg, wdt_base + regs->wdt_mode);
159 194
160 return 0; 195 return 0;
161} 196}
@@ -175,9 +210,35 @@ static const struct watchdog_ops sunxi_wdt_ops = {
175 .set_timeout = sunxi_wdt_set_timeout, 210 .set_timeout = sunxi_wdt_set_timeout,
176}; 211};
177 212
213static const struct sunxi_wdt_reg sun4i_wdt_reg = {
214 .wdt_ctrl = 0x00,
215 .wdt_cfg = 0x04,
216 .wdt_mode = 0x04,
217 .wdt_timeout_shift = 3,
218 .wdt_reset_mask = 0x02,
219 .wdt_reset_val = 0x02,
220};
221
222static const struct sunxi_wdt_reg sun6i_wdt_reg = {
223 .wdt_ctrl = 0x10,
224 .wdt_cfg = 0x14,
225 .wdt_mode = 0x18,
226 .wdt_timeout_shift = 4,
227 .wdt_reset_mask = 0x03,
228 .wdt_reset_val = 0x01,
229};
230
231static const struct of_device_id sunxi_wdt_dt_ids[] = {
232 { .compatible = "allwinner,sun4i-a10-wdt", .data = &sun4i_wdt_reg },
233 { .compatible = "allwinner,sun6i-a31-wdt", .data = &sun6i_wdt_reg },
234 { /* sentinel */ }
235};
236MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids);
237
178static int sunxi_wdt_probe(struct platform_device *pdev) 238static int sunxi_wdt_probe(struct platform_device *pdev)
179{ 239{
180 struct sunxi_wdt_dev *sunxi_wdt; 240 struct sunxi_wdt_dev *sunxi_wdt;
241 const struct of_device_id *device;
181 struct resource *res; 242 struct resource *res;
182 int err; 243 int err;
183 244
@@ -187,6 +248,12 @@ static int sunxi_wdt_probe(struct platform_device *pdev)
187 248
188 platform_set_drvdata(pdev, sunxi_wdt); 249 platform_set_drvdata(pdev, sunxi_wdt);
189 250
251 device = of_match_device(sunxi_wdt_dt_ids, &pdev->dev);
252 if (!device)
253 return -ENODEV;
254
255 sunxi_wdt->wdt_regs = device->data;
256
190 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 257 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
191 sunxi_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res); 258 sunxi_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res);
192 if (IS_ERR(sunxi_wdt->wdt_base)) 259 if (IS_ERR(sunxi_wdt->wdt_base))
@@ -242,12 +309,6 @@ static void sunxi_wdt_shutdown(struct platform_device *pdev)
242 sunxi_wdt_stop(&sunxi_wdt->wdt_dev); 309 sunxi_wdt_stop(&sunxi_wdt->wdt_dev);
243} 310}
244 311
245static const struct of_device_id sunxi_wdt_dt_ids[] = {
246 { .compatible = "allwinner,sun4i-a10-wdt" },
247 { /* sentinel */ }
248};
249MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids);
250
251static struct platform_driver sunxi_wdt_driver = { 312static struct platform_driver sunxi_wdt_driver = {
252 .probe = sunxi_wdt_probe, 313 .probe = sunxi_wdt_probe,
253 .remove = sunxi_wdt_remove, 314 .remove = sunxi_wdt_remove,
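
Editor's note: the sunxi rework is the usual way a driver grows a second hardware variant: each compatible string carries a pointer to its register layout in the of_device_id .data field, and probe() fetches it with of_match_device(). A sketch of that lookup with hypothetical baz_* names:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct baz_regs {
	u8 ctrl;
	u8 mode;
};

static const struct baz_regs baz_v1_regs = { .ctrl = 0x00, .mode = 0x04 };
static const struct baz_regs baz_v2_regs = { .ctrl = 0x10, .mode = 0x18 };

static const struct of_device_id baz_dt_ids[] = {
	{ .compatible = "vendor,baz-v1", .data = &baz_v1_regs },
	{ .compatible = "vendor,baz-v2", .data = &baz_v2_regs },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, baz_dt_ids);

static int baz_probe(struct platform_device *pdev)
{
	const struct of_device_id *match;
	const struct baz_regs *regs;

	match = of_match_device(baz_dt_ids, &pdev->dev);
	if (!match)
		return -ENODEV;
	regs = match->data;	/* variant-specific register layout */

	dev_info(&pdev->dev, "ctrl@%#x mode@%#x\n", regs->ctrl, regs->mode);
	return 0;
}

static struct platform_driver baz_driver = {
	.probe	= baz_probe,
	.driver	= {
		.name		= "baz",
		.of_match_table	= baz_dt_ids,
	},
};
module_platform_driver(baz_driver);

MODULE_LICENSE("GPL");
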
diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c
index afa9d6ef353a..dee9c6cbe6df 100644
--- a/drivers/watchdog/ts72xx_wdt.c
+++ b/drivers/watchdog/ts72xx_wdt.c
@@ -428,11 +428,7 @@ static int ts72xx_wdt_probe(struct platform_device *pdev)
428 428
429static int ts72xx_wdt_remove(struct platform_device *pdev) 429static int ts72xx_wdt_remove(struct platform_device *pdev)
430{ 430{
431 int error; 431 return misc_deregister(&ts72xx_wdt_miscdev);
432
433 error = misc_deregister(&ts72xx_wdt_miscdev);
434
435 return error;
436} 432}
437 433
438static struct platform_driver ts72xx_wdt_driver = { 434static struct platform_driver ts72xx_wdt_driver = {
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 1e0a317d3dcd..3860d02729dc 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -167,6 +167,9 @@ static struct page *balloon_next_page(struct page *page)
167 167
168static enum bp_state update_schedule(enum bp_state state) 168static enum bp_state update_schedule(enum bp_state state)
169{ 169{
170 if (state == BP_ECANCELED)
171 return BP_ECANCELED;
172
170 if (state == BP_DONE) { 173 if (state == BP_DONE) {
171 balloon_stats.schedule_delay = 1; 174 balloon_stats.schedule_delay = 1;
172 balloon_stats.retry_count = 1; 175 balloon_stats.retry_count = 1;
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index dd9c249ea311..95ee4302ffb8 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -41,24 +41,29 @@ static int xen_add_device(struct device *dev)
41#endif 41#endif
42 42
43 if (pci_seg_supported) { 43 if (pci_seg_supported) {
44 struct physdev_pci_device_add add = { 44 struct {
45 .seg = pci_domain_nr(pci_dev->bus), 45 struct physdev_pci_device_add add;
46 .bus = pci_dev->bus->number, 46 uint32_t pxm;
47 .devfn = pci_dev->devfn 47 } add_ext = {
48 .add.seg = pci_domain_nr(pci_dev->bus),
49 .add.bus = pci_dev->bus->number,
50 .add.devfn = pci_dev->devfn
48 }; 51 };
52 struct physdev_pci_device_add *add = &add_ext.add;
53
49#ifdef CONFIG_ACPI 54#ifdef CONFIG_ACPI
50 acpi_handle handle; 55 acpi_handle handle;
51#endif 56#endif
52 57
53#ifdef CONFIG_PCI_IOV 58#ifdef CONFIG_PCI_IOV
54 if (pci_dev->is_virtfn) { 59 if (pci_dev->is_virtfn) {
55 add.flags = XEN_PCI_DEV_VIRTFN; 60 add->flags = XEN_PCI_DEV_VIRTFN;
56 add.physfn.bus = physfn->bus->number; 61 add->physfn.bus = physfn->bus->number;
57 add.physfn.devfn = physfn->devfn; 62 add->physfn.devfn = physfn->devfn;
58 } else 63 } else
59#endif 64#endif
60 if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) 65 if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
61 add.flags = XEN_PCI_DEV_EXTFN; 66 add->flags = XEN_PCI_DEV_EXTFN;
62 67
63#ifdef CONFIG_ACPI 68#ifdef CONFIG_ACPI
64 handle = ACPI_HANDLE(&pci_dev->dev); 69 handle = ACPI_HANDLE(&pci_dev->dev);
@@ -77,8 +82,8 @@ static int xen_add_device(struct device *dev)
77 status = acpi_evaluate_integer(handle, "_PXM", 82 status = acpi_evaluate_integer(handle, "_PXM",
78 NULL, &pxm); 83 NULL, &pxm);
79 if (ACPI_SUCCESS(status)) { 84 if (ACPI_SUCCESS(status)) {
80 add.optarr[0] = pxm; 85 add->optarr[0] = pxm;
81 add.flags |= XEN_PCI_DEV_PXM; 86 add->flags |= XEN_PCI_DEV_PXM;
82 break; 87 break;
83 } 88 }
84 status = acpi_get_parent(handle, &handle); 89 status = acpi_get_parent(handle, &handle);
@@ -86,7 +91,7 @@ static int xen_add_device(struct device *dev)
86 } 91 }
87#endif /* CONFIG_ACPI */ 92#endif /* CONFIG_ACPI */
88 93
89 r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add); 94 r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, add);
90 if (r != -ENOSYS) 95 if (r != -ENOSYS)
91 return r; 96 return r;
92 pci_seg_supported = false; 97 pci_seg_supported = false;
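
Editor's note: the xen_add_device() change works around a hypercall argument whose ABI struct ends in a flexible array (optarr[]): stack storage for one extra element is reserved by embedding the struct in a wrapper. A plain-C sketch of the layout trick (hypothetical msg names; placing a struct with a flexible array member inside another struct is a GCC extension, which is what the kernel relies on here):

#include <stdint.h>
#include <stdio.h>

struct msg {
	uint32_t flags;
	uint32_t optarr[];	/* flexible array member, no storage */
};

struct msg_ext {
	struct msg m;
	uint32_t pxm;		/* backs m.optarr[0] */
};

int main(void)
{
	struct msg_ext ext = { .m.flags = 1 };
	struct msg *m = &ext.m;	/* callee only sees the ABI struct */

	m->optarr[0] = 42;	/* lands in ext.pxm */
	printf("flags=%u pxm=%u\n", m->flags, ext.pxm);
	return 0;
}
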
diff --git a/fs/Kconfig b/fs/Kconfig
index db5dc1598716..664991afe0c0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -67,6 +67,7 @@ source "fs/quota/Kconfig"
67 67
68source "fs/autofs4/Kconfig" 68source "fs/autofs4/Kconfig"
69source "fs/fuse/Kconfig" 69source "fs/fuse/Kconfig"
70source "fs/overlayfs/Kconfig"
70 71
71menu "Caches" 72menu "Caches"
72 73
diff --git a/fs/Makefile b/fs/Makefile
index 90c88529892b..34a1b9dea6dd 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/
104obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 104obj-$(CONFIG_AUTOFS4_FS) += autofs4/
105obj-$(CONFIG_ADFS_FS) += adfs/ 105obj-$(CONFIG_ADFS_FS) += adfs/
106obj-$(CONFIG_FUSE_FS) += fuse/ 106obj-$(CONFIG_FUSE_FS) += fuse/
107obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
107obj-$(CONFIG_UDF_FS) += udf/ 108obj-$(CONFIG_UDF_FS) += udf/
108obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 109obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
109obj-$(CONFIG_OMFS_FS) += omfs/ 110obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d2b76e29d3b..4399f0c3a4ce 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -765,23 +765,6 @@ out:
765 return ret; 765 return ret;
766} 766}
767 767
768/* copy of check_sticky in fs/namei.c()
769* It's inline, so penalty for filesystems that don't use sticky bit is
770* minimal.
771*/
772static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
773{
774 kuid_t fsuid = current_fsuid();
775
776 if (!(dir->i_mode & S_ISVTX))
777 return 0;
778 if (uid_eq(inode->i_uid, fsuid))
779 return 0;
780 if (uid_eq(dir->i_uid, fsuid))
781 return 0;
782 return !capable(CAP_FOWNER);
783}
784
785/* copy of may_delete in fs/namei.c() 768/* copy of may_delete in fs/namei.c()
786 * Check whether we can remove a link victim from directory dir, check 769 * Check whether we can remove a link victim from directory dir, check
787 * whether the type of victim is right. 770 * whether the type of victim is right.
@@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
817 return error; 800 return error;
818 if (IS_APPEND(dir)) 801 if (IS_APPEND(dir))
819 return -EPERM; 802 return -EPERM;
820 if (btrfs_check_sticky(dir, victim->d_inode)|| 803 if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
821 IS_APPEND(victim->d_inode)||
822 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 804 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
823 return -EPERM; 805 return -EPERM;
824 if (isdir) { 806 if (isdir) {
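
Editor's note: the btrfs hunk drops a private copy of check_sticky() in favour of the shared VFS helper. The rule itself is short: in a sticky (S_ISVTX) directory such as /tmp, an entry may be deleted only by the entry's owner, the directory's owner, or a CAP_FOWNER holder. A plain-C sketch of that predicate (hypothetical node type):

#include <stdbool.h>
#include <stdio.h>

#define VTX_BIT 01000		/* S_ISVTX */

struct node {
	unsigned int mode;
	unsigned int uid;
};

static bool sticky_allows_delete(const struct node *dir,
				 const struct node *victim,
				 unsigned int fsuid, bool cap_fowner)
{
	if (!(dir->mode & VTX_BIT))
		return true;	/* not sticky: normal permission rules */
	return victim->uid == fsuid || dir->uid == fsuid || cap_fowner;
}

int main(void)
{
	struct node tmp = { .mode = 0777 | VTX_BIT, .uid = 0 };
	struct node file = { .mode = 0644, .uid = 1000 };

	printf("owner may delete: %d\n",
	       sticky_allows_delete(&tmp, &file, 1000, false));	/* 1 */
	printf("stranger may delete: %d\n",
	       sticky_allows_delete(&tmp, &file, 1001, false));	/* 0 */
	return 0;
}
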
diff --git a/fs/buffer.c b/fs/buffer.c
index 9614adc7e754..6c48f20eddd4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
993 */ 993 */
994static int 994static int
995grow_dev_page(struct block_device *bdev, sector_t block, 995grow_dev_page(struct block_device *bdev, sector_t block,
996 pgoff_t index, int size, int sizebits) 996 pgoff_t index, int size, int sizebits, gfp_t gfp)
997{ 997{
998 struct inode *inode = bdev->bd_inode; 998 struct inode *inode = bdev->bd_inode;
999 struct page *page; 999 struct page *page;
@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
1002 int ret = 0; /* Will call free_more_memory() */ 1002 int ret = 0; /* Will call free_more_memory() */
1003 gfp_t gfp_mask; 1003 gfp_t gfp_mask;
1004 1004
1005 gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; 1005 gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
1006 gfp_mask |= __GFP_MOVABLE; 1006
1007 /* 1007 /*
1008 * XXX: __getblk_slow() can not really deal with failure and 1008 * XXX: __getblk_slow() can not really deal with failure and
1009 * will endlessly loop on improvised global reclaim. Prefer 1009 * will endlessly loop on improvised global reclaim. Prefer
@@ -1060,7 +1060,7 @@ failed:
1060 * that page was dirty, the buffers are set dirty also. 1060 * that page was dirty, the buffers are set dirty also.
1061 */ 1061 */
1062static int 1062static int
1063grow_buffers(struct block_device *bdev, sector_t block, int size) 1063grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
1064{ 1064{
1065 pgoff_t index; 1065 pgoff_t index;
1066 int sizebits; 1066 int sizebits;
@@ -1087,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1087 } 1087 }
1088 1088
1089 /* Create a page with the proper size buffers.. */ 1089 /* Create a page with the proper size buffers.. */
1090 return grow_dev_page(bdev, block, index, size, sizebits); 1090 return grow_dev_page(bdev, block, index, size, sizebits, gfp);
1091} 1091}
1092 1092
1093static struct buffer_head * 1093struct buffer_head *
1094__getblk_slow(struct block_device *bdev, sector_t block, int size) 1094__getblk_slow(struct block_device *bdev, sector_t block,
1095 unsigned size, gfp_t gfp)
1095{ 1096{
1096 /* Size must be multiple of hard sectorsize */ 1097 /* Size must be multiple of hard sectorsize */
1097 if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 1098 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1113,13 +1114,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1113 if (bh) 1114 if (bh)
1114 return bh; 1115 return bh;
1115 1116
1116 ret = grow_buffers(bdev, block, size); 1117 ret = grow_buffers(bdev, block, size, gfp);
1117 if (ret < 0) 1118 if (ret < 0)
1118 return NULL; 1119 return NULL;
1119 if (ret == 0) 1120 if (ret == 0)
1120 free_more_memory(); 1121 free_more_memory();
1121 } 1122 }
1122} 1123}
1124EXPORT_SYMBOL(__getblk_slow);
1123 1125
1124/* 1126/*
1125 * The relationship between dirty buffers and dirty pages: 1127 * The relationship between dirty buffers and dirty pages:
@@ -1373,24 +1375,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
1373EXPORT_SYMBOL(__find_get_block); 1375EXPORT_SYMBOL(__find_get_block);
1374 1376
1375/* 1377/*
1376 * __getblk will locate (and, if necessary, create) the buffer_head 1378 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
1377 * which corresponds to the passed block_device, block and size. The 1379 * which corresponds to the passed block_device, block and size. The
1378 * returned buffer has its reference count incremented. 1380 * returned buffer has its reference count incremented.
1379 * 1381 *
1380 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() 1382 * __getblk_gfp() will lock up the machine if grow_dev_page's
1381 * attempt is failing. FIXME, perhaps? 1383 * try_to_free_buffers() attempt is failing. FIXME, perhaps?
1382 */ 1384 */
1383struct buffer_head * 1385struct buffer_head *
1384__getblk(struct block_device *bdev, sector_t block, unsigned size) 1386__getblk_gfp(struct block_device *bdev, sector_t block,
1387 unsigned size, gfp_t gfp)
1385{ 1388{
1386 struct buffer_head *bh = __find_get_block(bdev, block, size); 1389 struct buffer_head *bh = __find_get_block(bdev, block, size);
1387 1390
1388 might_sleep(); 1391 might_sleep();
1389 if (bh == NULL) 1392 if (bh == NULL)
1390 bh = __getblk_slow(bdev, block, size); 1393 bh = __getblk_slow(bdev, block, size, gfp);
1391 return bh; 1394 return bh;
1392} 1395}
1393EXPORT_SYMBOL(__getblk); 1396EXPORT_SYMBOL(__getblk_gfp);
1394 1397
1395/* 1398/*
1396 * Do async read-ahead on a buffer.. 1399 * Do async read-ahead on a buffer..
@@ -1406,24 +1409,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
1406EXPORT_SYMBOL(__breadahead); 1409EXPORT_SYMBOL(__breadahead);
1407 1410
1408/** 1411/**
1409 * __bread() - reads a specified block and returns the bh 1412 * __bread_gfp() - reads a specified block and returns the bh
1410 * @bdev: the block_device to read from 1413 * @bdev: the block_device to read from
1411 * @block: number of block 1414 * @block: number of block
1412 * @size: size (in bytes) to read 1415 * @size: size (in bytes) to read
1413 * 1416 * @gfp: page allocation flag
1417 *
1414 * Reads a specified block, and returns buffer head that contains it. 1418 * Reads a specified block, and returns buffer head that contains it.
1419 * If you pass zero for @gfp, the page cache is allocated from a
1420 * non-movable area so that it does not block page migration.
1415 * It returns NULL if the block was unreadable. 1421 * It returns NULL if the block was unreadable.
1416 */ 1422 */
1417struct buffer_head * 1423struct buffer_head *
1418__bread(struct block_device *bdev, sector_t block, unsigned size) 1424__bread_gfp(struct block_device *bdev, sector_t block,
1425 unsigned size, gfp_t gfp)
1419{ 1426{
1420 struct buffer_head *bh = __getblk(bdev, block, size); 1427 struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
1421 1428
1422 if (likely(bh) && !buffer_uptodate(bh)) 1429 if (likely(bh) && !buffer_uptodate(bh))
1423 bh = __bread_slow(bh); 1430 bh = __bread_slow(bh);
1424 return bh; 1431 return bh;
1425} 1432}
1426EXPORT_SYMBOL(__bread); 1433EXPORT_SYMBOL(__bread_gfp);
1427 1434
1428/* 1435/*
1429 * invalidate_bh_lrus() is called rarely - but not only at unmount. 1436 * invalidate_bh_lrus() is called rarely - but not only at unmount.
@@ -2082,6 +2089,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2082 struct page *page, void *fsdata) 2089 struct page *page, void *fsdata)
2083{ 2090{
2084 struct inode *inode = mapping->host; 2091 struct inode *inode = mapping->host;
2092 loff_t old_size = inode->i_size;
2085 int i_size_changed = 0; 2093 int i_size_changed = 0;
2086 2094
2087 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 2095 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2101,6 +2109,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2101 unlock_page(page); 2109 unlock_page(page);
2102 page_cache_release(page); 2110 page_cache_release(page);
2103 2111
2112 if (old_size < pos)
2113 pagecache_isize_extended(inode, old_size, pos);
2104 /* 2114 /*
2105 * Don't mark the inode dirty under page lock. First, it unnecessarily 2115 * Don't mark the inode dirty under page lock. First, it unnecessarily
2106 * makes the holding time of page lock longer. Second, it forces lock 2116 * makes the holding time of page lock longer. Second, it forces lock
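
Editor's note: the buffer-cache hunks thread a gfp_t through __getblk()/__bread() so callers can opt out of __GFP_MOVABLE for long-lived buffers. A sketch of how a filesystem might use the new entry points, consistent with the signatures added above (the helper names here are hypothetical):

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfp.h>

/* short-lived buffer: movable page, friendly to compaction/CMA */
static struct buffer_head *read_data_block(struct super_block *sb,
					   sector_t block)
{
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize,
			   __GFP_MOVABLE);
}

/* long-lived metadata buffer: gfp == 0 keeps the page in a
 * non-movable area so it never blocks page migration */
static struct buffer_head *read_meta_block(struct super_block *sb,
					   sector_t block)
{
	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
}
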
diff --git a/fs/dcache.c b/fs/dcache.c
index d5a23fd0da90..3ffef7f4e5cd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2673,11 +2673,13 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2673 if (!IS_ROOT(new)) { 2673 if (!IS_ROOT(new)) {
2674 spin_unlock(&inode->i_lock); 2674 spin_unlock(&inode->i_lock);
2675 dput(new); 2675 dput(new);
2676 iput(inode);
2676 return ERR_PTR(-EIO); 2677 return ERR_PTR(-EIO);
2677 } 2678 }
2678 if (d_ancestor(new, dentry)) { 2679 if (d_ancestor(new, dentry)) {
2679 spin_unlock(&inode->i_lock); 2680 spin_unlock(&inode->i_lock);
2680 dput(new); 2681 dput(new);
2682 iput(inode);
2681 return ERR_PTR(-EIO); 2683 return ERR_PTR(-EIO);
2682 } 2684 }
2683 write_seqlock(&rename_lock); 2685 write_seqlock(&rename_lock);
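
Editor's note: the dcache fix restores the reference-count invariant of d_splice_alias(): the function consumes the inode reference on every exit, so error returns must iput() what they will not attach. A sketch of that ownership rule (hypothetical helper, not the VFS function itself):

#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/fs.h>

/* Caller hands us one reference to @inode; we must either attach it
 * to @dentry or drop it, on *every* path, including errors. */
static struct dentry *attach_or_drop(struct dentry *dentry,
				     struct inode *inode, bool ok)
{
	if (!ok) {
		iput(inode);		/* error path still owns the ref */
		return ERR_PTR(-EIO);
	}
	d_add(dentry, inode);		/* success path transfers the ref */
	return NULL;
}
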
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1b119d3bf924..c4cd1fd86cc2 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
566 s->s_maxbytes = path.dentry->d_sb->s_maxbytes; 566 s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
567 s->s_blocksize = path.dentry->d_sb->s_blocksize; 567 s->s_blocksize = path.dentry->d_sb->s_blocksize;
568 s->s_magic = ECRYPTFS_SUPER_MAGIC; 568 s->s_magic = ECRYPTFS_SUPER_MAGIC;
569 s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
570
571 rc = -EINVAL;
572 if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
573 pr_err("eCryptfs: maximum fs stacking depth exceeded\n");
574 goto out_free;
575 }
569 576
570 inode = ecryptfs_get_inode(path.dentry->d_inode, s); 577 inode = ecryptfs_get_inode(path.dentry->d_inode, s);
571 rc = PTR_ERR(inode); 578 rc = PTR_ERR(inode);
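
Editor's note: this mirrors the guard overlayfs introduces in the same release: every stacked filesystem records how many layers sit beneath it and refuses to mount once FILESYSTEM_MAX_STACK_DEPTH (2) would be exceeded, bounding VFS recursion. A sketch of the pattern (hypothetical helper name):

#include <linux/errno.h>
#include <linux/fs.h>

static int set_stack_depth(struct super_block *upper,
			   struct super_block *lower)
{
	upper->s_stack_depth = lower->s_stack_depth + 1;
	if (upper->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH)
		return -EINVAL;	/* a third stacked layer is refused */
	return 0;
}
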
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 389ba8312d5d..b47c7b8dc275 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -4,7 +4,7 @@
4# Copyright (C) 2008 Panasas Inc. All rights reserved. 4# Copyright (C) 2008 Panasas Inc. All rights reserved.
5# 5#
6# Authors: 6# Authors:
7# Boaz Harrosh <bharrosh@panasas.com> 7# Boaz Harrosh <ooo@electrozaur.com>
8# 8#
9# This program is free software; you can redistribute it and/or modify 9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License version 2 10# it under the terms of the GNU General Public License version 2
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 3bbd46956d77..7d88ef566213 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -4,7 +4,7 @@
4 * Copyright (C) 2005, 2006 4 * Copyright (C) 2005, 2006
5 * Avishay Traeger (avishay@gmail.com) 5 * Avishay Traeger (avishay@gmail.com)
6 * Copyright (C) 2008, 2009 6 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 7 * Boaz Harrosh <ooo@electrozaur.com>
8 * 8 *
9 * Copyrights for code taken from ext2: 9 * Copyrights for code taken from ext2:
10 * Copyright (C) 1992, 1993, 1994, 1995 10 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 49f51ab4caac..d7defd557601 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index fffe86fd7a42..ad9cac670a47 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 71bf8e4fb5d4..1a376b42d305 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3f9cafd73931..f1d3d4eb8c4f 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4731fd991efe..28907460e8fa 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index cfc0205d62c4..7bd8ac8dfb28 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * This file is part of exofs. 7 * This file is part of exofs.
8 * 8 *
@@ -29,7 +29,7 @@
29 29
30#include "ore_raid.h" 30#include "ore_raid.h"
31 31
32MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); 32MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
33MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); 33MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
34MODULE_LICENSE("GPL"); 34MODULE_LICENSE("GPL");
35 35
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 84529b8a331b..27cbdb697649 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) 2011 2 * Copyright (C) 2011
3 * Boaz Harrosh <bharrosh@panasas.com> 3 * Boaz Harrosh <ooo@electrozaur.com>
4 * 4 *
5 * This file is part of the objects raid engine (ore). 5 * This file is part of the objects raid engine (ore).
6 * 6 *
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index cf6375d82129..a6e746775570 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) from 2011 2 * Copyright (C) from 2011
3 * Boaz Harrosh <bharrosh@panasas.com> 3 * Boaz Harrosh <ooo@electrozaur.com>
4 * 4 *
5 * This file is part of the objects raid engine (ore). 5 * This file is part of the objects raid engine (ore).
6 * 6 *
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index ed73ed8ebbee..95965503afcb 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 4dd687c3e747..832e2624b80b 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -2,7 +2,7 @@
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <ooo@electrozaur.com>
6 * 6 *
7 * Copyrights for code taken from ext2: 7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995 8 * Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index 1b4f2f95fc37..5e6a2c0a1f0b 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (C) 2012 2 * Copyright (C) 2012
3 * Sachin Bhamare <sbhamare@panasas.com> 3 * Sachin Bhamare <sbhamare@panasas.com>
4 * Boaz Harrosh <bharrosh@panasas.com> 4 * Boaz Harrosh <ooo@electrozaur.com>
5 * 5 *
6 * This file is part of exofs. 6 * This file is part of exofs.
7 * 7 *
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 581ef40fbe90..83a6f497c4e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
176} 176}
177 177
178/* Initializes an uninitialized block bitmap */ 178/* Initializes an uninitialized block bitmap */
179static void ext4_init_block_bitmap(struct super_block *sb, 179static int ext4_init_block_bitmap(struct super_block *sb,
180 struct buffer_head *bh, 180 struct buffer_head *bh,
181 ext4_group_t block_group, 181 ext4_group_t block_group,
182 struct ext4_group_desc *gdp) 182 struct ext4_group_desc *gdp)
@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb,
192 /* If checksum is bad mark all blocks used to prevent allocation 192 /* If checksum is bad mark all blocks used to prevent allocation
193 * essentially implementing a per-group read-only flag. */ 193 * essentially implementing a per-group read-only flag. */
194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
195 ext4_error(sb, "Checksum bad for group %u", block_group);
196 grp = ext4_get_group_info(sb, block_group); 195 grp = ext4_get_group_info(sb, block_group);
197 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) 196 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
198 percpu_counter_sub(&sbi->s_freeclusters_counter, 197 percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
205 count); 204 count);
206 } 205 }
207 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 206 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
208 return; 207 return -EIO;
209 } 208 }
210 memset(bh->b_data, 0, sb->s_blocksize); 209 memset(bh->b_data, 0, sb->s_blocksize);
211 210
@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
243 sb->s_blocksize * 8, bh->b_data); 242 sb->s_blocksize * 8, bh->b_data);
244 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); 243 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
245 ext4_group_desc_csum_set(sb, block_group, gdp); 244 ext4_group_desc_csum_set(sb, block_group, gdp);
245 return 0;
246} 246}
247 247
248/* Return the number of free blocks in a block group. It is used when 248/* Return the number of free blocks in a block group. It is used when
@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
438 } 438 }
439 ext4_lock_group(sb, block_group); 439 ext4_lock_group(sb, block_group);
440 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 440 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
441 ext4_init_block_bitmap(sb, bh, block_group, desc); 441 int err;
442
443 err = ext4_init_block_bitmap(sb, bh, block_group, desc);
442 set_bitmap_uptodate(bh); 444 set_bitmap_uptodate(bh);
443 set_buffer_uptodate(bh); 445 set_buffer_uptodate(bh);
444 ext4_unlock_group(sb, block_group); 446 ext4_unlock_group(sb, block_group);
445 unlock_buffer(bh); 447 unlock_buffer(bh);
448 if (err)
449 ext4_error(sb, "Checksum bad for grp %u", block_group);
446 return bh; 450 return bh;
447 } 451 }
448 ext4_unlock_group(sb, block_group); 452 ext4_unlock_group(sb, block_group);
@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
636 * Account for the allocated meta blocks. We will never 640 * Account for the allocated meta blocks. We will never
637 * fail EDQUOT for metadata, but we do account for it. 641 * fail EDQUOT for metadata, but we do account for it.
638 */ 642 */
639 if (!(*errp) && 643 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 644 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
642 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 645 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
643 dquot_alloc_block_nofail(inode, 646 dquot_alloc_block_nofail(inode,
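
Editor's note: a subtle point in the balloc change: ext4_init_block_bitmap() now only returns -EIO, and the caller issues ext4_error() after ext4_unlock_group() and unlock_buffer(), since error reporting may sleep or take other locks and must not run under the group spinlock. The shape of that pattern, sketched with a hypothetical lock and message:

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(grp_lock);

static int init_group(bool csum_ok)
{
	int err = 0;

	spin_lock(&grp_lock);
	if (!csum_ok)
		err = -EIO;	/* record the failure while locked */
	/* ... initialize or mark the group corrupt ... */
	spin_unlock(&grp_lock);

	if (err)		/* report only after the lock is dropped */
		pr_err("checksum bad for group\n");
	return err;
}
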
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 3285aa5a706a..b610779a958c 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
24 __u32 provided, calculated; 24 __u32 provided, calculated;
25 struct ext4_sb_info *sbi = EXT4_SB(sb); 25 struct ext4_sb_info *sbi = EXT4_SB(sb);
26 26
27 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 27 if (!ext4_has_metadata_csum(sb))
28 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
29 return 1; 28 return 1;
30 29
31 provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); 30 provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
46 __u32 csum; 45 __u32 csum;
47 struct ext4_sb_info *sbi = EXT4_SB(sb); 46 struct ext4_sb_info *sbi = EXT4_SB(sb);
48 47
49 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 48 if (!ext4_has_metadata_csum(sb))
50 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
51 return; 49 return;
52 50
53 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 51 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
65 struct ext4_sb_info *sbi = EXT4_SB(sb); 63 struct ext4_sb_info *sbi = EXT4_SB(sb);
66 int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; 64 int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
67 65
68 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 66 if (!ext4_has_metadata_csum(sb))
69 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
70 return 1; 67 return 1;
71 68
72 provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); 69 provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
91 __u32 csum; 88 __u32 csum;
92 struct ext4_sb_info *sbi = EXT4_SB(sb); 89 struct ext4_sb_info *sbi = EXT4_SB(sb);
93 90
94 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 91 if (!ext4_has_metadata_csum(sb))
95 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
96 return; 92 return;
97 93
98 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 94 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0bb3f9ea0832..c24143ea9c08 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
151 &file->f_ra, file, 151 &file->f_ra, file,
152 index, 1); 152 index, 1);
153 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 153 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
154 bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); 154 bh = ext4_bread(NULL, inode, map.m_lblk, 0);
155 if (IS_ERR(bh))
156 return PTR_ERR(bh);
155 } 157 }
156 158
157 /*
158 * We ignore I/O errors on directories so users have a chance
159 * of recovering data when there's a bad sector
160 */
161 if (!bh) { 159 if (!bh) {
162 if (!dir_has_error) { 160 if (!dir_has_error) {
163 EXT4_ERROR_FILE(file, 0, 161 EXT4_ERROR_FILE(file, 0,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0c225cdb52c..c55a1faaed58 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -572,15 +572,15 @@ enum {
572 572
573/* 573/*
574 * The bit position of these flags must not overlap with any of the 574 * The bit position of these flags must not overlap with any of the
575 * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(), 575 * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(),
576 * read_extent_tree_block(), ext4_split_extent_at(), 576 * read_extent_tree_block(), ext4_split_extent_at(),
577 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). 577 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
578 * EXT4_EX_NOCACHE is used to indicate that we shouldn't be 578 * EXT4_EX_NOCACHE is used to indicate that we shouldn't be
579 * caching the extents when reading from the extent tree while a 579 * caching the extents when reading from the extent tree while a
580 * truncate or punch hole operation is in progress. 580 * truncate or punch hole operation is in progress.
581 */ 581 */
582#define EXT4_EX_NOCACHE 0x0400 582#define EXT4_EX_NOCACHE 0x40000000
583#define EXT4_EX_FORCE_CACHE 0x0800 583#define EXT4_EX_FORCE_CACHE 0x20000000
584 584
585/* 585/*
586 * Flags used by ext4_free_blocks 586 * Flags used by ext4_free_blocks
@@ -890,6 +890,7 @@ struct ext4_inode_info {
890 struct ext4_es_tree i_es_tree; 890 struct ext4_es_tree i_es_tree;
891 rwlock_t i_es_lock; 891 rwlock_t i_es_lock;
892 struct list_head i_es_lru; 892 struct list_head i_es_lru;
893 unsigned int i_es_all_nr; /* protected by i_es_lock */
893 unsigned int i_es_lru_nr; /* protected by i_es_lock */ 894 unsigned int i_es_lru_nr; /* protected by i_es_lock */
894 unsigned long i_touch_when; /* jiffies of last accessing */ 895 unsigned long i_touch_when; /* jiffies of last accessing */
895 896
@@ -1174,6 +1175,9 @@ struct ext4_super_block {
1174#define EXT4_MF_MNTDIR_SAMPLED 0x0001 1175#define EXT4_MF_MNTDIR_SAMPLED 0x0001
1175#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ 1176#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
1176 1177
1178/* Number of quota types we support */
1179#define EXT4_MAXQUOTAS 2
1180
1177/* 1181/*
1178 * fourth extended-fs super-block data in memory 1182 * fourth extended-fs super-block data in memory
1179 */ 1183 */
@@ -1237,7 +1241,7 @@ struct ext4_sb_info {
1237 u32 s_min_batch_time; 1241 u32 s_min_batch_time;
1238 struct block_device *journal_bdev; 1242 struct block_device *journal_bdev;
1239#ifdef CONFIG_QUOTA 1243#ifdef CONFIG_QUOTA
1240 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 1244 char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
1241 int s_jquota_fmt; /* Format of quota to use */ 1245 int s_jquota_fmt; /* Format of quota to use */
1242#endif 1246#endif
1243 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ 1247 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -1330,8 +1334,7 @@ struct ext4_sb_info {
1330 /* Reclaim extents from extent status tree */ 1334 /* Reclaim extents from extent status tree */
1331 struct shrinker s_es_shrinker; 1335 struct shrinker s_es_shrinker;
1332 struct list_head s_es_lru; 1336 struct list_head s_es_lru;
1333 unsigned long s_es_last_sorted; 1337 struct ext4_es_stats s_es_stats;
1334 struct percpu_counter s_extent_cache_cnt;
1335 struct mb_cache *s_mb_cache; 1338 struct mb_cache *s_mb_cache;
1336 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; 1339 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
1337 1340
@@ -1399,7 +1402,6 @@ enum {
1399 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ 1402 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
1400 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ 1403 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1401 EXT4_STATE_NEWENTRY, /* File just added to dir */ 1404 EXT4_STATE_NEWENTRY, /* File just added to dir */
1402 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
1403 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read 1405 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
1404 nolocking */ 1406 nolocking */
1405 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1407 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
@@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
2086extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2088extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
2087 2089
2088/* inode.c */ 2090/* inode.c */
2089struct buffer_head *ext4_getblk(handle_t *, struct inode *, 2091struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
2090 ext4_lblk_t, int, int *); 2092struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
2091struct buffer_head *ext4_bread(handle_t *, struct inode *,
2092 ext4_lblk_t, int, int *);
2093int ext4_get_block_write(struct inode *inode, sector_t iblock, 2093int ext4_get_block_write(struct inode *inode, sector_t iblock,
2094 struct buffer_head *bh_result, int create); 2094 struct buffer_head *bh_result, int create);
2095int ext4_get_block(struct inode *inode, sector_t iblock, 2095int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle,
2109#define CONVERT_INLINE_DATA 2 2109#define CONVERT_INLINE_DATA 2
2110 2110
2111extern struct inode *ext4_iget(struct super_block *, unsigned long); 2111extern struct inode *ext4_iget(struct super_block *, unsigned long);
2112extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
2112extern int ext4_write_inode(struct inode *, struct writeback_control *); 2113extern int ext4_write_inode(struct inode *, struct writeback_control *);
2113extern int ext4_setattr(struct dentry *, struct iattr *); 2114extern int ext4_setattr(struct dentry *, struct iattr *);
2114extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 2115extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb,
2332static inline int ext4_has_group_desc_csum(struct super_block *sb) 2333static inline int ext4_has_group_desc_csum(struct super_block *sb)
2333{ 2334{
2334 return EXT4_HAS_RO_COMPAT_FEATURE(sb, 2335 return EXT4_HAS_RO_COMPAT_FEATURE(sb,
2335 EXT4_FEATURE_RO_COMPAT_GDT_CSUM | 2336 EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
2336 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); 2337 (EXT4_SB(sb)->s_chksum_driver != NULL);
2337} 2338}
2338 2339
2340static inline int ext4_has_metadata_csum(struct super_block *sb)
2341{
2342 WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
2343 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
2344 !EXT4_SB(sb)->s_chksum_driver);
2345
2346 return (EXT4_SB(sb)->s_chksum_driver != NULL);
2347}
2339static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 2348static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
2340{ 2349{
2341 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | 2350 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2731,21 +2740,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode,
2731 struct ext4_extent *ex1, 2740 struct ext4_extent *ex1,
2732 struct ext4_extent *ex2); 2741 struct ext4_extent *ex2);
2733extern int ext4_ext_insert_extent(handle_t *, struct inode *, 2742extern int ext4_ext_insert_extent(handle_t *, struct inode *,
2734 struct ext4_ext_path *, 2743 struct ext4_ext_path **,
2735 struct ext4_extent *, int); 2744 struct ext4_extent *, int);
2736extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 2745extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
2737 struct ext4_ext_path *, 2746 struct ext4_ext_path **,
2738 int flags); 2747 int flags);
2739extern void ext4_ext_drop_refs(struct ext4_ext_path *); 2748extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2740extern int ext4_ext_check_inode(struct inode *inode); 2749extern int ext4_ext_check_inode(struct inode *inode);
2741extern int ext4_find_delalloc_range(struct inode *inode, 2750extern int ext4_find_delalloc_range(struct inode *inode,
2742 ext4_lblk_t lblk_start, 2751 ext4_lblk_t lblk_start,
2743 ext4_lblk_t lblk_end); 2752 ext4_lblk_t lblk_end);
2744extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 2753extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2754extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
2745extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2755extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2746 __u64 start, __u64 len); 2756 __u64 start, __u64 len);
2747extern int ext4_ext_precache(struct inode *inode); 2757extern int ext4_ext_precache(struct inode *inode);
2748extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); 2758extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2759extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
2760 struct inode *inode2, ext4_lblk_t lblk1,
2761 ext4_lblk_t lblk2, ext4_lblk_t count,
 2762 int mark_unwritten, int *err);
2749 2763
2750/* move_extent.c */ 2764/* move_extent.c */
2751extern void ext4_double_down_write_data_sem(struct inode *first, 2765extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2755extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2769extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2756 __u64 start_orig, __u64 start_donor, 2770 __u64 start_orig, __u64 start_donor,
2757 __u64 len, __u64 *moved_len); 2771 __u64 len, __u64 *moved_len);
2758extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
2759 struct ext4_extent **extent);
2760 2772
2761/* page-io.c */ 2773/* page-io.c */
2762extern int __init ext4_init_pageio(void); 2774extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a867f5ca9991..3c9381547094 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh)
123struct ext4_ext_path { 123struct ext4_ext_path {
124 ext4_fsblk_t p_block; 124 ext4_fsblk_t p_block;
125 __u16 p_depth; 125 __u16 p_depth;
126 __u16 p_maxdepth;
126 struct ext4_extent *p_ext; 127 struct ext4_extent *p_ext;
127 struct ext4_extent_idx *p_idx; 128 struct ext4_extent_idx *p_idx;
128 struct ext4_extent_header *p_hdr; 129 struct ext4_extent_header *p_hdr;
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0074e0d23d6e..3445035c7e01 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
256 set_buffer_prio(bh); 256 set_buffer_prio(bh);
257 if (ext4_handle_valid(handle)) { 257 if (ext4_handle_valid(handle)) {
258 err = jbd2_journal_dirty_metadata(handle, bh); 258 err = jbd2_journal_dirty_metadata(handle, bh);
259 /* Errors can only happen if there is a bug */ 259 /* Errors can only happen due to aborted journal or a nasty bug */
260 if (WARN_ON_ONCE(err)) { 260 if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
261 ext4_journal_abort_handle(where, line, __func__, bh, 261 ext4_journal_abort_handle(where, line, __func__, bh,
262 handle, err); 262 handle, err);
263 if (inode == NULL) { 263 if (inode == NULL) {
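The relaxed check means a jbd2_journal_dirty_metadata() failure on a handle that is_handle_aborted() already reports as dead is treated as expected fallout rather than a bug worth a WARN. A one-line fragment modeling the new condition (stand-in names, not the kernel macros):

	/* Warn and abort only when the handle was not already aborted:
	 * an error on an aborted handle is expected, not a new bug. */
	static int should_warn_and_abort(int err, int handle_aborted)
	{
		return err != 0 && !handle_aborted;
	}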
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 17c00ff202f2..9c5b49fb281e 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -102,9 +102,9 @@
102#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 102#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
103#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 103#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
104#endif 104#endif
105#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) 105#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
106#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 106#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
107#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 107#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
108 108
109static inline int ext4_jbd2_credits_xattr(struct inode *inode) 109static inline int ext4_jbd2_credits_xattr(struct inode *inode)
110{ 110{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74292a71b384..37043d0b2be8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
73{ 73{
74 struct ext4_extent_tail *et; 74 struct ext4_extent_tail *et;
75 75
76 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 76 if (!ext4_has_metadata_csum(inode->i_sb))
77 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
78 return 1; 77 return 1;
79 78
80 et = find_ext4_extent_tail(eh); 79 et = find_ext4_extent_tail(eh);
@@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
88{ 87{
89 struct ext4_extent_tail *et; 88 struct ext4_extent_tail *et;
90 89
91 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 90 if (!ext4_has_metadata_csum(inode->i_sb))
92 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
93 return; 91 return;
94 92
95 et = find_ext4_extent_tail(eh); 93 et = find_ext4_extent_tail(eh);
@@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode,
98 96
99static int ext4_split_extent(handle_t *handle, 97static int ext4_split_extent(handle_t *handle,
100 struct inode *inode, 98 struct inode *inode,
101 struct ext4_ext_path *path, 99 struct ext4_ext_path **ppath,
102 struct ext4_map_blocks *map, 100 struct ext4_map_blocks *map,
103 int split_flag, 101 int split_flag,
104 int flags); 102 int flags);
105 103
106static int ext4_split_extent_at(handle_t *handle, 104static int ext4_split_extent_at(handle_t *handle,
107 struct inode *inode, 105 struct inode *inode,
108 struct ext4_ext_path *path, 106 struct ext4_ext_path **ppath,
109 ext4_lblk_t split, 107 ext4_lblk_t split,
110 int split_flag, 108 int split_flag,
111 int flags); 109 int flags);
@@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
291 return size; 289 return size;
292} 290}
293 291
292static inline int
293ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
294 struct ext4_ext_path **ppath, ext4_lblk_t lblk,
295 int nofail)
296{
297 struct ext4_ext_path *path = *ppath;
298 int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
299
300 return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
301 EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
302 EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
 303 (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL : 0));
304}
305
294/* 306/*
295 * Calculate the number of metadata blocks needed 307 * Calculate the number of metadata blocks needed
296 * to allocate @blocks 308 * to allocate @blocks
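ext4_force_split_extent_at() packages the common "split here, no matter what" call: it preserves the unwritten bit across both halves and, when nofail is set, allows dipping into reserved metadata blocks. A fragment sketching the flag composition with placeholder flag values (the real values live in ext4.h):

	/* Placeholder flag values; the kernel's real ones are in ext4.h. */
	enum {
		MARK_UNWRIT1    = 0x01, MARK_UNWRIT2 = 0x02,
		EX_NOCACHE      = 0x04, PRE_IO       = 0x08,
		METADATA_NOFAIL = 0x10,
	};

	/* Split flags: keep both halves unwritten iff the original was. */
	static int split_flags(int unwritten)
	{
		return unwritten ? (MARK_UNWRIT1 | MARK_UNWRIT2) : 0;
	}

	/* get_blocks flags: bypass the extent cache, behave as a pre-IO
	 * split, and optionally allow the reserved metadata pool. */
	static int getblocks_flags(int nofail)
	{
		return EX_NOCACHE | PRE_IO | (nofail ? METADATA_NOFAIL : 0);
	}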
@@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
695 707
696void ext4_ext_drop_refs(struct ext4_ext_path *path) 708void ext4_ext_drop_refs(struct ext4_ext_path *path)
697{ 709{
698 int depth = path->p_depth; 710 int depth, i;
699 int i;
700 711
712 if (!path)
713 return;
714 depth = path->p_depth;
701 for (i = 0; i <= depth; i++, path++) 715 for (i = 0; i <= depth; i++, path++)
702 if (path->p_bh) { 716 if (path->p_bh) {
703 brelse(path->p_bh); 717 brelse(path->p_bh);
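Making ext4_ext_drop_refs() a no-op on NULL is what lets the "if (path) { drop; kfree; }" cleanup blocks elsewhere in this patch collapse into an unconditional pair of calls. A runnable userspace model of the pattern, with stand-in types (free(NULL) being a no-op mirrors kfree(NULL)):

	#include <stdlib.h>

	struct path_model {
		void *p_bh; /* stand-in for a per-level buffer_head */
	};

	/* NULL-tolerant release, modeled on the reworked drop_refs. */
	static void drop_refs(struct path_model *path)
	{
		if (!path)
			return;
		/* ...release each level's buffer head here... */
		path->p_bh = NULL;
	}

	int main(void)
	{
		struct path_model *path = NULL;

		/* Error paths can now clean up unconditionally: */
		drop_refs(path); /* safe on NULL */
		free(path);      /* free(NULL) is a no-op, like kfree(NULL) */
		return 0;
	}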
@@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
841} 855}
842 856
843struct ext4_ext_path * 857struct ext4_ext_path *
844ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, 858ext4_find_extent(struct inode *inode, ext4_lblk_t block,
845 struct ext4_ext_path *path, int flags) 859 struct ext4_ext_path **orig_path, int flags)
846{ 860{
847 struct ext4_extent_header *eh; 861 struct ext4_extent_header *eh;
848 struct buffer_head *bh; 862 struct buffer_head *bh;
849 short int depth, i, ppos = 0, alloc = 0; 863 struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
864 short int depth, i, ppos = 0;
850 int ret; 865 int ret;
851 866
852 eh = ext_inode_hdr(inode); 867 eh = ext_inode_hdr(inode);
853 depth = ext_depth(inode); 868 depth = ext_depth(inode);
854 869
855 /* account possible depth increase */ 870 if (path) {
871 ext4_ext_drop_refs(path);
872 if (depth > path[0].p_maxdepth) {
873 kfree(path);
874 *orig_path = path = NULL;
875 }
876 }
856 if (!path) { 877 if (!path) {
878 /* account possible depth increase */
857 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), 879 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
858 GFP_NOFS); 880 GFP_NOFS);
859 if (!path) 881 if (unlikely(!path))
860 return ERR_PTR(-ENOMEM); 882 return ERR_PTR(-ENOMEM);
861 alloc = 1; 883 path[0].p_maxdepth = depth + 1;
862 } 884 }
863 path[0].p_hdr = eh; 885 path[0].p_hdr = eh;
864 path[0].p_bh = NULL; 886 path[0].p_bh = NULL;
@@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
876 898
877 bh = read_extent_tree_block(inode, path[ppos].p_block, --i, 899 bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
878 flags); 900 flags);
879 if (IS_ERR(bh)) { 901 if (unlikely(IS_ERR(bh))) {
880 ret = PTR_ERR(bh); 902 ret = PTR_ERR(bh);
881 goto err; 903 goto err;
882 } 904 }
@@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
910 932
911err: 933err:
912 ext4_ext_drop_refs(path); 934 ext4_ext_drop_refs(path);
913 if (alloc) 935 kfree(path);
914 kfree(path); 936 if (orig_path)
937 *orig_path = NULL;
915 return ERR_PTR(ret); 938 return ERR_PTR(ret);
916} 939}
917 940
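The reworked ext4_find_extent() takes the address of the caller's cached path. On entry it drops the old references; if the tree has outgrown the cached array it frees it and clears the caller's pointer before allocating a fresh one, and on error it frees the path and clears *orig_path so callers cannot double-free. A runnable userspace model of that ownership contract (all names are stand-ins):

	#include <stdio.h>
	#include <stdlib.h>

	struct path_model {
		unsigned short p_maxdepth; /* deepest tree this array holds */
	};

	/* Model of the lookup: may reuse, reallocate, or free the caller's
	 * cached array, always leaving *cache in a safe state. */
	static struct path_model *find_path(unsigned short depth,
					    struct path_model **cache)
	{
		struct path_model *path = cache ? *cache : NULL;

		if (path && depth > path->p_maxdepth) {
			free(path);          /* tree grew past the array */
			*cache = path = NULL;
		}
		if (!path) {
			path = calloc(depth + 2, sizeof(*path));
			if (!path)
				return NULL; /* kernel: ERR_PTR(-ENOMEM) */
			path->p_maxdepth = depth + 1;
		}
		return path;
	}

	int main(void)
	{
		struct path_model *path = NULL;

		path = find_path(1, &path); /* first call allocates */
		printf("maxdepth=%u\n", (unsigned)path->p_maxdepth);
		path = find_path(3, &path); /* deeper tree: realloc */
		printf("maxdepth=%u\n", (unsigned)path->p_maxdepth);
		free(path);
		return 0;
	}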
@@ -1238,16 +1261,24 @@ cleanup:
1238 * just created block 1261 * just created block
1239 */ 1262 */
1240static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1263static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1241 unsigned int flags, 1264 unsigned int flags)
1242 struct ext4_extent *newext)
1243{ 1265{
1244 struct ext4_extent_header *neh; 1266 struct ext4_extent_header *neh;
1245 struct buffer_head *bh; 1267 struct buffer_head *bh;
1246 ext4_fsblk_t newblock; 1268 ext4_fsblk_t newblock, goal = 0;
1269 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
1247 int err = 0; 1270 int err = 0;
1248 1271
1249 newblock = ext4_ext_new_meta_block(handle, inode, NULL, 1272 /* Try to prepend new index to old one */
1250 newext, &err, flags); 1273 if (ext_depth(inode))
1274 goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
1275 if (goal > le32_to_cpu(es->s_first_data_block)) {
1276 flags |= EXT4_MB_HINT_TRY_GOAL;
1277 goal--;
1278 } else
1279 goal = ext4_inode_to_goal_block(inode);
1280 newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
1281 NULL, &err);
1251 if (newblock == 0) 1282 if (newblock == 0)
1252 return err; 1283 return err;
1253 1284
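With newext dropped from its arguments, ext4_ext_grow_indepth() derives the allocation goal itself: when the tree already has depth, it aims for the block just in front of the first index block, so the new top-level index lands next to the old one; otherwise it falls back to the inode's usual goal block. A small runnable model of that heuristic (stand-in names, made-up block numbers):

	#include <stdio.h>

	/* Stand-in goal heuristic modeled on the hunk above. Returns the
	 * goal block and sets *try_goal when the allocator gets a hint. */
	static unsigned long long pick_goal(int depth,
					    unsigned long long first_index_pblk,
					    unsigned long long first_data_block,
					    unsigned long long inode_goal,
					    int *try_goal)
	{
		unsigned long long goal = depth ? first_index_pblk : 0;

		if (goal > first_data_block) {
			*try_goal = 1;
			return goal - 1; /* prepend to the old index block */
		}
		*try_goal = 0;
		return inode_goal;
	}

	int main(void)
	{
		int hint;
		unsigned long long goal = pick_goal(2, 4096, 1, 9000, &hint);

		printf("goal=%llu hint=%d\n", goal, hint); /* 4095, 1 */
		return 0;
	}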
@@ -1314,9 +1345,10 @@ out:
1314static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1345static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1315 unsigned int mb_flags, 1346 unsigned int mb_flags,
1316 unsigned int gb_flags, 1347 unsigned int gb_flags,
1317 struct ext4_ext_path *path, 1348 struct ext4_ext_path **ppath,
1318 struct ext4_extent *newext) 1349 struct ext4_extent *newext)
1319{ 1350{
1351 struct ext4_ext_path *path = *ppath;
1320 struct ext4_ext_path *curp; 1352 struct ext4_ext_path *curp;
1321 int depth, i, err = 0; 1353 int depth, i, err = 0;
1322 1354
@@ -1340,23 +1372,21 @@ repeat:
1340 goto out; 1372 goto out;
1341 1373
1342 /* refill path */ 1374 /* refill path */
1343 ext4_ext_drop_refs(path); 1375 path = ext4_find_extent(inode,
1344 path = ext4_ext_find_extent(inode,
1345 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1376 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1346 path, gb_flags); 1377 ppath, gb_flags);
1347 if (IS_ERR(path)) 1378 if (IS_ERR(path))
1348 err = PTR_ERR(path); 1379 err = PTR_ERR(path);
1349 } else { 1380 } else {
1350 /* tree is full, time to grow in depth */ 1381 /* tree is full, time to grow in depth */
1351 err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); 1382 err = ext4_ext_grow_indepth(handle, inode, mb_flags);
1352 if (err) 1383 if (err)
1353 goto out; 1384 goto out;
1354 1385
1355 /* refill path */ 1386 /* refill path */
1356 ext4_ext_drop_refs(path); 1387 path = ext4_find_extent(inode,
1357 path = ext4_ext_find_extent(inode,
1358 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1388 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1359 path, gb_flags); 1389 ppath, gb_flags);
1360 if (IS_ERR(path)) { 1390 if (IS_ERR(path)) {
1361 err = PTR_ERR(path); 1391 err = PTR_ERR(path);
1362 goto out; 1392 goto out;
@@ -1559,7 +1589,7 @@ found_extent:
1559 * allocated block. Thus, index entries have to be consistent 1589 * allocated block. Thus, index entries have to be consistent
1560 * with leaves. 1590 * with leaves.
1561 */ 1591 */
1562static ext4_lblk_t 1592ext4_lblk_t
1563ext4_ext_next_allocated_block(struct ext4_ext_path *path) 1593ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1564{ 1594{
1565 int depth; 1595 int depth;
@@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1802 sizeof(struct ext4_extent_idx); 1832 sizeof(struct ext4_extent_idx);
1803 s += sizeof(struct ext4_extent_header); 1833 s += sizeof(struct ext4_extent_header);
1804 1834
1835 path[1].p_maxdepth = path[0].p_maxdepth;
1805 memcpy(path[0].p_hdr, path[1].p_hdr, s); 1836 memcpy(path[0].p_hdr, path[1].p_hdr, s);
1806 path[0].p_depth = 0; 1837 path[0].p_depth = 0;
1807 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + 1838 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
@@ -1896,9 +1927,10 @@ out:
1896 * creating new leaf in the no-space case. 1927 * creating new leaf in the no-space case.
1897 */ 1928 */
1898int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1929int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1899 struct ext4_ext_path *path, 1930 struct ext4_ext_path **ppath,
1900 struct ext4_extent *newext, int gb_flags) 1931 struct ext4_extent *newext, int gb_flags)
1901{ 1932{
1933 struct ext4_ext_path *path = *ppath;
1902 struct ext4_extent_header *eh; 1934 struct ext4_extent_header *eh;
1903 struct ext4_extent *ex, *fex; 1935 struct ext4_extent *ex, *fex;
1904 struct ext4_extent *nearex; /* nearest extent */ 1936 struct ext4_extent *nearex; /* nearest extent */
@@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1907 ext4_lblk_t next; 1939 ext4_lblk_t next;
1908 int mb_flags = 0, unwritten; 1940 int mb_flags = 0, unwritten;
1909 1941
1942 if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1943 mb_flags |= EXT4_MB_DELALLOC_RESERVED;
1910 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1944 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1911 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1945 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1912 return -EIO; 1946 return -EIO;
@@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1925 /* 1959 /*
1926 * Try to see whether we should rather test the extent on 1960 * Try to see whether we should rather test the extent on
1927 * right from ex, or from the left of ex. This is because 1961 * right from ex, or from the left of ex. This is because
1928 * ext4_ext_find_extent() can return either extent on the 1962 * ext4_find_extent() can return either extent on the
1929 * left, or on the right from the searched position. This 1963 * left, or on the right from the searched position. This
1930 * will make merging more effective. 1964 * will make merging more effective.
1931 */ 1965 */
@@ -2008,7 +2042,7 @@ prepend:
2008 if (next != EXT_MAX_BLOCKS) { 2042 if (next != EXT_MAX_BLOCKS) {
2009 ext_debug("next leaf block - %u\n", next); 2043 ext_debug("next leaf block - %u\n", next);
2010 BUG_ON(npath != NULL); 2044 BUG_ON(npath != NULL);
2011 npath = ext4_ext_find_extent(inode, next, NULL, 0); 2045 npath = ext4_find_extent(inode, next, NULL, 0);
2012 if (IS_ERR(npath)) 2046 if (IS_ERR(npath))
2013 return PTR_ERR(npath); 2047 return PTR_ERR(npath);
2014 BUG_ON(npath->p_depth != path->p_depth); 2048 BUG_ON(npath->p_depth != path->p_depth);
@@ -2028,9 +2062,9 @@ prepend:
2028 * We're gonna add a new leaf in the tree. 2062 * We're gonna add a new leaf in the tree.
2029 */ 2063 */
2030 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) 2064 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
2031 mb_flags = EXT4_MB_USE_RESERVED; 2065 mb_flags |= EXT4_MB_USE_RESERVED;
2032 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, 2066 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2033 path, newext); 2067 ppath, newext);
2034 if (err) 2068 if (err)
2035 goto cleanup; 2069 goto cleanup;
2036 depth = ext_depth(inode); 2070 depth = ext_depth(inode);
@@ -2108,10 +2142,8 @@ merge:
2108 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 2142 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2109 2143
2110cleanup: 2144cleanup:
2111 if (npath) { 2145 ext4_ext_drop_refs(npath);
2112 ext4_ext_drop_refs(npath); 2146 kfree(npath);
2113 kfree(npath);
2114 }
2115 return err; 2147 return err;
2116} 2148}
2117 2149
@@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2133 /* find extent for this block */ 2165 /* find extent for this block */
2134 down_read(&EXT4_I(inode)->i_data_sem); 2166 down_read(&EXT4_I(inode)->i_data_sem);
2135 2167
2136 if (path && ext_depth(inode) != depth) { 2168 path = ext4_find_extent(inode, block, &path, 0);
2137 /* depth was changed. we have to realloc path */
2138 kfree(path);
2139 path = NULL;
2140 }
2141
2142 path = ext4_ext_find_extent(inode, block, path, 0);
2143 if (IS_ERR(path)) { 2169 if (IS_ERR(path)) {
2144 up_read(&EXT4_I(inode)->i_data_sem); 2170 up_read(&EXT4_I(inode)->i_data_sem);
2145 err = PTR_ERR(path); 2171 err = PTR_ERR(path);
@@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2156 } 2182 }
2157 ex = path[depth].p_ext; 2183 ex = path[depth].p_ext;
2158 next = ext4_ext_next_allocated_block(path); 2184 next = ext4_ext_next_allocated_block(path);
2159 ext4_ext_drop_refs(path);
2160 2185
2161 flags = 0; 2186 flags = 0;
2162 exists = 0; 2187 exists = 0;
@@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2266 block = es.es_lblk + es.es_len; 2291 block = es.es_lblk + es.es_len;
2267 } 2292 }
2268 2293
2269 if (path) { 2294 ext4_ext_drop_refs(path);
2270 ext4_ext_drop_refs(path); 2295 kfree(path);
2271 kfree(path);
2272 }
2273
2274 return err; 2296 return err;
2275} 2297}
2276 2298
@@ -2826,7 +2848,7 @@ again:
2826 ext4_lblk_t ee_block; 2848 ext4_lblk_t ee_block;
2827 2849
2828 /* find extent for this block */ 2850 /* find extent for this block */
2829 path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); 2851 path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
2830 if (IS_ERR(path)) { 2852 if (IS_ERR(path)) {
2831 ext4_journal_stop(handle); 2853 ext4_journal_stop(handle);
2832 return PTR_ERR(path); 2854 return PTR_ERR(path);
@@ -2854,24 +2876,14 @@ again:
2854 */ 2876 */
2855 if (end >= ee_block && 2877 if (end >= ee_block &&
2856 end < ee_block + ext4_ext_get_actual_len(ex) - 1) { 2878 end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
2857 int split_flag = 0;
2858
2859 if (ext4_ext_is_unwritten(ex))
2860 split_flag = EXT4_EXT_MARK_UNWRIT1 |
2861 EXT4_EXT_MARK_UNWRIT2;
2862
2863 /* 2879 /*
2864 * Split the extent in two so that 'end' is the last 2880 * Split the extent in two so that 'end' is the last
2865 * block in the first new extent. Also we should not 2881 * block in the first new extent. Also we should not
2866 * fail removing space due to ENOSPC so try to use 2882 * fail removing space due to ENOSPC so try to use
2867 * reserved block if that happens. 2883 * reserved block if that happens.
2868 */ 2884 */
2869 err = ext4_split_extent_at(handle, inode, path, 2885 err = ext4_force_split_extent_at(handle, inode, &path,
2870 end + 1, split_flag, 2886 end + 1, 1);
2871 EXT4_EX_NOCACHE |
2872 EXT4_GET_BLOCKS_PRE_IO |
2873 EXT4_GET_BLOCKS_METADATA_NOFAIL);
2874
2875 if (err < 0) 2887 if (err < 0)
2876 goto out; 2888 goto out;
2877 } 2889 }
@@ -2893,7 +2905,7 @@ again:
2893 ext4_journal_stop(handle); 2905 ext4_journal_stop(handle);
2894 return -ENOMEM; 2906 return -ENOMEM;
2895 } 2907 }
2896 path[0].p_depth = depth; 2908 path[0].p_maxdepth = path[0].p_depth = depth;
2897 path[0].p_hdr = ext_inode_hdr(inode); 2909 path[0].p_hdr = ext_inode_hdr(inode);
2898 i = 0; 2910 i = 0;
2899 2911
@@ -3013,10 +3025,9 @@ again:
3013out: 3025out:
3014 ext4_ext_drop_refs(path); 3026 ext4_ext_drop_refs(path);
3015 kfree(path); 3027 kfree(path);
3016 if (err == -EAGAIN) { 3028 path = NULL;
3017 path = NULL; 3029 if (err == -EAGAIN)
3018 goto again; 3030 goto again;
3019 }
3020 ext4_journal_stop(handle); 3031 ext4_journal_stop(handle);
3021 3032
3022 return err; 3033 return err;
@@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3130 */ 3141 */
3131static int ext4_split_extent_at(handle_t *handle, 3142static int ext4_split_extent_at(handle_t *handle,
3132 struct inode *inode, 3143 struct inode *inode,
3133 struct ext4_ext_path *path, 3144 struct ext4_ext_path **ppath,
3134 ext4_lblk_t split, 3145 ext4_lblk_t split,
3135 int split_flag, 3146 int split_flag,
3136 int flags) 3147 int flags)
3137{ 3148{
3149 struct ext4_ext_path *path = *ppath;
3138 ext4_fsblk_t newblock; 3150 ext4_fsblk_t newblock;
3139 ext4_lblk_t ee_block; 3151 ext4_lblk_t ee_block;
3140 struct ext4_extent *ex, newex, orig_ex, zero_ex; 3152 struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
3205 if (split_flag & EXT4_EXT_MARK_UNWRIT2) 3217 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3206 ext4_ext_mark_unwritten(ex2); 3218 ext4_ext_mark_unwritten(ex2);
3207 3219
3208 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3220 err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
3209 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 3221 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3210 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { 3222 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
3211 if (split_flag & EXT4_EXT_DATA_VALID1) { 3223 if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3271,11 +3283,12 @@ fix_extent_len:
3271 */ 3283 */
3272static int ext4_split_extent(handle_t *handle, 3284static int ext4_split_extent(handle_t *handle,
3273 struct inode *inode, 3285 struct inode *inode,
3274 struct ext4_ext_path *path, 3286 struct ext4_ext_path **ppath,
3275 struct ext4_map_blocks *map, 3287 struct ext4_map_blocks *map,
3276 int split_flag, 3288 int split_flag,
3277 int flags) 3289 int flags)
3278{ 3290{
3291 struct ext4_ext_path *path = *ppath;
3279 ext4_lblk_t ee_block; 3292 ext4_lblk_t ee_block;
3280 struct ext4_extent *ex; 3293 struct ext4_extent *ex;
3281 unsigned int ee_len, depth; 3294 unsigned int ee_len, depth;
@@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle,
3298 EXT4_EXT_MARK_UNWRIT2; 3311 EXT4_EXT_MARK_UNWRIT2;
3299 if (split_flag & EXT4_EXT_DATA_VALID2) 3312 if (split_flag & EXT4_EXT_DATA_VALID2)
3300 split_flag1 |= EXT4_EXT_DATA_VALID1; 3313 split_flag1 |= EXT4_EXT_DATA_VALID1;
3301 err = ext4_split_extent_at(handle, inode, path, 3314 err = ext4_split_extent_at(handle, inode, ppath,
3302 map->m_lblk + map->m_len, split_flag1, flags1); 3315 map->m_lblk + map->m_len, split_flag1, flags1);
3303 if (err) 3316 if (err)
3304 goto out; 3317 goto out;
@@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle,
3309 * Update path is required because previous ext4_split_extent_at() may 3322 * Update path is required because previous ext4_split_extent_at() may
3310 * result in split of original leaf or extent zeroout. 3323 * result in split of original leaf or extent zeroout.
3311 */ 3324 */
3312 ext4_ext_drop_refs(path); 3325 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3313 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3314 if (IS_ERR(path)) 3326 if (IS_ERR(path))
3315 return PTR_ERR(path); 3327 return PTR_ERR(path);
3316 depth = ext_depth(inode); 3328 depth = ext_depth(inode);
@@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle,
3330 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | 3342 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3331 EXT4_EXT_MARK_UNWRIT2); 3343 EXT4_EXT_MARK_UNWRIT2);
3332 } 3344 }
3333 err = ext4_split_extent_at(handle, inode, path, 3345 err = ext4_split_extent_at(handle, inode, ppath,
3334 map->m_lblk, split_flag1, flags); 3346 map->m_lblk, split_flag1, flags);
3335 if (err) 3347 if (err)
3336 goto out; 3348 goto out;
@@ -3364,9 +3376,10 @@ out:
3364static int ext4_ext_convert_to_initialized(handle_t *handle, 3376static int ext4_ext_convert_to_initialized(handle_t *handle,
3365 struct inode *inode, 3377 struct inode *inode,
3366 struct ext4_map_blocks *map, 3378 struct ext4_map_blocks *map,
3367 struct ext4_ext_path *path, 3379 struct ext4_ext_path **ppath,
3368 int flags) 3380 int flags)
3369{ 3381{
3382 struct ext4_ext_path *path = *ppath;
3370 struct ext4_sb_info *sbi; 3383 struct ext4_sb_info *sbi;
3371 struct ext4_extent_header *eh; 3384 struct ext4_extent_header *eh;
3372 struct ext4_map_blocks split_map; 3385 struct ext4_map_blocks split_map;
@@ -3590,7 +3603,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3590 } 3603 }
3591 } 3604 }
3592 3605
3593 allocated = ext4_split_extent(handle, inode, path, 3606 allocated = ext4_split_extent(handle, inode, ppath,
3594 &split_map, split_flag, flags); 3607 &split_map, split_flag, flags);
3595 if (allocated < 0) 3608 if (allocated < 0)
3596 err = allocated; 3609 err = allocated;
@@ -3629,9 +3642,10 @@ out:
3629static int ext4_split_convert_extents(handle_t *handle, 3642static int ext4_split_convert_extents(handle_t *handle,
3630 struct inode *inode, 3643 struct inode *inode,
3631 struct ext4_map_blocks *map, 3644 struct ext4_map_blocks *map,
3632 struct ext4_ext_path *path, 3645 struct ext4_ext_path **ppath,
3633 int flags) 3646 int flags)
3634{ 3647{
3648 struct ext4_ext_path *path = *ppath;
3635 ext4_lblk_t eof_block; 3649 ext4_lblk_t eof_block;
3636 ext4_lblk_t ee_block; 3650 ext4_lblk_t ee_block;
3637 struct ext4_extent *ex; 3651 struct ext4_extent *ex;
@@ -3665,74 +3679,15 @@ static int ext4_split_convert_extents(handle_t *handle,
3665 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); 3679 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3666 } 3680 }
3667 flags |= EXT4_GET_BLOCKS_PRE_IO; 3681 flags |= EXT4_GET_BLOCKS_PRE_IO;
3668 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3682 return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3669}
3670
3671static int ext4_convert_initialized_extents(handle_t *handle,
3672 struct inode *inode,
3673 struct ext4_map_blocks *map,
3674 struct ext4_ext_path *path)
3675{
3676 struct ext4_extent *ex;
3677 ext4_lblk_t ee_block;
3678 unsigned int ee_len;
3679 int depth;
3680 int err = 0;
3681
3682 depth = ext_depth(inode);
3683 ex = path[depth].p_ext;
3684 ee_block = le32_to_cpu(ex->ee_block);
3685 ee_len = ext4_ext_get_actual_len(ex);
3686
3687 ext_debug("%s: inode %lu, logical"
3688 "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3689 (unsigned long long)ee_block, ee_len);
3690
3691 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3692 err = ext4_split_convert_extents(handle, inode, map, path,
3693 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3694 if (err < 0)
3695 goto out;
3696 ext4_ext_drop_refs(path);
3697 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3698 if (IS_ERR(path)) {
3699 err = PTR_ERR(path);
3700 goto out;
3701 }
3702 depth = ext_depth(inode);
3703 ex = path[depth].p_ext;
3704 if (!ex) {
3705 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3706 (unsigned long) map->m_lblk);
3707 err = -EIO;
3708 goto out;
3709 }
3710 }
3711
3712 err = ext4_ext_get_access(handle, inode, path + depth);
3713 if (err)
3714 goto out;
3715 /* first mark the extent as unwritten */
3716 ext4_ext_mark_unwritten(ex);
3717
3718 /* note: ext4_ext_correct_indexes() isn't needed here because
3719 * borders are not changed
3720 */
3721 ext4_ext_try_to_merge(handle, inode, path, ex);
3722
3723 /* Mark modified extent as dirty */
3724 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3725out:
3726 ext4_ext_show_leaf(inode, path);
3727 return err;
3728} 3683}
3729 3684
3730
3731static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3685static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3732 struct inode *inode, 3686 struct inode *inode,
3733 struct ext4_map_blocks *map, 3687 struct ext4_map_blocks *map,
3734 struct ext4_ext_path *path) 3688 struct ext4_ext_path **ppath)
3735{ 3689{
3690 struct ext4_ext_path *path = *ppath;
3736 struct ext4_extent *ex; 3691 struct ext4_extent *ex;
3737 ext4_lblk_t ee_block; 3692 ext4_lblk_t ee_block;
3738 unsigned int ee_len; 3693 unsigned int ee_len;
@@ -3761,16 +3716,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3761 inode->i_ino, (unsigned long long)ee_block, ee_len, 3716 inode->i_ino, (unsigned long long)ee_block, ee_len,
3762 (unsigned long long)map->m_lblk, map->m_len); 3717 (unsigned long long)map->m_lblk, map->m_len);
3763#endif 3718#endif
3764 err = ext4_split_convert_extents(handle, inode, map, path, 3719 err = ext4_split_convert_extents(handle, inode, map, ppath,
3765 EXT4_GET_BLOCKS_CONVERT); 3720 EXT4_GET_BLOCKS_CONVERT);
3766 if (err < 0) 3721 if (err < 0)
3767 goto out; 3722 return err;
3768 ext4_ext_drop_refs(path); 3723 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3769 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); 3724 if (IS_ERR(path))
3770 if (IS_ERR(path)) { 3725 return PTR_ERR(path);
3771 err = PTR_ERR(path);
3772 goto out;
3773 }
3774 depth = ext_depth(inode); 3726 depth = ext_depth(inode);
3775 ex = path[depth].p_ext; 3727 ex = path[depth].p_ext;
3776 } 3728 }
@@ -3963,12 +3915,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3963} 3915}
3964 3916
3965static int 3917static int
3966ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, 3918convert_initialized_extent(handle_t *handle, struct inode *inode,
3967 struct ext4_map_blocks *map, 3919 struct ext4_map_blocks *map,
3968 struct ext4_ext_path *path, int flags, 3920 struct ext4_ext_path **ppath, int flags,
3969 unsigned int allocated, ext4_fsblk_t newblock) 3921 unsigned int allocated, ext4_fsblk_t newblock)
3970{ 3922{
3971 int ret = 0; 3923 struct ext4_ext_path *path = *ppath;
3924 struct ext4_extent *ex;
3925 ext4_lblk_t ee_block;
3926 unsigned int ee_len;
3927 int depth;
3972 int err = 0; 3928 int err = 0;
3973 3929
3974 /* 3930 /*
@@ -3978,28 +3934,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
3978 if (map->m_len > EXT_UNWRITTEN_MAX_LEN) 3934 if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3979 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; 3935 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3980 3936
3981 ret = ext4_convert_initialized_extents(handle, inode, map, 3937 depth = ext_depth(inode);
3982 path); 3938 ex = path[depth].p_ext;
3983 if (ret >= 0) { 3939 ee_block = le32_to_cpu(ex->ee_block);
3984 ext4_update_inode_fsync_trans(handle, inode, 1); 3940 ee_len = ext4_ext_get_actual_len(ex);
3985 err = check_eofblocks_fl(handle, inode, map->m_lblk, 3941
3986 path, map->m_len); 3942 ext_debug("%s: inode %lu, logical"
3987 } else 3943 "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3988 err = ret; 3944 (unsigned long long)ee_block, ee_len);
3945
3946 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3947 err = ext4_split_convert_extents(handle, inode, map, ppath,
3948 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3949 if (err < 0)
3950 return err;
3951 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3952 if (IS_ERR(path))
3953 return PTR_ERR(path);
3954 depth = ext_depth(inode);
3955 ex = path[depth].p_ext;
3956 if (!ex) {
3957 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3958 (unsigned long) map->m_lblk);
3959 return -EIO;
3960 }
3961 }
3962
3963 err = ext4_ext_get_access(handle, inode, path + depth);
3964 if (err)
3965 return err;
3966 /* first mark the extent as unwritten */
3967 ext4_ext_mark_unwritten(ex);
3968
3969 /* note: ext4_ext_correct_indexes() isn't needed here because
3970 * borders are not changed
3971 */
3972 ext4_ext_try_to_merge(handle, inode, path, ex);
3973
3974 /* Mark modified extent as dirty */
3975 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3976 if (err)
3977 return err;
3978 ext4_ext_show_leaf(inode, path);
3979
3980 ext4_update_inode_fsync_trans(handle, inode, 1);
3981 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
3982 if (err)
3983 return err;
3989 map->m_flags |= EXT4_MAP_UNWRITTEN; 3984 map->m_flags |= EXT4_MAP_UNWRITTEN;
3990 if (allocated > map->m_len) 3985 if (allocated > map->m_len)
3991 allocated = map->m_len; 3986 allocated = map->m_len;
3992 map->m_len = allocated; 3987 map->m_len = allocated;
3993 3988 return allocated;
3994 return err ? err : allocated;
3995} 3989}
3996 3990
3997static int 3991static int
3998ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, 3992ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3999 struct ext4_map_blocks *map, 3993 struct ext4_map_blocks *map,
4000 struct ext4_ext_path *path, int flags, 3994 struct ext4_ext_path **ppath, int flags,
4001 unsigned int allocated, ext4_fsblk_t newblock) 3995 unsigned int allocated, ext4_fsblk_t newblock)
4002{ 3996{
3997 struct ext4_ext_path *path = *ppath;
4003 int ret = 0; 3998 int ret = 0;
4004 int err = 0; 3999 int err = 0;
4005 ext4_io_end_t *io = ext4_inode_aio(inode); 4000 ext4_io_end_t *io = ext4_inode_aio(inode);
@@ -4021,8 +4016,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4021 4016
4022 /* get_block() before submit the IO, split the extent */ 4017 /* get_block() before submit the IO, split the extent */
4023 if (flags & EXT4_GET_BLOCKS_PRE_IO) { 4018 if (flags & EXT4_GET_BLOCKS_PRE_IO) {
4024 ret = ext4_split_convert_extents(handle, inode, map, 4019 ret = ext4_split_convert_extents(handle, inode, map, ppath,
4025 path, flags | EXT4_GET_BLOCKS_CONVERT); 4020 flags | EXT4_GET_BLOCKS_CONVERT);
4026 if (ret <= 0) 4021 if (ret <= 0)
4027 goto out; 4022 goto out;
4028 /* 4023 /*
@@ -4040,7 +4035,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4040 /* IO end_io complete, convert the filled extent to written */ 4035 /* IO end_io complete, convert the filled extent to written */
4041 if (flags & EXT4_GET_BLOCKS_CONVERT) { 4036 if (flags & EXT4_GET_BLOCKS_CONVERT) {
4042 ret = ext4_convert_unwritten_extents_endio(handle, inode, map, 4037 ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
4043 path); 4038 ppath);
4044 if (ret >= 0) { 4039 if (ret >= 0) {
4045 ext4_update_inode_fsync_trans(handle, inode, 1); 4040 ext4_update_inode_fsync_trans(handle, inode, 1);
4046 err = check_eofblocks_fl(handle, inode, map->m_lblk, 4041 err = check_eofblocks_fl(handle, inode, map->m_lblk,
@@ -4078,7 +4073,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4078 } 4073 }
4079 4074
4080 /* buffered write, writepage time, convert*/ 4075 /* buffered write, writepage time, convert*/
4081 ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); 4076 ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
4082 if (ret >= 0) 4077 if (ret >= 0)
4083 ext4_update_inode_fsync_trans(handle, inode, 1); 4078 ext4_update_inode_fsync_trans(handle, inode, 1);
4084out: 4079out:
@@ -4279,7 +4274,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4279 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 4274 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4280 4275
4281 /* find extent for this block */ 4276 /* find extent for this block */
4282 path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); 4277 path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4283 if (IS_ERR(path)) { 4278 if (IS_ERR(path)) {
4284 err = PTR_ERR(path); 4279 err = PTR_ERR(path);
4285 path = NULL; 4280 path = NULL;
@@ -4291,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4291 /* 4286 /*
4292 * consistent leaf must not be empty; 4287 * consistent leaf must not be empty;
4293 * this situation is possible, though, _during_ tree modification; 4288 * this situation is possible, though, _during_ tree modification;
4294 * this is why assert can't be put in ext4_ext_find_extent() 4289 * this is why assert can't be put in ext4_find_extent()
4295 */ 4290 */
4296 if (unlikely(path[depth].p_ext == NULL && depth != 0)) { 4291 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4297 EXT4_ERROR_INODE(inode, "bad extent address " 4292 EXT4_ERROR_INODE(inode, "bad extent address "
@@ -4331,15 +4326,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4331 */ 4326 */
4332 if ((!ext4_ext_is_unwritten(ex)) && 4327 if ((!ext4_ext_is_unwritten(ex)) &&
4333 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { 4328 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4334 allocated = ext4_ext_convert_initialized_extent( 4329 allocated = convert_initialized_extent(
4335 handle, inode, map, path, flags, 4330 handle, inode, map, &path,
4336 allocated, newblock); 4331 flags, allocated, newblock);
4337 goto out2; 4332 goto out2;
4338 } else if (!ext4_ext_is_unwritten(ex)) 4333 } else if (!ext4_ext_is_unwritten(ex))
4339 goto out; 4334 goto out;
4340 4335
4341 ret = ext4_ext_handle_unwritten_extents( 4336 ret = ext4_ext_handle_unwritten_extents(
4342 handle, inode, map, path, flags, 4337 handle, inode, map, &path, flags,
4343 allocated, newblock); 4338 allocated, newblock);
4344 if (ret < 0) 4339 if (ret < 0)
4345 err = ret; 4340 err = ret;
@@ -4376,7 +4371,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4376 4371
4377 /* 4372 /*
4378 * If we are doing bigalloc, check to see if the extent returned 4373 * If we are doing bigalloc, check to see if the extent returned
4379 * by ext4_ext_find_extent() implies a cluster we can use. 4374 * by ext4_find_extent() implies a cluster we can use.
4380 */ 4375 */
4381 if (cluster_offset && ex && 4376 if (cluster_offset && ex &&
4382 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { 4377 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
@@ -4451,6 +4446,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4451 ar.flags = 0; 4446 ar.flags = 0;
4452 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) 4447 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4453 ar.flags |= EXT4_MB_HINT_NOPREALLOC; 4448 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4449 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4450 ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4454 newblock = ext4_mb_new_blocks(handle, &ar, &err); 4451 newblock = ext4_mb_new_blocks(handle, &ar, &err);
4455 if (!newblock) 4452 if (!newblock)
4456 goto out2; 4453 goto out2;
@@ -4486,7 +4483,7 @@ got_allocated_blocks:
4486 err = check_eofblocks_fl(handle, inode, map->m_lblk, 4483 err = check_eofblocks_fl(handle, inode, map->m_lblk,
4487 path, ar.len); 4484 path, ar.len);
4488 if (!err) 4485 if (!err)
4489 err = ext4_ext_insert_extent(handle, inode, path, 4486 err = ext4_ext_insert_extent(handle, inode, &path,
4490 &newex, flags); 4487 &newex, flags);
4491 4488
4492 if (!err && set_unwritten) { 4489 if (!err && set_unwritten) {
@@ -4619,10 +4616,8 @@ out:
4619 map->m_pblk = newblock; 4616 map->m_pblk = newblock;
4620 map->m_len = allocated; 4617 map->m_len = allocated;
4621out2: 4618out2:
4622 if (path) { 4619 ext4_ext_drop_refs(path);
4623 ext4_ext_drop_refs(path); 4620 kfree(path);
4624 kfree(path);
4625 }
4626 4621
4627 trace_ext4_ext_map_blocks_exit(inode, flags, map, 4622 trace_ext4_ext_map_blocks_exit(inode, flags, map,
4628 err ? err : allocated); 4623 err ? err : allocated);
@@ -4799,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4799 max_blocks -= lblk; 4794 max_blocks -= lblk;
4800 4795
4801 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | 4796 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
4802 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; 4797 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4798 EXT4_EX_NOCACHE;
4803 if (mode & FALLOC_FL_KEEP_SIZE) 4799 if (mode & FALLOC_FL_KEEP_SIZE)
4804 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4800 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4805 4801
@@ -4837,15 +4833,21 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4837 ext4_inode_block_unlocked_dio(inode); 4833 ext4_inode_block_unlocked_dio(inode);
4838 inode_dio_wait(inode); 4834 inode_dio_wait(inode);
4839 4835
4836 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4837 flags, mode);
4838 if (ret)
4839 goto out_dio;
4840 /* 4840 /*
4841 * Remove entire range from the extent status tree. 4841 * Remove entire range from the extent status tree.
4842 *
4843 * ext4_es_remove_extent(inode, lblk, max_blocks) is
4844 * NOT sufficient. I'm not sure why this is the case,
4845 * but let's be conservative and remove the extent
4846 * status tree for the entire inode. There should be
4847 * no outstanding delalloc extents thanks to the
4848 * filemap_write_and_wait_range() call above.
4842 */ 4849 */
4843 ret = ext4_es_remove_extent(inode, lblk, max_blocks); 4850 ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
4844 if (ret)
4845 goto out_dio;
4846
4847 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4848 flags, mode);
4849 if (ret) 4851 if (ret)
4850 goto out_dio; 4852 goto out_dio;
4851 } 4853 }
@@ -5304,36 +5306,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5304 struct ext4_ext_path *path; 5306 struct ext4_ext_path *path;
5305 int ret = 0, depth; 5307 int ret = 0, depth;
5306 struct ext4_extent *extent; 5308 struct ext4_extent *extent;
5307 ext4_lblk_t stop_block, current_block; 5309 ext4_lblk_t stop_block;
5308 ext4_lblk_t ex_start, ex_end; 5310 ext4_lblk_t ex_start, ex_end;
5309 5311
5310 /* Let path point to the last extent */ 5312 /* Let path point to the last extent */
5311 path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 5313 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
5312 if (IS_ERR(path)) 5314 if (IS_ERR(path))
5313 return PTR_ERR(path); 5315 return PTR_ERR(path);
5314 5316
5315 depth = path->p_depth; 5317 depth = path->p_depth;
5316 extent = path[depth].p_ext; 5318 extent = path[depth].p_ext;
5317 if (!extent) { 5319 if (!extent)
5318 ext4_ext_drop_refs(path); 5320 goto out;
5319 kfree(path);
5320 return ret;
5321 }
5322 5321
5323 stop_block = le32_to_cpu(extent->ee_block) + 5322 stop_block = le32_to_cpu(extent->ee_block) +
5324 ext4_ext_get_actual_len(extent); 5323 ext4_ext_get_actual_len(extent);
5325 ext4_ext_drop_refs(path);
5326 kfree(path);
5327 5324
5328 /* Nothing to shift, if hole is at the end of file */ 5325 /* Nothing to shift, if hole is at the end of file */
5329 if (start >= stop_block) 5326 if (start >= stop_block)
5330 return ret; 5327 goto out;
5331 5328
5332 /* 5329 /*
5333 * Don't start shifting extents until we make sure the hole is big 5330 * Don't start shifting extents until we make sure the hole is big
 5334 * enough to accommodate the shift. 5331
5335 */ 5332 */
5336 path = ext4_ext_find_extent(inode, start - 1, NULL, 0); 5333 path = ext4_find_extent(inode, start - 1, &path, 0);
5337 if (IS_ERR(path)) 5334 if (IS_ERR(path))
5338 return PTR_ERR(path); 5335 return PTR_ERR(path);
5339 depth = path->p_depth; 5336 depth = path->p_depth;
@@ -5346,8 +5343,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5346 ex_start = 0; 5343 ex_start = 0;
5347 ex_end = 0; 5344 ex_end = 0;
5348 } 5345 }
5349 ext4_ext_drop_refs(path);
5350 kfree(path);
5351 5346
5352 if ((start == ex_start && shift > ex_start) || 5347 if ((start == ex_start && shift > ex_start) ||
5353 (shift > start - ex_end)) 5348 (shift > start - ex_end))
@@ -5355,7 +5350,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5355 5350
 5356 /* It's safe to start updating extents */ 5351
5357 while (start < stop_block) { 5352 while (start < stop_block) {
5358 path = ext4_ext_find_extent(inode, start, NULL, 0); 5353 path = ext4_find_extent(inode, start, &path, 0);
5359 if (IS_ERR(path)) 5354 if (IS_ERR(path))
5360 return PTR_ERR(path); 5355 return PTR_ERR(path);
5361 depth = path->p_depth; 5356 depth = path->p_depth;
@@ -5365,27 +5360,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5365 (unsigned long) start); 5360 (unsigned long) start);
5366 return -EIO; 5361 return -EIO;
5367 } 5362 }
5368 5363 if (start > le32_to_cpu(extent->ee_block)) {
5369 current_block = le32_to_cpu(extent->ee_block);
5370 if (start > current_block) {
5371 /* Hole, move to the next extent */ 5364 /* Hole, move to the next extent */
5372 ret = mext_next_extent(inode, path, &extent); 5365 if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5373 if (ret != 0) { 5366 path[depth].p_ext++;
5374 ext4_ext_drop_refs(path); 5367 } else {
5375 kfree(path); 5368 start = ext4_ext_next_allocated_block(path);
5376 if (ret == 1) 5369 continue;
5377 ret = 0;
5378 break;
5379 } 5370 }
5380 } 5371 }
5381 ret = ext4_ext_shift_path_extents(path, shift, inode, 5372 ret = ext4_ext_shift_path_extents(path, shift, inode,
5382 handle, &start); 5373 handle, &start);
5383 ext4_ext_drop_refs(path);
5384 kfree(path);
5385 if (ret) 5374 if (ret)
5386 break; 5375 break;
5387 } 5376 }
5388 5377out:
5378 ext4_ext_drop_refs(path);
5379 kfree(path);
5389 return ret; 5380 return ret;
5390} 5381}
5391 5382
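The open-coded advance that replaces mext_next_extent() has two cases: while further extents remain in the current leaf it simply bumps p_ext; once the leaf is exhausted it restarts the walk at the next allocated block. A runnable model with a flat index standing in for a leaf:

	#include <stdio.h>

	#define LEAF_LAST 3 /* index of the last extent in the stand-in leaf */

	/* Model: advance within the leaf if possible, otherwise tell the
	 * caller to re-run the lookup at the next allocated block. */
	static int advance(int *ext_idx)
	{
		if (*ext_idx < LEAF_LAST) {
			(*ext_idx)++; /* path[depth].p_ext++ in the kernel */
			return 1;     /* still inside this leaf */
		}
		return 0;             /* leaf exhausted: re-find from next */
	}

	int main(void)
	{
		int idx = 2;

		while (advance(&idx))
			printf("now at extent %d\n", idx);
		printf("leaf done, restart at next allocated block\n");
		return 0;
	}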
@@ -5508,3 +5499,199 @@ out_mutex:
5508 mutex_unlock(&inode->i_mutex); 5499 mutex_unlock(&inode->i_mutex);
5509 return ret; 5500 return ret;
5510} 5501}
5502
5503/**
5504 * ext4_swap_extents - Swap extents between two inodes
5505 *
5506 * @inode1: First inode
5507 * @inode2: Second inode
5508 * @lblk1: Start block for first inode
5509 * @lblk2: Start block for second inode
5510 * @count: Number of blocks to swap
 5511 * @unwritten: Mark second inode's extents as unwritten after swap
5512 * @erp: Pointer to save error value
5513 *
 5514 * This helper routine does exactly what its name promises: it swaps extents.
 5515 * Everything else, such as page-cache locking consistency, bh mapping
 5516 * consistency, and copying the extents' data, must be handled by the caller.
5517 * Locking:
5518 * i_mutex is held for both inodes
5519 * i_data_sem is locked for write for both inodes
5520 * Assumptions:
5521 * All pages from requested range are locked for both inodes
5522 */
5523int
5524ext4_swap_extents(handle_t *handle, struct inode *inode1,
5525 struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5526 ext4_lblk_t count, int unwritten, int *erp)
5527{
5528 struct ext4_ext_path *path1 = NULL;
5529 struct ext4_ext_path *path2 = NULL;
5530 int replaced_count = 0;
5531
5532 BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5533 BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5534 BUG_ON(!mutex_is_locked(&inode1->i_mutex));
 5535 BUG_ON(!mutex_is_locked(&inode2->i_mutex));
5536
5537 *erp = ext4_es_remove_extent(inode1, lblk1, count);
5538 if (unlikely(*erp))
5539 return 0;
5540 *erp = ext4_es_remove_extent(inode2, lblk2, count);
5541 if (unlikely(*erp))
5542 return 0;
5543
5544 while (count) {
5545 struct ext4_extent *ex1, *ex2, tmp_ex;
5546 ext4_lblk_t e1_blk, e2_blk;
5547 int e1_len, e2_len, len;
5548 int split = 0;
5549
5550 path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5551 if (unlikely(IS_ERR(path1))) {
5552 *erp = PTR_ERR(path1);
5553 path1 = NULL;
5554 finish:
5555 count = 0;
5556 goto repeat;
5557 }
5558 path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5559 if (unlikely(IS_ERR(path2))) {
5560 *erp = PTR_ERR(path2);
5561 path2 = NULL;
5562 goto finish;
5563 }
5564 ex1 = path1[path1->p_depth].p_ext;
5565 ex2 = path2[path2->p_depth].p_ext;
 5566 /* Do we have something to swap? */
5567 if (unlikely(!ex2 || !ex1))
5568 goto finish;
5569
5570 e1_blk = le32_to_cpu(ex1->ee_block);
5571 e2_blk = le32_to_cpu(ex2->ee_block);
5572 e1_len = ext4_ext_get_actual_len(ex1);
5573 e2_len = ext4_ext_get_actual_len(ex2);
5574
5575 /* Hole handling */
5576 if (!in_range(lblk1, e1_blk, e1_len) ||
5577 !in_range(lblk2, e2_blk, e2_len)) {
5578 ext4_lblk_t next1, next2;
5579
5580 /* if hole after extent, then go to next extent */
5581 next1 = ext4_ext_next_allocated_block(path1);
5582 next2 = ext4_ext_next_allocated_block(path2);
5583 /* If hole before extent, then shift to that extent */
5584 if (e1_blk > lblk1)
5585 next1 = e1_blk;
5586 if (e2_blk > lblk2)
 5587 next2 = e2_blk;
 5588 /* Do we have something to swap? */
5589 if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5590 goto finish;
 5591 /* Move to the rightmost boundary */
5592 len = next1 - lblk1;
5593 if (len < next2 - lblk2)
5594 len = next2 - lblk2;
5595 if (len > count)
5596 len = count;
5597 lblk1 += len;
5598 lblk2 += len;
5599 count -= len;
5600 goto repeat;
5601 }
5602
5603 /* Prepare left boundary */
5604 if (e1_blk < lblk1) {
5605 split = 1;
5606 *erp = ext4_force_split_extent_at(handle, inode1,
5607 &path1, lblk1, 0);
5608 if (unlikely(*erp))
5609 goto finish;
5610 }
5611 if (e2_blk < lblk2) {
5612 split = 1;
5613 *erp = ext4_force_split_extent_at(handle, inode2,
5614 &path2, lblk2, 0);
5615 if (unlikely(*erp))
5616 goto finish;
5617 }
5618 /* ext4_split_extent_at() may result in leaf extent split,
 5619 * the path must be revalidated. */
5620 if (split)
5621 goto repeat;
5622
5623 /* Prepare right boundary */
5624 len = count;
5625 if (len > e1_blk + e1_len - lblk1)
5626 len = e1_blk + e1_len - lblk1;
5627 if (len > e2_blk + e2_len - lblk2)
5628 len = e2_blk + e2_len - lblk2;
5629
5630 if (len != e1_len) {
5631 split = 1;
5632 *erp = ext4_force_split_extent_at(handle, inode1,
5633 &path1, lblk1 + len, 0);
5634 if (unlikely(*erp))
5635 goto finish;
5636 }
5637 if (len != e2_len) {
5638 split = 1;
5639 *erp = ext4_force_split_extent_at(handle, inode2,
5640 &path2, lblk2 + len, 0);
5641 if (*erp)
5642 goto finish;
5643 }
5644 /* ext4_split_extent_at() may result in leaf extent split,
 5645 * the path must be revalidated. */
5646 if (split)
5647 goto repeat;
5648
5649 BUG_ON(e2_len != e1_len);
5650 *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5651 if (unlikely(*erp))
5652 goto finish;
5653 *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5654 if (unlikely(*erp))
5655 goto finish;
5656
5657 /* Both extents are fully inside boundaries. Swap it now */
5658 tmp_ex = *ex1;
5659 ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5660 ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5661 ex1->ee_len = cpu_to_le16(e2_len);
5662 ex2->ee_len = cpu_to_le16(e1_len);
5663 if (unwritten)
5664 ext4_ext_mark_unwritten(ex2);
5665 if (ext4_ext_is_unwritten(&tmp_ex))
5666 ext4_ext_mark_unwritten(ex1);
5667
5668 ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5669 ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5670 *erp = ext4_ext_dirty(handle, inode2, path2 +
5671 path2->p_depth);
5672 if (unlikely(*erp))
5673 goto finish;
5674 *erp = ext4_ext_dirty(handle, inode1, path1 +
5675 path1->p_depth);
5676 /*
 5677 * Looks scary, eh? The second inode already points to the new
 5678 * blocks and has been successfully dirtied. Luckily an error here
 5679 * can only be caused by a journal error, so the whole transaction
 5680 * will be aborted anyway.
5681 */
5682 if (unlikely(*erp))
5683 goto finish;
5684 lblk1 += len;
5685 lblk2 += len;
5686 replaced_count += len;
5687 count -= len;
5688
5689 repeat:
5690 ext4_ext_drop_refs(path1);
5691 kfree(path1);
5692 ext4_ext_drop_refs(path2);
5693 kfree(path2);
5694 path1 = path2 = NULL;
5695 }
5696 return replaced_count;
5697}
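For reference, a hedged caller skeleton for the new helper. It only illustrates the locking contract spelled out in the comment block above; it is not taken from the patch and will not compile outside the kernel tree, and swap_range is a made-up name:

	/* Caller sketch, not from the patch: everything listed in the
	 * documented assumptions must hold before ext4_swap_extents(). */
	static int swap_range(handle_t *handle, struct inode *inode1,
			      struct inode *inode2, ext4_lblk_t lblk1,
			      ext4_lblk_t lblk2, ext4_lblk_t count)
	{
		int err = 0;
		int replaced;

		/* Assumed already taken by the caller:
		 *  - i_mutex on both inodes
		 *  - i_data_sem held for write on both inodes
		 *  - all pages in both ranges locked
		 * Copying data between the ranges is also the caller's job. */
		replaced = ext4_swap_extents(handle, inode1, inode2,
					     lblk1, lblk2, count,
					     0 /* unwritten */, &err);
		return err ? err : replaced;
	}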
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0b7e28e7eaa4..94e7855ae71b 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -11,6 +11,8 @@
  */
 #include <linux/rbtree.h>
 #include <linux/list_sort.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include "ext4.h"
 #include "extents_status.h"
 
@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
	 */
 	if (!ext4_es_is_delayed(es)) {
 		EXT4_I(inode)->i_es_lru_nr++;
-		percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
+				   s_es_stats.es_stats_lru_cnt);
 	}
 
+	EXT4_I(inode)->i_es_all_nr++;
+	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
 	return es;
 }
 
 static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 {
+	EXT4_I(inode)->i_es_all_nr--;
+	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
 	/* Decrease the lru counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
 		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
 		EXT4_I(inode)->i_es_lru_nr--;
-		percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
+				   s_es_stats.es_stats_lru_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 	unsigned short ee_len;
 	int depth, ee_status, es_status;
 
-	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
+	path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
 		return;
 
@@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		}
 	}
 out:
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 }
 
 static void ext4_es_insert_extent_ind_check(struct inode *inode,
@@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 			  struct extent_status *es)
 {
 	struct ext4_es_tree *tree;
+	struct ext4_es_stats *stats;
 	struct extent_status *es1 = NULL;
 	struct rb_node *node;
 	int found = 0;
@@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 	}
 
 out:
+	stats = &EXT4_SB(inode->i_sb)->s_es_stats;
 	if (found) {
 		BUG_ON(!es1);
 		es->es_lblk = es1->es_lblk;
 		es->es_len = es1->es_len;
 		es->es_pblk = es1->es_pblk;
+		stats->es_stats_cache_hits++;
+	} else {
+		stats->es_stats_cache_misses++;
 	}
 
 	read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 			    struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
+	struct ext4_es_stats *es_stats;
 	struct list_head *cur, *tmp;
 	LIST_HEAD(skipped);
+	ktime_t start_time;
+	u64 scan_time;
 	int nr_shrunk = 0;
 	int retried = 0, skip_precached = 1, nr_skipped = 0;
 
+	es_stats = &sbi->s_es_stats;
+	start_time = ktime_get();
 	spin_lock(&sbi->s_es_lru_lock);
 
 retry:
@@ -948,7 +966,8 @@ retry:
 		 * If we have already reclaimed all extents from extent
 		 * status tree, just stop the loop immediately.
 		 */
-		if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
+		if (percpu_counter_read_positive(
+				&es_stats->es_stats_lru_cnt) == 0)
 			break;
 
 		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
@@ -958,7 +977,7 @@ retry:
 		 * time. Normally we try hard to avoid shrinking
 		 * precached inodes, but we will as a last resort.
 		 */
-		if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+		if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
 		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
 						EXT4_STATE_EXT_PRECACHED))) {
 			nr_skipped++;
@@ -992,7 +1011,7 @@ retry:
 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
 		retried++;
 		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		sbi->s_es_last_sorted = jiffies;
+		es_stats->es_stats_last_sorted = jiffies;
 		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
 				      i_es_lru);
 		/*
@@ -1010,6 +1029,22 @@ retry:
 	if (locked_ei && nr_shrunk == 0)
 		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
 
+	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+	if (likely(es_stats->es_stats_scan_time))
+		es_stats->es_stats_scan_time = (scan_time +
+				es_stats->es_stats_scan_time*3) / 4;
+	else
+		es_stats->es_stats_scan_time = scan_time;
+	if (scan_time > es_stats->es_stats_max_scan_time)
+		es_stats->es_stats_max_scan_time = scan_time;
+	if (likely(es_stats->es_stats_shrunk))
+		es_stats->es_stats_shrunk = (nr_shrunk +
+				es_stats->es_stats_shrunk*3) / 4;
+	else
+		es_stats->es_stats_shrunk = nr_shrunk;
+
+	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+			     nr_skipped, retried);
 	return nr_shrunk;
 }
 
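The instrumentation added above smooths scan_time and nr_shrunk with the expression (sample + old*3) / 4: an exponential moving average that weights history 3:1 over the newest sample, with the first sample seeding the average directly. A small stand-alone C sketch of that smoothing (the helper name is ours, not ext4's):

#include <stdint.h>
#include <stdio.h>

/* Same 3/4-weighted running average as __ext4_es_shrink():
 * avg = (sample + 3*avg) / 4, seeded by the first sample. */
static uint64_t ewma_update(uint64_t avg, uint64_t sample)
{
	return avg ? (sample + avg * 3) / 4 : sample;
}

int main(void)
{
	uint64_t avg = 0;
	uint64_t samples[] = { 1000, 2000, 1500, 400 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		avg = ewma_update(avg, samples[i]);
		printf("sample %llu -> avg %llu\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)avg);
	}
	return 0;
}

The integer form avoids floating point in the kernel and decays old outliers within a few samples, which is all a shrinker heuristic needs.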
@@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
 	struct ext4_sb_info *sbi;
 
 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-	nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-	trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
 	return nr;
 }
 
@@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
-	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
 	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
 
-	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
 }
 
-void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos)
 {
+	return *pos ? NULL : SEQ_START_TOKEN;
+}
+
+static void *
+ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	return NULL;
+}
+
+static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
+{
+	struct ext4_sb_info *sbi = seq->private;
+	struct ext4_es_stats *es_stats = &sbi->s_es_stats;
+	struct ext4_inode_info *ei, *max = NULL;
+	unsigned int inode_cnt = 0;
+
+	if (v != SEQ_START_TOKEN)
+		return 0;
+
+	/* here we just find an inode that has the max nr. of objects */
+	spin_lock(&sbi->s_es_lru_lock);
+	list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+		inode_cnt++;
+		if (max && max->i_es_all_nr < ei->i_es_all_nr)
+			max = ei;
+		else if (!max)
+			max = ei;
+	}
+	spin_unlock(&sbi->s_es_lru_lock);
+
+	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
+		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
+		   percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
+		   es_stats->es_stats_cache_hits,
+		   es_stats->es_stats_cache_misses);
+	if (es_stats->es_stats_last_sorted != 0)
+		seq_printf(seq, "  %u ms last sorted interval\n",
+			   jiffies_to_msecs(jiffies -
+					    es_stats->es_stats_last_sorted));
+	if (inode_cnt)
+		seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+
+	seq_printf(seq, "average:\n  %llu us scan time\n",
+	    div_u64(es_stats->es_stats_scan_time, 1000));
+	seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
+	if (inode_cnt)
+		seq_printf(seq,
+		    "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
+		    "  %llu us max scan time\n",
+		    max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+		    div_u64(es_stats->es_stats_max_scan_time, 1000));
+
+	return 0;
+}
+
+static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations ext4_es_seq_shrinker_info_ops = {
+	.start = ext4_es_seq_shrinker_info_start,
+	.next  = ext4_es_seq_shrinker_info_next,
+	.stop  = ext4_es_seq_shrinker_info_stop,
+	.show  = ext4_es_seq_shrinker_info_show,
+};
+
+static int
+ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &ext4_es_seq_shrinker_info_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = PDE_DATA(inode);
+	}
+
+	return ret;
+}
+
+static int
+ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file)
+{
+	return seq_release(inode, file);
+}
+
+static const struct file_operations ext4_es_seq_shrinker_info_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ext4_es_seq_shrinker_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= ext4_es_seq_shrinker_info_release,
+};
+
+int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+{
+	int err;
+
 	INIT_LIST_HEAD(&sbi->s_es_lru);
 	spin_lock_init(&sbi->s_es_lru_lock);
-	sbi->s_es_last_sorted = 0;
+	sbi->s_es_stats.es_stats_last_sorted = 0;
+	sbi->s_es_stats.es_stats_shrunk = 0;
+	sbi->s_es_stats.es_stats_cache_hits = 0;
+	sbi->s_es_stats.es_stats_cache_misses = 0;
+	sbi->s_es_stats.es_stats_scan_time = 0;
+	sbi->s_es_stats.es_stats_max_scan_time = 0;
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
+	if (err)
+		return err;
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	if (err)
+		goto err1;
+
 	sbi->s_es_shrinker.scan_objects = ext4_es_scan;
 	sbi->s_es_shrinker.count_objects = ext4_es_count;
 	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
-	register_shrinker(&sbi->s_es_shrinker);
+	err = register_shrinker(&sbi->s_es_shrinker);
+	if (err)
+		goto err2;
+
+	if (sbi->s_proc)
+		proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc,
+				 &ext4_es_seq_shrinker_info_fops, sbi);
+
+	return 0;
+
+err2:
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+err1:
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+	return err;
 }
 
 void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 {
+	if (sbi->s_proc)
+		remove_proc_entry("es_shrinker_info", sbi->s_proc);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
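Most of the new code in this file is the procfs es_shrinker_info report, which uses the seq_file single-record idiom: start() yields SEQ_START_TOKEN exactly once, next() always ends the iteration, and show() emits the whole report in one call. A reduced kernel-style sketch of that idiom (names are ours, not ext4's):

#include <linux/seq_file.h>
#include <linux/module.h>

/* Single-record seq_file: the whole report comes from one show().
 * start() returns SEQ_START_TOKEN only when *pos == 0, so a read()
 * past the first record terminates cleanly. */
static void *demo_start(struct seq_file *seq, loff_t *pos)
{
	return *pos ? NULL : SEQ_START_TOKEN;
}

static void *demo_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return NULL;			/* there is never a second record */
}

static void demo_stop(struct seq_file *seq, void *v)
{
}

static int demo_show(struct seq_file *seq, void *v)
{
	if (v != SEQ_START_TOKEN)
		return 0;
	seq_printf(seq, "stats:\n  %d objects\n", 42);
	return 0;
}

static const struct seq_operations demo_ops = {
	.start	= demo_start,
	.next	= demo_next,
	.stop	= demo_stop,
	.show	= demo_show,
};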
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f1b62a419920..efd5f970b501 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -64,6 +64,17 @@ struct ext4_es_tree {
 	struct extent_status *cache_es;	/* recently accessed extent */
 };
 
+struct ext4_es_stats {
+	unsigned long es_stats_last_sorted;
+	unsigned long es_stats_shrunk;
+	unsigned long es_stats_cache_hits;
+	unsigned long es_stats_cache_misses;
+	u64 es_stats_scan_time;
+	u64 es_stats_max_scan_time;
+	struct percpu_counter es_stats_all_cnt;
+	struct percpu_counter es_stats_lru_cnt;
+};
+
 extern int __init ext4_init_es(void);
 extern void ext4_exit_es(void);
 extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
 				       (pb & ~ES_MASK));
 }
 
-extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
+extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_lru_add(struct inode *inode);
 extern void ext4_es_lru_del(struct inode *inode);
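struct ext4_es_stats mixes plain counters, updated under existing locks, with two percpu_counters for the hot allocation and free paths. The percpu_counter pattern the series leans on, reduced to its core (a hedged sketch of kernel API usage, not ext4 code):

#include <linux/percpu_counter.h>

struct demo_stats {
	struct percpu_counter objects;	/* cheap per-cpu increments */
};

static int demo_stats_init(struct demo_stats *s)
{
	/* The GFP argument is required as of v3.18, which is why the
	 * patch passes GFP_KERNEL at both init sites. */
	return percpu_counter_init(&s->objects, 0, GFP_KERNEL);
}

static void demo_stats_track(struct demo_stats *s)
{
	percpu_counter_inc(&s->objects);	/* no shared-cacheline bounce */
}

static s64 demo_stats_read_fast(struct demo_stats *s)
{
	/* Approximate but lock-free: good enough for shrinker heuristics. */
	return percpu_counter_read_positive(&s->objects);
}

static s64 demo_stats_read_exact(struct demo_stats *s)
{
	/* Sums every per-cpu delta: reserved for the procfs report. */
	return percpu_counter_sum_positive(&s->objects);
}

static void demo_stats_destroy(struct demo_stats *s)
{
	percpu_counter_destroy(&s->objects);
}

Note the split in the patch itself: the cheap read_positive() sits in the shrinker's count/scan callbacks, while the expensive sum_positive() only runs when someone reads es_shrinker_info.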
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 5b87fc36aab8..8012a5daf401 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1011,8 +1011,7 @@ got:
 	spin_unlock(&sbi->s_next_gen_lock);
 
 	/* Precompute checksum seed for inode metadata */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+	if (ext4_has_metadata_csum(sb)) {
 		__u32 csum;
 		__le32 inum = cpu_to_le32(inode->i_ino);
 		__le32 gen = cpu_to_le32(inode->i_generation);
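This is the first of several hunks (also in inline.c, inode.c, ioctl.c, and mmp.c below) replacing the verbose two-line RO_COMPAT feature test with the ext4_has_metadata_csum() predicate introduced elsewhere in this series. The refactor is plain predicate extraction; a generic sketch with hypothetical names (the real ext4 helper carries extra sanity checks):

#include <stdbool.h>

#define FEAT_RO_COMPAT_METADATA_CSUM	0x0400	/* illustrative value */

struct demo_super {
	unsigned int ro_compat;
};

/* One named predicate instead of the same feature test repeated at
 * every call site; callers now read as a question, not a bit dance. */
static inline bool demo_has_metadata_csum(const struct demo_super *sb)
{
	return (sb->ro_compat & FEAT_RO_COMPAT_METADATA_CSUM) != 0;
}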
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index e75f840000a0..36b369697a13 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
  * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
  * as described above and return 0.
  */
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-			     ext4_lblk_t iblock, int indirect_blks,
-			     int *blks, ext4_fsblk_t goal,
-			     ext4_lblk_t *offsets, Indirect *branch)
+static int ext4_alloc_branch(handle_t *handle,
+			     struct ext4_allocation_request *ar,
+			     int indirect_blks, ext4_lblk_t *offsets,
+			     Indirect *branch)
 {
-	struct ext4_allocation_request ar;
 	struct buffer_head * bh;
 	ext4_fsblk_t b, new_blocks[4];
 	__le32 *p;
 	int i, j, err, len = 1;
 
-	/*
-	 * Set up for the direct block allocation
-	 */
-	memset(&ar, 0, sizeof(ar));
-	ar.inode = inode;
-	ar.len = *blks;
-	ar.logical = iblock;
-	if (S_ISREG(inode->i_mode))
-		ar.flags = EXT4_MB_HINT_DATA;
-
 	for (i = 0; i <= indirect_blks; i++) {
 		if (i == indirect_blks) {
-			ar.goal = goal;
-			new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
+			new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
 		} else
-			goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
-							goal, 0, NULL, &err);
+			ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
+					ar->inode, ar->goal,
+					ar->flags & EXT4_MB_DELALLOC_RESERVED,
+					NULL, &err);
 		if (err) {
 			i--;
 			goto failed;
@@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		if (i == 0)
 			continue;
 
-		bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
+		bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
 		if (unlikely(!bh)) {
 			err = -ENOMEM;
 			goto failed;
@@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		b = new_blocks[i];
 
 		if (i == indirect_blks)
-			len = ar.len;
+			len = ar->len;
 		for (j = 0; j < len; j++)
 			*p++ = cpu_to_le32(b++);
 
@@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		unlock_buffer(bh);
 
 		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, inode, bh);
+		err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
 		if (err)
 			goto failed;
 	}
-	*blks = ar.len;
 	return 0;
 failed:
 	for (; i >= 0; i--) {
@@ -396,10 +385,10 @@ failed:
 		 * existing before ext4_alloc_branch() was called.
 		 */
 		if (i > 0 && i != indirect_blks && branch[i].bh)
-			ext4_forget(handle, 1, inode, branch[i].bh,
+			ext4_forget(handle, 1, ar->inode, branch[i].bh,
 				    branch[i].bh->b_blocknr);
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i],
-				 (i == indirect_blks) ? ar.len : 1, 0);
+		ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+				 (i == indirect_blks) ? ar->len : 1, 0);
 	}
 	return err;
 }
@@ -419,9 +408,9 @@ failed:
  * inode (->i_blocks, etc.). In case of success we end up with the full
  * chain to new block and return 0.
  */
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-			      ext4_lblk_t block, Indirect *where, int num,
-			      int blks)
+static int ext4_splice_branch(handle_t *handle,
+			      struct ext4_allocation_request *ar,
+			      Indirect *where, int num)
 {
 	int i;
 	int err = 0;
@@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	 * Update the host buffer_head or inode to point to more just allocated
 	 * direct blocks blocks
 	 */
-	if (num == 0 && blks > 1) {
+	if (num == 0 && ar->len > 1) {
 		current_block = le32_to_cpu(where->key) + 1;
-		for (i = 1; i < blks; i++)
+		for (i = 1; i < ar->len; i++)
 			*(where->p + i) = cpu_to_le32(current_block++);
 	}
 
@@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	 */
 		jbd_debug(5, "splicing indirect only\n");
 		BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+		err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
 		if (err)
 			goto err_out;
 	} else {
 		/*
 		 * OK, we spliced it into the inode itself on a direct block.
 		 */
-		ext4_mark_inode_dirty(handle, inode);
+		ext4_mark_inode_dirty(handle, ar->inode);
 		jbd_debug(5, "splicing direct\n");
 	}
 	return err;
@@ -484,11 +473,11 @@ err_out:
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+		ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
-			 blks, 0);
+	ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
+			 ar->len, 0);
 
 	return err;
 }
@@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map,
 			int flags)
 {
+	struct ext4_allocation_request ar;
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
 	Indirect chain[4];
 	Indirect *partial;
-	ext4_fsblk_t goal;
 	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
@@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 		return -ENOSPC;
 	}
 
-	goal = ext4_find_goal(inode, map->m_lblk, partial);
+	/* Set up for the direct block allocation */
+	memset(&ar, 0, sizeof(ar));
+	ar.inode = inode;
+	ar.logical = map->m_lblk;
+	if (S_ISREG(inode->i_mode))
+		ar.flags = EXT4_MB_HINT_DATA;
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+
+	ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
 
 	/* the number of blocks need to allocate for [d,t]indirect blocks */
 	indirect_blks = (chain + depth) - partial - 1;
@@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * Next look up the indirect map to count the totoal number of
 	 * direct blocks to allocate for this branch.
 	 */
-	count = ext4_blks_to_allocate(partial, indirect_blks,
+	ar.len = ext4_blks_to_allocate(partial, indirect_blks,
 				      map->m_len, blocks_to_boundary);
+
 	/*
 	 * Block out ext4_truncate while we alter the tree
 	 */
-	err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
-				&count, goal,
+	err = ext4_alloc_branch(handle, &ar, indirect_blks,
 				offsets + (partial - chain), partial);
 
 	/*
@@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * may need to return -EAGAIN upwards in the worst case. --sct
 	 */
 	if (!err)
-		err = ext4_splice_branch(handle, inode, map->m_lblk,
-					 partial, indirect_blks, count);
+		err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
 	if (err)
 		goto cleanup;
 
 	map->m_flags |= EXT4_MAP_NEW;
 
 	ext4_update_inode_fsync_trans(handle, inode, 1);
+	count = ar.len;
 got_it:
 	map->m_flags |= EXT4_MAP_MAPPED;
 	map->m_pblk = le32_to_cpu(chain[depth-1].key);
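The thread running through this file: ext4_alloc_branch() and ext4_splice_branch() now take one struct ext4_allocation_request * instead of separate inode/iblock/blks/goal arguments, and ar.len doubles as the in/out block count, so the old '&count, goal' out-parameter plumbing disappears. A reduced sketch of the parameter-object pattern under those assumptions (names hypothetical, not the ext4 ones):

#include <stdio.h>

/* The request carries both inputs (logical, goal) and the in/out
 * length, so helpers update it in place for the next stage. */
struct alloc_req {
	unsigned long	logical;	/* block being mapped */
	unsigned long	goal;		/* allocator hint, refined by helpers */
	unsigned int	len;		/* in: wanted blocks, out: granted */
};

static int alloc_branch(struct alloc_req *ar)
{
	ar->goal += 1;			/* helper refines the hint... */
	if (ar->len > 8)
		ar->len = 8;		/* ...and may trim the grant */
	return 0;
}

int main(void)
{
	struct alloc_req ar = { .logical = 100, .goal = 5000, .len = 12 };

	if (alloc_branch(&ar) == 0)
		printf("granted %u blocks near goal %lu\n", ar.len, ar.goal);
	return 0;
}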
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index bea662bd0ca6..3ea62695abce 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -594,6 +594,7 @@ retry:
 	if (ret) {
 		unlock_page(page);
 		page_cache_release(page);
+		page = NULL;
 		ext4_orphan_add(handle, inode);
 		up_write(&EXT4_I(inode)->xattr_sem);
 		sem_held = 0;
@@ -613,7 +614,8 @@ retry:
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
-	block_commit_write(page, from, to);
+	if (page)
+		block_commit_write(page, from, to);
 out:
 	if (page) {
 		unlock_page(page);
@@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
 	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
 		inline_size - EXT4_INLINE_DOTDOT_SIZE);
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	inode->i_size = inode->i_sb->s_blocksize;
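The first two hunks above are a use-after-release fix: once the page is released on the error path, the pointer is set to NULL so the shared exit path can simply test it. Distilled into a stand-alone C sketch:

#include <stdio.h>
#include <stdlib.h>

/* Once a resource is released on an error path, poison the pointer so
 * every later use site can test it instead of replaying the error
 * logic (mirrors the 'page = NULL' one-liner above). */
static void demo(int fail)
{
	char *page = malloc(64);

	if (!page)
		return;
	if (fail) {
		free(page);
		page = NULL;		/* mark it gone */
	}

	if (page)			/* guarded, as block_commit_write() now is */
		snprintf(page, 64, "committed");

	free(page);			/* free(NULL) is a no-op; exit path stays shared */
	printf("done (fail=%d)\n", fail);
}

int main(void)
{
	demo(0);
	demo(1);
	return 0;
}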
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3aa26e9117c4..e9777f93cf05 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
 
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_LINUX) ||
-	    !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	    !ext4_has_metadata_csum(inode->i_sb))
 		return 1;
 
 	provided = le16_to_cpu(raw->i_checksum_lo);
@@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
 
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_LINUX) ||
-	    !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	    !ext4_has_metadata_csum(inode->i_sb))
 		return;
 
 	csum = ext4_inode_csum(inode, raw, ei);
@@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode)
 		goto no_delete;
 	}
 
-	if (!is_bad_inode(inode))
-		dquot_initialize(inode);
+	if (is_bad_inode(inode))
+		goto no_delete;
+	dquot_initialize(inode);
 
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages_final(&inode->i_data);
 
 	WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-	if (is_bad_inode(inode))
-		goto no_delete;
 
 	/*
 	 * Protect us against freezing - iput() caller didn't have to have any
@@ -590,20 +587,12 @@ found:
 	/*
 	 * New blocks allocate and/or writing to unwritten extent
 	 * will possibly result in updating i_data, so we take
-	 * the write lock of i_data_sem, and call get_blocks()
+	 * the write lock of i_data_sem, and call get_block()
 	 * with create == 1 flag.
 	 */
 	down_write(&EXT4_I(inode)->i_data_sem);
 
 	/*
-	 * if the caller is from delayed allocation writeout path
-	 * we have already reserved fs blocks for allocation
-	 * let the underlying get_block() function know to
-	 * avoid double accounting
-	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
-	/*
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
 	 */
@@ -631,8 +620,6 @@ found:
 		    (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
 	if (retval > 0) {
 		unsigned int status;
@@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
-				ext4_lblk_t block, int create, int *errp)
+				ext4_lblk_t block, int create)
 {
 	struct ext4_map_blocks map;
 	struct buffer_head *bh;
-	int fatal = 0, err;
+	int err;
 
 	J_ASSERT(handle != NULL || create == 0);
 
@@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	err = ext4_map_blocks(handle, inode, &map,
 			      create ? EXT4_GET_BLOCKS_CREATE : 0);
 
-	/* ensure we send some value back into *errp */
-	*errp = 0;
-
-	if (create && err == 0)
-		err = -ENOSPC;	/* should never happen */
+	if (err == 0)
+		return create ? ERR_PTR(-ENOSPC) : NULL;
 	if (err < 0)
-		*errp = err;
-	if (err <= 0)
-		return NULL;
+		return ERR_PTR(err);
 
 	bh = sb_getblk(inode->i_sb, map.m_pblk);
-	if (unlikely(!bh)) {
-		*errp = -ENOMEM;
-		return NULL;
-	}
+	if (unlikely(!bh))
+		return ERR_PTR(-ENOMEM);
 	if (map.m_flags & EXT4_MAP_NEW) {
 		J_ASSERT(create != 0);
 		J_ASSERT(handle != NULL);
@@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 		 */
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
-		fatal = ext4_journal_get_create_access(handle, bh);
-		if (!fatal && !buffer_uptodate(bh)) {
+		err = ext4_journal_get_create_access(handle, bh);
+		if (unlikely(err)) {
+			unlock_buffer(bh);
+			goto errout;
+		}
+		if (!buffer_uptodate(bh)) {
 			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
 			set_buffer_uptodate(bh);
 		}
 		unlock_buffer(bh);
 		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 		err = ext4_handle_dirty_metadata(handle, inode, bh);
-		if (!fatal)
-			fatal = err;
-	} else {
+		if (unlikely(err))
+			goto errout;
+	} else
 		BUFFER_TRACE(bh, "not a new buffer");
-	}
-	if (fatal) {
-		*errp = fatal;
-		brelse(bh);
-		bh = NULL;
-	}
 	return bh;
+errout:
+	brelse(bh);
+	return ERR_PTR(err);
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
-			       ext4_lblk_t block, int create, int *err)
+			       ext4_lblk_t block, int create)
 {
 	struct buffer_head *bh;
 
-	bh = ext4_getblk(handle, inode, block, create, err);
-	if (!bh)
+	bh = ext4_getblk(handle, inode, block, create);
+	if (IS_ERR(bh))
 		return bh;
-	if (buffer_uptodate(bh))
+	if (!bh || buffer_uptodate(bh))
 		return bh;
 	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
 		return bh;
 	put_bh(bh);
-	*err = -EIO;
-	return NULL;
+	return ERR_PTR(-EIO);
 }
 
 int ext4_walk_page_buffers(handle_t *handle,
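The ext4_getblk()/ext4_bread() conversion above replaces the 'int *errp' out-parameter with the standard kernel convention of encoding the errno in the returned pointer. A hedged sketch of the ERR_PTR/IS_ERR/PTR_ERR idiom (demo_lookup() and its caller are hypothetical; the three helpers are the real kernel ones):

#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *demo_lookup(int key)
{
	struct page *p;

	if (key < 0)
		return ERR_PTR(-EINVAL);	/* error rides in the pointer */
	p = alloc_page(GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);
	return p;	/* NULL can still mean "absent but not an error" */
}

static int demo_caller(void)
{
	struct page *p = demo_lookup(1);

	if (IS_ERR(p))
		return PTR_ERR(p);	/* recover the errno */
	if (!p)
		return 0;		/* absent, as ext4_getblk() may report */
	__free_page(p);
	return 0;
}

This is why ext4_bread() now tests IS_ERR(bh) before the !bh check: error, absent, and present are three distinct outcomes carried by one return value.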
@@ -1536,7 +1516,7 @@ out_unlock:
 }
 
 /*
- * This is a special get_blocks_t callback which is used by
+ * This is a special get_block_t callback which is used by
  * ext4_da_write_begin(). It will either return mapped block or
  * reserve space for a single block.
  *
@@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 	 * in data loss.  So use reserved blocks to allocate metadata if
 	 * possible.
 	 *
-	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
-	 * in question are delalloc blocks.  This affects functions in many
-	 * different parts of the allocation call path.  This flag exists
-	 * primarily because we don't want to change *many* call functions, so
-	 * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
-	 * once the inode's allocation semaphore is taken.
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
+	 * the blocks in question are delalloc blocks.  This indicates
+	 * that the blocks and quotas has already been checked when
+	 * the data was copied into the page cache.
 	 */
 	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
 			   EXT4_GET_BLOCKS_METADATA_NOFAIL;
@@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb)
 	return 0;
 }
 
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+	if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+		return 1;
+
+	if (pos + len <= 0x7fffffffULL)
+		return 1;
+
+	/* We might need to update the superblock to set LARGE_FILE */
+	return 2;
+}
+
 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			       loff_t pos, unsigned len, unsigned flags,
 			       struct page **pagep, void **fsdata)
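Each journal credit reserves room for one modified metadata block in the running transaction, which is what the new ext4_da_write_credits() is counting: a delalloc write always dirties the inode (one credit), and only a write that first crosses the 2 GiB boundary on a filesystem without LARGE_FILE must also dirty the superblock to set that feature flag (two credits). A worked stand-alone version of the same arithmetic (hypothetical wrapper, same constants):

#include <stdio.h>

static int write_credits(int has_large_file, unsigned long long pos,
			 unsigned int len)
{
	if (has_large_file || pos + len <= 0x7fffffffULL)
		return 1;	/* inode block only */
	return 2;		/* inode block + superblock */
}

int main(void)
{
	/* write at 2 GiB on a non-LARGE_FILE fs: superblock may change */
	printf("%d\n", write_credits(0, 0x80000000ULL, 4096));	/* 2 */
	/* same write once LARGE_FILE is already set */
	printf("%d\n", write_credits(1, 0x80000000ULL, 4096));	/* 1 */
	return 0;
}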
@@ -2565,7 +2557,8 @@ retry_grab:
 	 * of file which has an already mapped buffer.
 	 */
 retry_journal:
-	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				ext4_da_write_credits(inode, pos, len));
 	if (IS_ERR(handle)) {
 		page_cache_release(page);
 		return PTR_ERR(handle);
@@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file,
 	if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
 		if (ext4_has_inline_data(inode) ||
 		    ext4_da_should_update_i_disksize(page, end)) {
-			down_write(&EXT4_I(inode)->i_data_sem);
-			if (new_i_size > EXT4_I(inode)->i_disksize)
-				EXT4_I(inode)->i_disksize = new_i_size;
-			up_write(&EXT4_I(inode)->i_data_sem);
+			ext4_update_i_disksize(inode, new_i_size);
 			/* We need to mark inode dirty even if
 			 * new_i_size is less that inode->i_size
 			 * bu greater than i_disksize.(hint delalloc)
@@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		ei->i_extra_isize = 0;
 
 	/* Precompute checksum seed for inode metadata */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+	if (ext4_has_metadata_csum(sb)) {
 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 		__u32 csum;
 		__le32 inum = cpu_to_le32(inode->i_ino);
@@ -4127,6 +4116,13 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+		return ERR_PTR(-EIO);
+	return ext4_iget(sb, ino);
+}
+
 static int ext4_inode_blocks_set(handle_t *handle,
 				struct ext4_inode *raw_inode,
 				struct ext4_inode_info *ei)
@@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle,
 	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
 	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
 
-	if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
+	err = ext4_inode_blocks_set(handle, raw_inode, ei);
+	if (err) {
 		spin_unlock(&ei->i_raw_lock);
 		goto out_brelse;
 	}
@@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				ext4_orphan_del(NULL, inode);
 				goto err_out;
 			}
-		} else
+		} else {
+			loff_t oldsize = inode->i_size;
+
 			i_size_write(inode, attr->ia_size);
+			pagecache_isize_extended(inode, oldsize, inode->i_size);
+		}
 
 		/*
 		 * Blocks are going to be removed from the inode. Wait
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f2252ec274d..bfda18a15592 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,8 +331,7 @@ flags_out:
 		if (!inode_owner_or_capable(inode))
 			return -EPERM;
 
-		if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+		if (ext4_has_metadata_csum(inode->i_sb)) {
 			ext4_warning(sb, "Setting inode version is not "
 				     "supported with metadata_csum enabled.");
 			return -ENOTTY;
@@ -532,9 +531,17 @@ group_add_out:
 	}
 
 	case EXT4_IOC_SWAP_BOOT:
+	{
+		int err;
 		if (!(filp->f_mode & FMODE_WRITE))
 			return -EBADF;
-		return swap_inode_boot_loader(sb, inode);
+		err = mnt_want_write_file(filp);
+		if (err)
+			return err;
+		err = swap_inode_boot_loader(sb, inode);
+		mnt_drop_write_file(filp);
+		return err;
+	}
 
 	case EXT4_IOC_RESIZE_FS: {
 		ext4_fsblk_t n_blocks_count;
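The EXT4_IOC_SWAP_BOOT fix follows the standard pattern for any ioctl that modifies the filesystem: take a write reference on the mount, which fails on read-only or frozen mounts, do the work, then drop it. A kernel-style sketch of the pairing (do_swap_boot() is a hypothetical stand-in for swap_inode_boot_loader()):

#include <linux/fs.h>
#include <linux/mount.h>

static int do_swap_boot(struct file *filp)
{
	return 0;	/* hypothetical worker standing in for the real op */
}

static long demo_ioctl_write_op(struct file *filp)
{
	int err;

	err = mnt_want_write_file(filp);	/* rejects ro/frozen mounts */
	if (err)
		return err;
	err = do_swap_boot(filp);
	mnt_drop_write_file(filp);		/* always paired with the want */
	return err;
}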
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 748c9136a60a..dbfe15c2533c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 			 "start %lu, size %lu, fe_logical %lu",
 			 (unsigned long) start, (unsigned long) size,
 			 (unsigned long) ac->ac_o_ex.fe_logical);
+		BUG();
 	}
-	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
-			start > ac->ac_o_ex.fe_logical);
 	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */
@@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	if (IS_NOQUOTA(ar->inode))
 		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
 
-	/*
-	 * For delayed allocation, we could skip the ENOSPC and
-	 * EDQUOT check, as blocks and quotas have been already
-	 * reserved when data being copied into pagecache.
-	 */
-	if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
-		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
-	else {
+	if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
 		/* Without delayed allocation we need to verify
 		 * there is enough free blocks to do block allocation
 		 * and verify allocation doesn't exceed the quota limits.
@@ -4528,8 +4520,7 @@ out:
 	if (inquota && ar->len < inquota)
 		dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
 	if (!ar->len) {
-		if (!ext4_test_inode_state(ar->inode,
-					   EXT4_STATE_DELALLOC_RESERVED))
+		if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
 			/* release all the reserved blocks if non delalloc */
 			percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 						reserv_clstrs);
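The mballoc hunks are the other half of the inode.c change: instead of smuggling "this allocation is delalloc-reserved" through a bit on the inode that gets set and cleared around the call, the caller now passes EXT4_MB_DELALLOC_RESERVED in the request flags. A reduced illustration of why the explicit argument is the safer contract (names hypothetical):

#include <stdbool.h>

#define MB_DELALLOC_RESERVED	0x1

struct request {
	unsigned int flags;
};

/* The decision is local to this one request: no other thread's
 * set/clear of shared inode state can race with it, and a reader of
 * the call site can see the policy without chasing global state. */
static bool needs_quota_check(const struct request *rq)
{
	return (rq->flags & MB_DELALLOC_RESERVED) == 0;
}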
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d3567f27bae7..a432634f2e6a 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
 	ext4_ext_store_pblock(&newext, lb->first_pblock);
 	/* Locking only for convinience since we are operating on temp inode */
 	down_write(&EXT4_I(inode)->i_data_sem);
-	path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
-
+	path = ext4_find_extent(inode, lb->first_block, NULL, 0);
 	if (IS_ERR(path)) {
 		retval = PTR_ERR(path);
 		path = NULL;
@@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode,
 			goto err_out;
 		}
 	}
-	retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
+	retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
 err_out:
 	up_write((&EXT4_I(inode)->i_data_sem));
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 	lb->first_pblock = 0;
 	return retval;
 }
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 32bce844c2e1..8313ca3324ec 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
 
 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 {
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return 1;
 
 	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 
 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
 {
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return;
 
 	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 671a74b14fd7..9f2311bc9c4f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -27,120 +27,26 @@
  * @lblock:	logical block number to find an extent path
  * @path:	pointer to an extent path pointer (for output)
  *
- * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
+ * ext4_find_extent wrapper. Return 0 on success, or a negative error value
  * on failure.
  */
 static inline int
 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
-		struct ext4_ext_path **orig_path)
+		struct ext4_ext_path **ppath)
 {
-	int ret = 0;
 	struct ext4_ext_path *path;
 
-	path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
+	path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
-		ret = PTR_ERR(path);
-	else if (path[ext_depth(inode)].p_ext == NULL)
-		ret = -ENODATA;
-	else
-		*orig_path = path;
-
-	return ret;
-}
-
-/**
- * copy_extent_status - Copy the extent's initialization status
- *
- * @src:	an extent for getting initialize status
- * @dest:	an extent to be set the status
- */
-static void
-copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
-{
-	if (ext4_ext_is_unwritten(src))
-		ext4_ext_mark_unwritten(dest);
-	else
-		dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
-}
-
-/**
- * mext_next_extent - Search for the next extent and set it to "extent"
- *
- * @inode:	inode which is searched
- * @path:	this will obtain data for the next extent
- * @extent:	pointer to the next extent we have just gotten
- *
- * Search the next extent in the array of ext4_ext_path structure (@path)
- * and set it to ext4_extent structure (@extent). In addition, the member of
- * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
- * ext4_ext_path structure refers to the last extent, or a negative error
- * value on failure.
- */
-int
-mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
-		      struct ext4_extent **extent)
-{
-	struct ext4_extent_header *eh;
-	int ppos, leaf_ppos = path->p_depth;
-
-	ppos = leaf_ppos;
-	if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
-		/* leaf block */
-		*extent = ++path[ppos].p_ext;
-		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
-		return 0;
-	}
-
-	while (--ppos >= 0) {
-		if (EXT_LAST_INDEX(path[ppos].p_hdr) >
-		    path[ppos].p_idx) {
-			int cur_ppos = ppos;
-
-			/* index block */
-			path[ppos].p_idx++;
-			path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
-			if (path[ppos+1].p_bh)
-				brelse(path[ppos+1].p_bh);
-			path[ppos+1].p_bh =
-				sb_bread(inode->i_sb, path[ppos].p_block);
-			if (!path[ppos+1].p_bh)
-				return -EIO;
-			path[ppos+1].p_hdr =
-				ext_block_hdr(path[ppos+1].p_bh);
-
-			/* Halfway index block */
-			while (++cur_ppos < leaf_ppos) {
-				path[cur_ppos].p_idx =
-					EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
-				path[cur_ppos].p_block =
-					ext4_idx_pblock(path[cur_ppos].p_idx);
-				if (path[cur_ppos+1].p_bh)
-					brelse(path[cur_ppos+1].p_bh);
-				path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
-					path[cur_ppos].p_block);
-				if (!path[cur_ppos+1].p_bh)
-					return -EIO;
-				path[cur_ppos+1].p_hdr =
-					ext_block_hdr(path[cur_ppos+1].p_bh);
-			}
-
-			path[leaf_ppos].p_ext = *extent = NULL;
-
-			eh = path[leaf_ppos].p_hdr;
-			if (le16_to_cpu(eh->eh_entries) == 0)
-				/* empty leaf is found */
-				return -ENODATA;
-
-			/* leaf block */
-			path[leaf_ppos].p_ext = *extent =
-				EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
-			path[leaf_ppos].p_block =
-					ext4_ext_pblock(path[leaf_ppos].p_ext);
-			return 0;
-		}
-	}
-	/* We found the last extent */
-	return 1;
+		return PTR_ERR(path);
+	if (path[ext_depth(inode)].p_ext == NULL) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+		*ppath = NULL;
+		return -ENODATA;
+	}
+	*ppath = path;
+	return 0;
 }
 
 /**
@@ -178,417 +84,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode,
 }
 
 /**
- * mext_insert_across_blocks - Insert extents across leaf block
- *
- * @handle:		journal handle
- * @orig_inode:		original inode
- * @o_start:		first original extent to be changed
- * @o_end:		last original extent to be changed
- * @start_ext:		first new extent to be inserted
- * @new_ext:		middle of new extent to be inserted
- * @end_ext:		last new extent to be inserted
- *
- * Allocate a new leaf block and insert extents into it. Return 0 on success,
- * or a negative error value on failure.
- */
-static int
-mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
-		struct ext4_extent *o_start, struct ext4_extent *o_end,
-		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext)
-{
-	struct ext4_ext_path *orig_path = NULL;
-	ext4_lblk_t eblock = 0;
-	int new_flag = 0;
-	int end_flag = 0;
-	int err = 0;
-
-	if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
-		if (o_start == o_end) {
-
-			/*	start_ext	new_ext	end_ext
-			 * donor |---------|-----------|--------|
-			 * orig  |------------------------------|
-			 */
-			end_flag = 1;
-		} else {
-
-			/*	start_ext	new_ext	end_ext
-			 * donor |---------|----------|---------|
-			 * orig  |---------------|--------------|
-			 */
-			o_end->ee_block = end_ext->ee_block;
-			o_end->ee_len = end_ext->ee_len;
-			ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-		}
-
-		o_start->ee_len = start_ext->ee_len;
-		eblock = le32_to_cpu(start_ext->ee_block);
-		new_flag = 1;
-
-	} else if (start_ext->ee_len && new_ext->ee_len &&
-		   !end_ext->ee_len && o_start == o_end) {
-
-		/*	 start_ext	new_ext
-		 * donor |--------------|---------------|
-		 * orig  |------------------------------|
-		 */
-		o_start->ee_len = start_ext->ee_len;
-		eblock = le32_to_cpu(start_ext->ee_block);
-		new_flag = 1;
-
-	} else if (!start_ext->ee_len && new_ext->ee_len &&
-		   end_ext->ee_len && o_start == o_end) {
-
-		/*	  new_ext	end_ext
-		 * donor |--------------|---------------|
-		 * orig  |------------------------------|
-		 */
-		o_end->ee_block = end_ext->ee_block;
-		o_end->ee_len = end_ext->ee_len;
-		ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-
-		/*
-		 * Set 0 to the extent block if new_ext was
-		 * the first block.
-		 */
-		if (new_ext->ee_block)
-			eblock = le32_to_cpu(new_ext->ee_block);
-
-		new_flag = 1;
-	} else {
-		ext4_debug("ext4 move extent: Unexpected insert case\n");
-		return -EIO;
-	}
-
-	if (new_flag) {
-		err = get_ext_path(orig_inode, eblock, &orig_path);
-		if (err)
-			goto out;
-
-		if (ext4_ext_insert_extent(handle, orig_inode,
-					orig_path, new_ext, 0))
-			goto out;
-	}
-
-	if (end_flag) {
-		err = get_ext_path(orig_inode,
-				le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
-		if (err)
-			goto out;
-
-		if (ext4_ext_insert_extent(handle, orig_inode,
-					   orig_path, end_ext, 0))
-			goto out;
-	}
-out:
-	if (orig_path) {
-		ext4_ext_drop_refs(orig_path);
-		kfree(orig_path);
-	}
-
-	return err;
-
-}
-
-/**
- * mext_insert_inside_block - Insert new extent to the extent block
- *
- * @o_start:		first original extent to be moved
- * @o_end:		last original extent to be moved
- * @start_ext:		first new extent to be inserted
- * @new_ext:		middle of new extent to be inserted
- * @end_ext:		last new extent to be inserted
- * @eh:			extent header of target leaf block
- * @range_to_move:	used to decide how to insert extent
- *
- * Insert extents into the leaf block. The extent (@o_start) is overwritten
- * by inserted extents.
- */
-static void
-mext_insert_inside_block(struct ext4_extent *o_start,
-			      struct ext4_extent *o_end,
-			      struct ext4_extent *start_ext,
-			      struct ext4_extent *new_ext,
-			      struct ext4_extent *end_ext,
-			      struct ext4_extent_header *eh,
-			      int range_to_move)
-{
-	int i = 0;
-	unsigned long len;
-
-	/* Move the existing extents */
-	if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
-		len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
-			(unsigned long)(o_end + 1);
-		memmove(o_end + 1 + range_to_move, o_end + 1, len);
-	}
-
-	/* Insert start entry */
-	if (start_ext->ee_len)
-		o_start[i++].ee_len = start_ext->ee_len;
-
-	/* Insert new entry */
-	if (new_ext->ee_len) {
-		o_start[i] = *new_ext;
-		ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
-	}
-
-	/* Insert end entry */
-	if (end_ext->ee_len)
-		o_start[i] = *end_ext;
-
-	/* Increment the total entries counter on the extent block */
-	le16_add_cpu(&eh->eh_entries, range_to_move);
-}
-
-/**
- * mext_insert_extents - Insert new extent
- *
- * @handle:	journal handle
- * @orig_inode:	original inode
- * @orig_path:	path indicates first extent to be changed
- * @o_start:	first original extent to be changed
- * @o_end:	last original extent to be changed
- * @start_ext:	first new extent to be inserted
- * @new_ext:	middle of new extent to be inserted
- * @end_ext:	last new extent to be inserted
- *
- * Call the function to insert extents. If we cannot add more extents into
- * the leaf block, we call mext_insert_across_blocks() to create a
- * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
- * on success, or a negative error value on failure.
- */
-static int
-mext_insert_extents(handle_t *handle, struct inode *orig_inode,
-		     struct ext4_ext_path *orig_path,
-		     struct ext4_extent *o_start,
-		     struct ext4_extent *o_end,
-		     struct ext4_extent *start_ext,
-		     struct ext4_extent *new_ext,
-		     struct ext4_extent *end_ext)
-{
-	struct ext4_extent_header *eh;
-	unsigned long need_slots, slots_range;
-	int range_to_move, depth, ret;
-
-	/*
-	 * The extents need to be inserted
-	 * start_extent + new_extent + end_extent.
-	 */
-	need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
-		(new_ext->ee_len ? 1 : 0);
-
-	/* The number of slots between start and end */
-	slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
-		/ sizeof(struct ext4_extent);
-
-	/* Range to move the end of extent */
-	range_to_move = need_slots - slots_range;
-	depth = orig_path->p_depth;
-	orig_path += depth;
-	eh = orig_path->p_hdr;
-
-	if (depth) {
-		/* Register to journal */
-		BUFFER_TRACE(orig_path->p_bh, "get_write_access");
-		ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
-		if (ret)
-			return ret;
-	}
-
-	/* Expansion */
-	if (range_to_move > 0 &&
-		(range_to_move > le16_to_cpu(eh->eh_max)
-			- le16_to_cpu(eh->eh_entries))) {
-
-		ret = mext_insert_across_blocks(handle, orig_inode, o_start,
-					o_end, start_ext, new_ext, end_ext);
-		if (ret < 0)
-			return ret;
-	} else
-		mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
-						end_ext, eh, range_to_move);
-
-	return ext4_ext_dirty(handle, orig_inode, orig_path);
-}
-
-/**
- * mext_leaf_block - Move one leaf extent block into the inode.
- *
- * @handle:		journal handle
- * @orig_inode:		original inode
- * @orig_path:		path indicates first extent to be changed
- * @dext:		donor extent
- * @from:		start offset on the target file
- *
- * In order to insert extents into the leaf block, we must divide the extent
- * in the leaf block into three extents. The one is located to be inserted
- * extents, and the others are located around it.
- *
- * Therefore, this function creates structures to save extents of the leaf
- * block, and inserts extents by calling mext_insert_extents() with
- * created extents. Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_leaf_block(handle_t *handle, struct inode *orig_inode,
-		     struct ext4_ext_path *orig_path, struct ext4_extent *dext,
-		     ext4_lblk_t *from)
-{
-	struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
-	struct ext4_extent new_ext, start_ext, end_ext;
-	ext4_lblk_t new_ext_end;
-	int oext_alen, new_ext_alen, end_ext_alen;
-	int depth = ext_depth(orig_inode);
-	int ret;
-
-	start_ext.ee_block = end_ext.ee_block = 0;
-	o_start = o_end = oext = orig_path[depth].p_ext;
-	oext_alen = ext4_ext_get_actual_len(oext);
-	start_ext.ee_len = end_ext.ee_len = 0;
-
-	new_ext.ee_block = cpu_to_le32(*from);
-	ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
-	new_ext.ee_len = dext->ee_len;
-	new_ext_alen = ext4_ext_get_actual_len(&new_ext);
-	new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-
-	/*
-	 * Case: original extent is first
-	 * oext      |--------|
-	 * new_ext      |--|
-	 * start_ext |--|
-	 */
-	if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
-	    le32_to_cpu(new_ext.ee_block) <
-	    le32_to_cpu(oext->ee_block) + oext_alen) {
-		start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
-					       le32_to_cpu(oext->ee_block));
-		start_ext.ee_block = oext->ee_block;
-		copy_extent_status(oext, &start_ext);
-	} else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
-		prev_ext = oext - 1;
-		/*
-		 * We can merge new_ext into previous extent,
-		 * if these are contiguous and same extent type.
-		 */
-		if (ext4_can_extents_be_merged(orig_inode, prev_ext,
-					       &new_ext)) {
-			o_start = prev_ext;
-			start_ext.ee_len = cpu_to_le16(
-				ext4_ext_get_actual_len(prev_ext) +
-				new_ext_alen);
-			start_ext.ee_block = oext->ee_block;
-			copy_extent_status(prev_ext, &start_ext);
-			new_ext.ee_len = 0;
-		}
-	}
-
-	/*
-	 * Case: new_ext_end must be less than oext
-	 * oext      |-----------|
-	 * new_ext       |-------|
-	 */
-	if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-		EXT4_ERROR_INODE(orig_inode,
-			"new_ext_end(%u) should be less than or equal to "
-			"oext->ee_block(%u) + oext_alen(%d) - 1",
-			new_ext_end, le32_to_cpu(oext->ee_block),
-			oext_alen);
-		ret = -EIO;
-		goto out;
-	}
-
-	/*
-	 * Case: new_ext is smaller than original extent
-	 * oext    |---------------|
-	 * new_ext |-----------|
-	 * end_ext     |---|
-	 */
-	if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
-	    new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
-		end_ext.ee_len =
-			cpu_to_le16(le32_to_cpu(oext->ee_block) +
-				    oext_alen - 1 - new_ext_end);
-		copy_extent_status(oext, &end_ext);
-		end_ext_alen = ext4_ext_get_actual_len(&end_ext);
-		ext4_ext_store_pblock(&end_ext,
-			(ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
-		end_ext.ee_block =
-			cpu_to_le32(le32_to_cpu(o_end->ee_block) +
-				    oext_alen - end_ext_alen);
-	}
-
-	ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
-				o_end, &start_ext, &new_ext, &end_ext);
-out:
-	return ret;
-}
-
-/**
- * mext_calc_swap_extents - Calculate extents for extent swapping.
- *
- * @tmp_dext:	the extent that will belong to the original inode
- * @tmp_oext:	the extent that will belong to the donor inode
- * @orig_off:	block offset of original inode
- * @donor_off:	block offset of donor inode
- * @max_count:	the maximum length of extents
- *
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_calc_swap_extents(struct ext4_extent *tmp_dext,
-		       struct ext4_extent *tmp_oext,
-		       ext4_lblk_t orig_off, ext4_lblk_t donor_off,
-		       ext4_lblk_t max_count)
-{
-	ext4_lblk_t diff, orig_diff;
-	struct ext4_extent dext_old, oext_old;
-
-	BUG_ON(orig_off != donor_off);
-
-	/* original and donor extents have to cover the same block offset */
-	if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
303 * @range_to_move: used to decide how to insert extent
304 *
305 * Insert extents into the leaf block. The extent (@o_start) is overwritten
306 * by inserted extents.
307 */
308static void
309mext_insert_inside_block(struct ext4_extent *o_start,
310 struct ext4_extent *o_end,
311 struct ext4_extent *start_ext,
312 struct ext4_extent *new_ext,
313 struct ext4_extent *end_ext,
314 struct ext4_extent_header *eh,
315 int range_to_move)
316{
317 int i = 0;
318 unsigned long len;
319
320 /* Move the existing extents */
321 if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
322 len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
323 (unsigned long)(o_end + 1);
324 memmove(o_end + 1 + range_to_move, o_end + 1, len);
325 }
326
327 /* Insert start entry */
328 if (start_ext->ee_len)
329 o_start[i++].ee_len = start_ext->ee_len;
330
331 /* Insert new entry */
332 if (new_ext->ee_len) {
333 o_start[i] = *new_ext;
334 ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
335 }
336
337 /* Insert end entry */
338 if (end_ext->ee_len)
339 o_start[i] = *end_ext;
340
341 /* Increment the total entries counter on the extent block */
342 le16_add_cpu(&eh->eh_entries, range_to_move);
343}
344
345/**
346 * mext_insert_extents - Insert new extent
347 *
348 * @handle: journal handle
349 * @orig_inode: original inode
350 * @orig_path: path indicates first extent to be changed
351 * @o_start: first original extent to be changed
352 * @o_end: last original extent to be changed
353 * @start_ext: first new extent to be inserted
354 * @new_ext: middle of new extent to be inserted
355 * @end_ext: last new extent to be inserted
356 *
357 * Call the function to insert extents. If we cannot add more extents into
358 * the leaf block, we call mext_insert_across_blocks() to create a
359 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
360 * on success, or a negative error value on failure.
361 */
362static int
363mext_insert_extents(handle_t *handle, struct inode *orig_inode,
364 struct ext4_ext_path *orig_path,
365 struct ext4_extent *o_start,
366 struct ext4_extent *o_end,
367 struct ext4_extent *start_ext,
368 struct ext4_extent *new_ext,
369 struct ext4_extent *end_ext)
370{
371 struct ext4_extent_header *eh;
372 unsigned long need_slots, slots_range;
373 int range_to_move, depth, ret;
374
375 /*
376 * The extents need to be inserted
377 * start_extent + new_extent + end_extent.
378 */
379 need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
380 (new_ext->ee_len ? 1 : 0);
381
382 /* The number of slots between start and end */
383 slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
384 / sizeof(struct ext4_extent);
385
386 /* Range to move the end of extent */
387 range_to_move = need_slots - slots_range;
388 depth = orig_path->p_depth;
389 orig_path += depth;
390 eh = orig_path->p_hdr;
391
392 if (depth) {
393 /* Register to journal */
394 BUFFER_TRACE(orig_path->p_bh, "get_write_access");
395 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
396 if (ret)
397 return ret;
398 }
399
400 /* Expansion */
401 if (range_to_move > 0 &&
402 (range_to_move > le16_to_cpu(eh->eh_max)
403 - le16_to_cpu(eh->eh_entries))) {
404
405 ret = mext_insert_across_blocks(handle, orig_inode, o_start,
406 o_end, start_ext, new_ext, end_ext);
407 if (ret < 0)
408 return ret;
409 } else
410 mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
411 end_ext, eh, range_to_move);
412
413 return ext4_ext_dirty(handle, orig_inode, orig_path);
414}
415
416/**
417 * mext_leaf_block - Move one leaf extent block into the inode.
418 *
419 * @handle: journal handle
420 * @orig_inode: original inode
421 * @orig_path: path indicates first extent to be changed
422 * @dext: donor extent
423 * @from: start offset on the target file
424 *
425 * In order to insert extents into the leaf block, we must divide the extent
426 * in the leaf block into three extents. The one is located to be inserted
427 * extents, and the others are located around it.
428 *
429 * Therefore, this function creates structures to save extents of the leaf
430 * block, and inserts extents by calling mext_insert_extents() with
431 * created extents. Return 0 on success, or a negative error value on failure.
432 */
433static int
434mext_leaf_block(handle_t *handle, struct inode *orig_inode,
435 struct ext4_ext_path *orig_path, struct ext4_extent *dext,
436 ext4_lblk_t *from)
437{
438 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
439 struct ext4_extent new_ext, start_ext, end_ext;
440 ext4_lblk_t new_ext_end;
441 int oext_alen, new_ext_alen, end_ext_alen;
442 int depth = ext_depth(orig_inode);
443 int ret;
444
445 start_ext.ee_block = end_ext.ee_block = 0;
446 o_start = o_end = oext = orig_path[depth].p_ext;
447 oext_alen = ext4_ext_get_actual_len(oext);
448 start_ext.ee_len = end_ext.ee_len = 0;
449
450 new_ext.ee_block = cpu_to_le32(*from);
451 ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
452 new_ext.ee_len = dext->ee_len;
453 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
454 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
455
456 /*
457 * Case: original extent is first
458 * oext |--------|
459 * new_ext |--|
460 * start_ext |--|
461 */
462 if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
463 le32_to_cpu(new_ext.ee_block) <
464 le32_to_cpu(oext->ee_block) + oext_alen) {
465 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
466 le32_to_cpu(oext->ee_block));
467 start_ext.ee_block = oext->ee_block;
468 copy_extent_status(oext, &start_ext);
469 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
470 prev_ext = oext - 1;
471 /*
472 * We can merge new_ext into the previous extent
473 * if the two are contiguous and of the same extent type.
474 */
475 if (ext4_can_extents_be_merged(orig_inode, prev_ext,
476 &new_ext)) {
477 o_start = prev_ext;
478 start_ext.ee_len = cpu_to_le16(
479 ext4_ext_get_actual_len(prev_ext) +
480 new_ext_alen);
481 start_ext.ee_block = oext->ee_block;
482 copy_extent_status(prev_ext, &start_ext);
483 new_ext.ee_len = 0;
484 }
485 }
486
487 /*
488 * Sanity check: new_ext_end must not run past the end of oext
489 * oext |-----------|
490 * new_ext |-------|
491 */
492 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
493 EXT4_ERROR_INODE(orig_inode,
494 "new_ext_end(%u) should be less than or equal to "
495 "oext->ee_block(%u) + oext_alen(%d) - 1",
496 new_ext_end, le32_to_cpu(oext->ee_block),
497 oext_alen);
498 ret = -EIO;
499 goto out;
500 }
501
502 /*
503 * Case: new_ext is smaller than original extent
504 * oext |---------------|
505 * new_ext |-----------|
506 * end_ext |---|
507 */
508 if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
509 new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
510 end_ext.ee_len =
511 cpu_to_le16(le32_to_cpu(oext->ee_block) +
512 oext_alen - 1 - new_ext_end);
513 copy_extent_status(oext, &end_ext);
514 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
515 ext4_ext_store_pblock(&end_ext,
516 (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
517 end_ext.ee_block =
518 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
519 oext_alen - end_ext_alen);
520 }
521
522 ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
523 o_end, &start_ext, &new_ext, &end_ext);
524out:
525 return ret;
526}
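The case analysis in mext_leaf_block() carves the original extent into at most three pieces along the logical block axis. A userspace sketch of the middle case with invented numbers (assumption: new_ext lies strictly inside oext):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t o_block = 100, o_len = 50;     /* original extent (oext) */
        uint32_t n_block = 120, n_len = 10;     /* donor extent (new_ext) */
        uint32_t n_end = n_block + n_len - 1;

        assert(n_block > o_block && n_end < o_block + o_len - 1);

        uint32_t start_len = n_block - o_block;           /* head piece */
        uint32_t end_len = (o_block + o_len - 1) - n_end; /* tail piece */
        uint32_t end_block = n_end + 1;

        /* start [100,+20]  new [120,+10]  end [130,+20]: 20+10+20 == 50 */
        printf("start [%u,+%u] new [%u,+%u] end [%u,+%u]\n",
               o_block, start_len, n_block, n_len, end_block, end_len);
        return 0;
}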
527
528/**
529 * mext_calc_swap_extents - Calculate extents for extent swapping.
530 *
531 * @tmp_dext: the extent that will belong to the original inode
532 * @tmp_oext: the extent that will belong to the donor inode
533 * @orig_off: block offset of original inode
534 * @donor_off: block offset of donor inode
535 * @max_count: the maximum length of extents
536 *
537 * Return 0 on success, or a negative error value on failure.
538 */
539static int
540mext_calc_swap_extents(struct ext4_extent *tmp_dext,
541 struct ext4_extent *tmp_oext,
542 ext4_lblk_t orig_off, ext4_lblk_t donor_off,
543 ext4_lblk_t max_count)
544{
545 ext4_lblk_t diff, orig_diff;
546 struct ext4_extent dext_old, oext_old;
547
548 BUG_ON(orig_off != donor_off);
549
550 /* original and donor extents have to cover the same block offset */
551 if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
552 le32_to_cpu(tmp_oext->ee_block) +
553 ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
554 return -ENODATA;
555
556 if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
557 le32_to_cpu(tmp_dext->ee_block) +
558 ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
559 return -ENODATA;
560
561 dext_old = *tmp_dext;
562 oext_old = *tmp_oext;
563
564 /* If tmp_dext is too large, trim it down to the target range. */
565 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
566
567 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
568 le32_add_cpu(&tmp_dext->ee_block, diff);
569 le16_add_cpu(&tmp_dext->ee_len, -diff);
570
571 if (max_count < ext4_ext_get_actual_len(tmp_dext))
572 tmp_dext->ee_len = cpu_to_le16(max_count);
573
574 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
575 ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
576
577 /* Adjust extent length if donor extent is larger than orig */
578 if (ext4_ext_get_actual_len(tmp_dext) >
579 ext4_ext_get_actual_len(tmp_oext) - orig_diff)
580 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
581 orig_diff);
582
583 tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
584
585 copy_extent_status(&oext_old, tmp_dext);
586 copy_extent_status(&dext_old, tmp_oext);
587
588 return 0;
589}
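Ignoring the on-disk byte order, the trimming done by mext_calc_swap_extents() can be modelled directly. A userspace sketch of the same arithmetic (struct ext is a hypothetical stand-in holding CPU-order fields):

#include <stdint.h>
#include <stdio.h>

struct ext { uint32_t block; uint64_t pblk; uint32_t len; };

static void sketch_calc_swap(struct ext *d, struct ext *o,
                             uint32_t off, uint32_t max_count)
{
        uint32_t diff = off - d->block;  /* trim the donor head up to off */

        d->pblk += diff;
        d->block += diff;
        d->len -= diff;
        if (max_count < d->len)
                d->len = max_count;

        uint32_t orig_diff = off - o->block;  /* trim the orig head too */

        o->pblk += orig_diff;
        if (d->len > o->len - orig_diff)  /* donor outlives the orig tail */
                d->len = o->len - orig_diff;
        o->len = d->len;  /* both sides swap the same number of blocks */
}

int main(void)
{
        struct ext d = { .block = 10, .pblk = 1000, .len = 30 };
        struct ext o = { .block = 10, .pblk = 2000, .len = 20 };

        sketch_calc_swap(&d, &o, 15, 100);
        printf("len=%u donor block=%u pblk=%llu\n", d.len, d.block,
               (unsigned long long)d.pblk);  /* len=15 block=15 pblk=1005 */
        return 0;
}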
590
591/**
592 * mext_check_coverage - Check that all extents in the range have the same type 87 * mext_check_coverage - Check that all extents in the range have the same type
593 * 88 *
594 * @inode: inode in question 89 * @inode: inode in question
@@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
619 } 114 }
620 ret = 1; 115 ret = 1;
621out: 116out:
622 if (path) { 117 ext4_ext_drop_refs(path);
623 ext4_ext_drop_refs(path); 118 kfree(path);
624 kfree(path);
625 }
626 return ret; 119 return ret;
627} 120}
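The right-hand column above drops the NULL check around the cleanup calls. That leans on kfree(NULL) being a defined no-op (and assumes ext4_ext_drop_refs() is made NULL-safe by this series). The userspace analogue of the same idiom:

#include <stdlib.h>

int main(void)
{
        char *path = NULL;

        free(path);  /* free(NULL) does nothing by definition (C99 7.20.3.2) */
        return 0;
}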
628 121
629/** 122/**
630 * mext_replace_branches - Replace original extents with new extents
631 *
632 * @handle: journal handle
633 * @orig_inode: original inode
634 * @donor_inode: donor inode
635 * @from: block offset of orig_inode
636 * @count: block count to be replaced
637 * @err: pointer to save return value
638 *
639 * Replace original inode extents and donor inode extents page by page.
640 * We implement this replacement in the following three steps:
641 * 1. Save the block information of original and donor inodes into
642 * dummy extents.
643 * 2. Change the block information of original inode to point at the
644 * donor inode blocks.
645 * 3. Change the block information of donor inode to point at the saved
646 * original inode blocks in the dummy extents.
647 *
648 * Return replaced block count.
649 */
650static int
651mext_replace_branches(handle_t *handle, struct inode *orig_inode,
652 struct inode *donor_inode, ext4_lblk_t from,
653 ext4_lblk_t count, int *err)
654{
655 struct ext4_ext_path *orig_path = NULL;
656 struct ext4_ext_path *donor_path = NULL;
657 struct ext4_extent *oext, *dext;
658 struct ext4_extent tmp_dext, tmp_oext;
659 ext4_lblk_t orig_off = from, donor_off = from;
660 int depth;
661 int replaced_count = 0;
662 int dext_alen;
663
664 *err = ext4_es_remove_extent(orig_inode, from, count);
665 if (*err)
666 goto out;
667
668 *err = ext4_es_remove_extent(donor_inode, from, count);
669 if (*err)
670 goto out;
671
672 /* Get the original extent for the block "orig_off" */
673 *err = get_ext_path(orig_inode, orig_off, &orig_path);
674 if (*err)
675 goto out;
676
677 /* Get the donor extent for the head */
678 *err = get_ext_path(donor_inode, donor_off, &donor_path);
679 if (*err)
680 goto out;
681 depth = ext_depth(orig_inode);
682 oext = orig_path[depth].p_ext;
683 tmp_oext = *oext;
684
685 depth = ext_depth(donor_inode);
686 dext = donor_path[depth].p_ext;
687 if (unlikely(!dext))
688 goto missing_donor_extent;
689 tmp_dext = *dext;
690
691 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
692 donor_off, count);
693 if (*err)
694 goto out;
695
696 /* Loop for the donor extents */
697 while (1) {
698 /* The extent for donor must be found. */
699 if (unlikely(!dext)) {
700 missing_donor_extent:
701 EXT4_ERROR_INODE(donor_inode,
702 "The extent for donor must be found");
703 *err = -EIO;
704 goto out;
705 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
706 EXT4_ERROR_INODE(donor_inode,
707 "Donor offset(%u) and the first block of donor "
708 "extent(%u) should be equal",
709 donor_off,
710 le32_to_cpu(tmp_dext.ee_block));
711 *err = -EIO;
712 goto out;
713 }
714
715 /* Set donor extent to orig extent */
716 *err = mext_leaf_block(handle, orig_inode,
717 orig_path, &tmp_dext, &orig_off);
718 if (*err)
719 goto out;
720
721 /* Set orig extent to donor extent */
722 *err = mext_leaf_block(handle, donor_inode,
723 donor_path, &tmp_oext, &donor_off);
724 if (*err)
725 goto out;
726
727 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
728 replaced_count += dext_alen;
729 donor_off += dext_alen;
730 orig_off += dext_alen;
731
732 BUG_ON(replaced_count > count);
733 /* Already moved the expected blocks */
734 if (replaced_count >= count)
735 break;
736
737 if (orig_path)
738 ext4_ext_drop_refs(orig_path);
739 *err = get_ext_path(orig_inode, orig_off, &orig_path);
740 if (*err)
741 goto out;
742 depth = ext_depth(orig_inode);
743 oext = orig_path[depth].p_ext;
744 tmp_oext = *oext;
745
746 if (donor_path)
747 ext4_ext_drop_refs(donor_path);
748 *err = get_ext_path(donor_inode, donor_off, &donor_path);
749 if (*err)
750 goto out;
751 depth = ext_depth(donor_inode);
752 dext = donor_path[depth].p_ext;
753 tmp_dext = *dext;
754
755 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
756 donor_off, count - replaced_count);
757 if (*err)
758 goto out;
759 }
760
761out:
762 if (orig_path) {
763 ext4_ext_drop_refs(orig_path);
764 kfree(orig_path);
765 }
766 if (donor_path) {
767 ext4_ext_drop_refs(donor_path);
768 kfree(donor_path);
769 }
770
771 return replaced_count;
772}
773
774/**
775 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 123 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
776 * 124 *
777 * @inode1: the inode structure 125 * @inode1: the inode structure
778 * @inode2: the inode structure 126 * @inode2: the inode structure
779 * @index: page index 127 * @index1: page index
128 * @index2: page index
780 * @page: result page vector 129 * @page: result page vector
781 * 130 *
782 * Grab two locked pages for the inodes, in inode order 131 * Grab two locked pages for the inodes, in inode order
783 */ 132 */
784static int 133static int
785mext_page_double_lock(struct inode *inode1, struct inode *inode2, 134mext_page_double_lock(struct inode *inode1, struct inode *inode2,
786 pgoff_t index, struct page *page[2]) 135 pgoff_t index1, pgoff_t index2, struct page *page[2])
787{ 136{
788 struct address_space *mapping[2]; 137 struct address_space *mapping[2];
789 unsigned fl = AOP_FLAG_NOFS; 138 unsigned fl = AOP_FLAG_NOFS;
@@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
793 mapping[0] = inode1->i_mapping; 142 mapping[0] = inode1->i_mapping;
794 mapping[1] = inode2->i_mapping; 143 mapping[1] = inode2->i_mapping;
795 } else { 144 } else {
145 pgoff_t tmp = index1;
146 index1 = index2;
147 index2 = tmp;
796 mapping[0] = inode2->i_mapping; 148 mapping[0] = inode2->i_mapping;
797 mapping[1] = inode1->i_mapping; 149 mapping[1] = inode1->i_mapping;
798 } 150 }
799 151
800 page[0] = grab_cache_page_write_begin(mapping[0], index, fl); 152 page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
801 if (!page[0]) 153 if (!page[0])
802 return -ENOMEM; 154 return -ENOMEM;
803 155
804 page[1] = grab_cache_page_write_begin(mapping[1], index, fl); 156 page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
805 if (!page[1]) { 157 if (!page[1]) {
806 unlock_page(page[0]); 158 unlock_page(page[0]);
807 page_cache_release(page[0]); 159 page_cache_release(page[0]);
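mext_page_double_lock() takes the two page locks in a stable order (and, with this patch, swaps the indexes together with the mappings) so that two tasks locking the same pair of inodes cannot deadlock. A minimal userspace sketch of the idea, using object addresses as the ordering key (an assumption here; the kernel's comparison key is outside this hunk):

#include <pthread.h>
#include <stdio.h>

struct obj {
        pthread_mutex_t lock;
};

/* Lock the lower-addressed object first, so that concurrent
 * lock_pair(a, b) and lock_pair(b, a) agree on the order. */
static void lock_pair(struct obj *x, struct obj *y)
{
        struct obj *first = x < y ? x : y;
        struct obj *second = x < y ? y : x;

        pthread_mutex_lock(&first->lock);
        if (first != second)
                pthread_mutex_lock(&second->lock);
}

int main(void)
{
        struct obj a = { PTHREAD_MUTEX_INITIALIZER };
        struct obj b = { PTHREAD_MUTEX_INITIALIZER };

        lock_pair(&a, &b);  /* same order as lock_pair(&b, &a) would take */
        puts("locked both");
        return 0;
}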
@@ -893,25 +245,27 @@ out:
893 * @o_filp: file structure of original file 245 * @o_filp: file structure of original file
894 * @donor_inode: donor inode 246 * @donor_inode: donor inode
895 * @orig_page_offset: page index on original file 247 * @orig_page_offset: page index on original file
248 * @donor_page_offset: page index on donor file
896 * @data_offset_in_page: block index where data swapping starts 249 * @data_offset_in_page: block index where data swapping starts
897 * @block_len_in_page: the number of blocks to be swapped 250 * @block_len_in_page: the number of blocks to be swapped
898 * @unwritten: orig extent is unwritten or not 251 * @unwritten: orig extent is unwritten or not
899 * @err: pointer to save return value 252 * @err: pointer to save return value
900 * 253 *
901 * Save the data in original inode blocks and replace original inode extents 254 * Save the data in original inode blocks and replace original inode extents
902 * with donor inode extents by calling mext_replace_branches(). 255 * with donor inode extents by calling ext4_swap_extents().
903 * Finally, write out the saved data in new original inode blocks. Return 256 * Finally, write out the saved data in new original inode blocks. Return
904 * replaced block count. 257 * replaced block count.
905 */ 258 */
906static int 259static int
907move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 260move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
908 pgoff_t orig_page_offset, int data_offset_in_page, 261 pgoff_t orig_page_offset, pgoff_t donor_page_offset,
909 int block_len_in_page, int unwritten, int *err) 262 int data_offset_in_page,
263 int block_len_in_page, int unwritten, int *err)
910{ 264{
911 struct inode *orig_inode = file_inode(o_filp); 265 struct inode *orig_inode = file_inode(o_filp);
912 struct page *pagep[2] = {NULL, NULL}; 266 struct page *pagep[2] = {NULL, NULL};
913 handle_t *handle; 267 handle_t *handle;
914 ext4_lblk_t orig_blk_offset; 268 ext4_lblk_t orig_blk_offset, donor_blk_offset;
915 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 269 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
916 unsigned int w_flags = 0; 270 unsigned int w_flags = 0;
917 unsigned int tmp_data_size, data_size, replaced_size; 271 unsigned int tmp_data_size, data_size, replaced_size;
@@ -939,6 +293,9 @@ again:
939 orig_blk_offset = orig_page_offset * blocks_per_page + 293 orig_blk_offset = orig_page_offset * blocks_per_page +
940 data_offset_in_page; 294 data_offset_in_page;
941 295
296 donor_blk_offset = donor_page_offset * blocks_per_page +
297 data_offset_in_page;
298
942 /* Calculate data_size */ 299 /* Calculate data_size */
943 if ((orig_blk_offset + block_len_in_page - 1) == 300 if ((orig_blk_offset + block_len_in_page - 1) ==
944 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 301 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
@@ -959,7 +316,7 @@ again:
959 replaced_size = data_size; 316 replaced_size = data_size;
960 317
961 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, 318 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
962 pagep); 319 donor_page_offset, pagep);
963 if (unlikely(*err < 0)) 320 if (unlikely(*err < 0))
964 goto stop_journal; 321 goto stop_journal;
965 /* 322 /*
@@ -978,7 +335,7 @@ again:
978 if (*err) 335 if (*err)
979 goto drop_data_sem; 336 goto drop_data_sem;
980 337
981 unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, 338 unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
982 block_len_in_page, 1, err); 339 block_len_in_page, 1, err);
983 if (*err) 340 if (*err)
984 goto drop_data_sem; 341 goto drop_data_sem;
@@ -994,9 +351,10 @@ again:
994 *err = -EBUSY; 351 *err = -EBUSY;
995 goto drop_data_sem; 352 goto drop_data_sem;
996 } 353 }
997 replaced_count = mext_replace_branches(handle, orig_inode, 354 replaced_count = ext4_swap_extents(handle, orig_inode,
998 donor_inode, orig_blk_offset, 355 donor_inode, orig_blk_offset,
999 block_len_in_page, err); 356 donor_blk_offset,
357 block_len_in_page, 1, err);
1000 drop_data_sem: 358 drop_data_sem:
1001 ext4_double_up_write_data_sem(orig_inode, donor_inode); 359 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1002 goto unlock_pages; 360 goto unlock_pages;
@@ -1014,9 +372,9 @@ data_copy:
1014 goto unlock_pages; 372 goto unlock_pages;
1015 } 373 }
1016 ext4_double_down_write_data_sem(orig_inode, donor_inode); 374 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 375 replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
1018 orig_blk_offset, 376 orig_blk_offset, donor_blk_offset,
1019 block_len_in_page, err); 377 block_len_in_page, 1, err);
1020 ext4_double_up_write_data_sem(orig_inode, donor_inode); 378 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1021 if (*err) { 379 if (*err) {
1022 if (replaced_count) { 380 if (replaced_count) {
@@ -1061,9 +419,9 @@ repair_branches:
1061 * Try to swap the extents back to their original places 419 * Try to swap the extents back to their original places
1062 */ 420 */
1063 ext4_double_down_write_data_sem(orig_inode, donor_inode); 421 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1064 replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, 422 replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
1065 orig_blk_offset, 423 orig_blk_offset, donor_blk_offset,
1066 block_len_in_page, &err2); 424 block_len_in_page, 0, &err2);
1067 ext4_double_up_write_data_sem(orig_inode, donor_inode); 425 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1068 if (replaced_count != block_len_in_page) { 426 if (replaced_count != block_len_in_page) {
1069 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), 427 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
@@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode,
1093 struct inode *donor_inode, __u64 orig_start, 451 struct inode *donor_inode, __u64 orig_start,
1094 __u64 donor_start, __u64 *len) 452 __u64 donor_start, __u64 *len)
1095{ 453{
1096 ext4_lblk_t orig_blocks, donor_blocks; 454 __u64 orig_eof, donor_eof;
1097 unsigned int blkbits = orig_inode->i_blkbits; 455 unsigned int blkbits = orig_inode->i_blkbits;
1098 unsigned int blocksize = 1 << blkbits; 456 unsigned int blocksize = 1 << blkbits;
1099 457
458 orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
459 donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
460
461
1100 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { 462 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
1101 ext4_debug("ext4 move extent: suid or sgid is set" 463 ext4_debug("ext4 move extent: suid or sgid is set"
1102 " to donor file [ino:orig %lu, donor %lu]\n", 464 " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode,
1112 ext4_debug("ext4 move extent: The argument files should " 474 ext4_debug("ext4 move extent: The argument files should "
1113 "not be swapfile [ino:orig %lu, donor %lu]\n", 475 "not be swapfile [ino:orig %lu, donor %lu]\n",
1114 orig_inode->i_ino, donor_inode->i_ino); 476 orig_inode->i_ino, donor_inode->i_ino);
1115 return -EINVAL; 477 return -EBUSY;
1116 } 478 }
1117 479
1118 /* Ext4 move extent supports only extent based file */ 480 /* Ext4 move extent supports only extent based file */
@@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode,
1132 } 494 }
1133 495
1134 /* Start offset should be same */ 496 /* Start offset should be same */
1135 if (orig_start != donor_start) { 497 if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
498 (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
1136 ext4_debug("ext4 move extent: orig and donor's start " 499 ext4_debug("ext4 move extent: orig and donor's start "
1137 "offset are not same [ino:orig %lu, donor %lu]\n", 500 "offset are not alligned [ino:orig %lu, donor %lu]\n",
1138 orig_inode->i_ino, donor_inode->i_ino); 501 orig_inode->i_ino, donor_inode->i_ino);
1139 return -EINVAL; 502 return -EINVAL;
1140 } 503 }
1141 504
1142 if ((orig_start >= EXT_MAX_BLOCKS) || 505 if ((orig_start >= EXT_MAX_BLOCKS) ||
506 (donor_start >= EXT_MAX_BLOCKS) ||
1143 (*len > EXT_MAX_BLOCKS) || 507 (*len > EXT_MAX_BLOCKS) ||
508 (donor_start + *len >= EXT_MAX_BLOCKS) ||
1144 (orig_start + *len >= EXT_MAX_BLOCKS)) { 509 (orig_start + *len >= EXT_MAX_BLOCKS)) {
1145 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 510 ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
1146 "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, 511 "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
1147 orig_inode->i_ino, donor_inode->i_ino); 512 orig_inode->i_ino, donor_inode->i_ino);
1148 return -EINVAL; 513 return -EINVAL;
1149 } 514 }
1150 515 if (orig_eof < orig_start + *len - 1)
1151 if (orig_inode->i_size > donor_inode->i_size) { 516 *len = orig_eof - orig_start;
1152 donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; 517 if (donor_eof < donor_start + *len - 1)
1153 /* TODO: eliminate this artificial restriction */ 518 *len = donor_eof - donor_start;
1154 if (orig_start >= donor_blocks) {
1155 ext4_debug("ext4 move extent: orig start offset "
1156 "[%llu] should be less than donor file blocks "
1157 "[%u] [ino:orig %lu, donor %lu]\n",
1158 orig_start, donor_blocks,
1159 orig_inode->i_ino, donor_inode->i_ino);
1160 return -EINVAL;
1161 }
1162
1163 /* TODO: eliminate this artificial restriction */
1164 if (orig_start + *len > donor_blocks) {
1165 ext4_debug("ext4 move extent: End offset [%llu] should "
1166 "be less than donor file blocks [%u]."
1167 "So adjust length from %llu to %llu "
1168 "[ino:orig %lu, donor %lu]\n",
1169 orig_start + *len, donor_blocks,
1170 *len, donor_blocks - orig_start,
1171 orig_inode->i_ino, donor_inode->i_ino);
1172 *len = donor_blocks - orig_start;
1173 }
1174 } else {
1175 orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
1176 if (orig_start >= orig_blocks) {
1177 ext4_debug("ext4 move extent: start offset [%llu] "
1178 "should be less than original file blocks "
1179 "[%u] [ino:orig %lu, donor %lu]\n",
1180 orig_start, orig_blocks,
1181 orig_inode->i_ino, donor_inode->i_ino);
1182 return -EINVAL;
1183 }
1184
1185 if (orig_start + *len > orig_blocks) {
1186 ext4_debug("ext4 move extent: Adjust length "
1187 "from %llu to %llu. Because it should be "
1188 "less than original file blocks "
1189 "[ino:orig %lu, donor %lu]\n",
1190 *len, orig_blocks - orig_start,
1191 orig_inode->i_ino, donor_inode->i_ino);
1192 *len = orig_blocks - orig_start;
1193 }
1194 }
1195
1196 if (!*len) { 519 if (!*len) {
1197 ext4_debug("ext4 move extent: len should not be 0 " 520 ext4_debug("ext4 move extent: len should not be 0 "
1198 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 521 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
@@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode,
1208 * 531 *
1209 * @o_filp: file structure of the original file 532 * @o_filp: file structure of the original file
1210 * @d_filp: file structure of the donor file 533 * @d_filp: file structure of the donor file
1211 * @orig_start: start offset in block for orig 534 * @orig_blk: start offset in block for orig
1212 * @donor_start: start offset in block for donor 535 * @donor_blk: start offset in block for donor
1213 * @len: the number of blocks to be moved 536 * @len: the number of blocks to be moved
1214 * @moved_len: moved block length 537 * @moved_len: moved block length
1215 * 538 *
1216 * On success, this function returns 0 and sets the moved block 539 * On success, this function returns 0 and sets the moved block
1217 * length in moved_len; otherwise it returns an error value. 540 * length in moved_len; otherwise it returns an error value.
1218 * 541 *
1219 * Note: ext4_move_extents() proceeds in the following order.
1220 * 1:ext4_move_extents() calculates the last block number of the move
1221 * from the start block number (orig_start) and the number of blocks
1222 * to be moved (len) specified as arguments.
1223 * If {orig, donor}_start points into a hole, the extent start offset
1224 * referenced by ext_cur (the current extent), holecheck_path, and
1225 * orig_path is advanced past the hole.
1226 * 2:Repeat steps 3 to 5 until holecheck_path points to the last extent
1227 * or ext_cur exceeds block_end, the last logical block number.
1228 * 3:To get the length of a contiguous area, call mext_next_extent()
1229 * repeatedly on ext_cur (initially taken from holecheck_path),
1230 * until a non-contiguous extent is found, the start logical block
1231 * number exceeds block_end, or the last extent is reached.
1232 * 4:Exchange the original inode data with the donor inode data
1233 * from orig_page_offset to seq_end_page.
1234 * The start indexes of the data are specified as arguments.
1235 * That of the original inode is orig_page_offset,
1236 * and that of the donor inode is also orig_page_offset
1237 * (to handle the blocksize != pagesize case easily, the offset
1238 * for the donor inode is given in block units).
1239 * 5:Update holecheck_path and orig_path to point to the next extent
1240 * to be processed, then return to step 2.
1241 * 6:Release holecheck_path and orig_path, and set moved_len to the
1242 * number of blocks actually moved; moved_len lets the caller
1243 * calculate the file offset at which the next move extent
1244 * ioctl should start.
1245 * 7:Return 0 on success, or a negative error value on failure.
1246 */ 542 */
1247int 543int
1248ext4_move_extents(struct file *o_filp, struct file *d_filp, 544ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
1249 __u64 orig_start, __u64 donor_start, __u64 len, 545 __u64 donor_blk, __u64 len, __u64 *moved_len)
1250 __u64 *moved_len)
1251{ 546{
1252 struct inode *orig_inode = file_inode(o_filp); 547 struct inode *orig_inode = file_inode(o_filp);
1253 struct inode *donor_inode = file_inode(d_filp); 548 struct inode *donor_inode = file_inode(d_filp);
1254 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; 549 struct ext4_ext_path *path = NULL;
1255 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1256 ext4_lblk_t block_start = orig_start;
1257 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1258 ext4_lblk_t rest_blocks;
1259 pgoff_t orig_page_offset = 0, seq_end_page;
1260 int ret, depth, last_extent = 0;
1261 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 550 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1262 int data_offset_in_page; 551 ext4_lblk_t o_end, o_start = orig_blk;
1263 int block_len_in_page; 552 ext4_lblk_t d_start = donor_blk;
1264 int unwritten; 553 int ret;
1265 554
1266 if (orig_inode->i_sb != donor_inode->i_sb) { 555 if (orig_inode->i_sb != donor_inode->i_sb) {
1267 ext4_debug("ext4 move extent: The argument files " 556 ext4_debug("ext4 move extent: The argument files "
@@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1303 /* Protect extent tree against block allocations via delalloc */ 592 /* Protect extent tree against block allocations via delalloc */
1304 ext4_double_down_write_data_sem(orig_inode, donor_inode); 593 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1305 /* Check whether the filesystem environment allows move_extent */ 594 /* Check whether the filesystem environment allows move_extent */
1306 ret = mext_check_arguments(orig_inode, donor_inode, orig_start, 595 ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
1307 donor_start, &len); 596 donor_blk, &len);
1308 if (ret) 597 if (ret)
1309 goto out; 598 goto out;
599 o_end = o_start + len;
1310 600
1311 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; 601 while (o_start < o_end) {
1312 block_end = block_start + len - 1; 602 struct ext4_extent *ex;
1313 if (file_end < block_end) 603 ext4_lblk_t cur_blk, next_blk;
1314 len -= block_end - file_end; 604 pgoff_t orig_page_index, donor_page_index;
605 int offset_in_page;
606 int unwritten, cur_len;
1315 607
1316 ret = get_ext_path(orig_inode, block_start, &orig_path); 608 ret = get_ext_path(orig_inode, o_start, &path);
1317 if (ret) 609 if (ret)
1318 goto out;
1319
1320 /* Get path structure to check the hole */
1321 ret = get_ext_path(orig_inode, block_start, &holecheck_path);
1322 if (ret)
1323 goto out;
1324
1325 depth = ext_depth(orig_inode);
1326 ext_cur = holecheck_path[depth].p_ext;
1327
1328 /*
1329 * Get the proper starting location for block replacement if block_start
1330 * falls within a hole.
1331 */
1332 if (le32_to_cpu(ext_cur->ee_block) +
1333 ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1334 /*
1335 * The hole lies between extents, or at the tail of the
1336 * original file.
1337 */
1338 last_extent = mext_next_extent(orig_inode,
1339 holecheck_path, &ext_cur);
1340 if (last_extent < 0) {
1341 ret = last_extent;
1342 goto out;
1343 }
1344 last_extent = mext_next_extent(orig_inode, orig_path,
1345 &ext_dummy);
1346 if (last_extent < 0) {
1347 ret = last_extent;
1348 goto out; 610 goto out;
1349 } 611 ex = path[path->p_depth].p_ext;
1350 seq_start = le32_to_cpu(ext_cur->ee_block); 612 next_blk = ext4_ext_next_allocated_block(path);
1351 } else if (le32_to_cpu(ext_cur->ee_block) > block_start) 613 cur_blk = le32_to_cpu(ex->ee_block);
1352 /* The hole exists at the beginning of original file. */ 614 cur_len = ext4_ext_get_actual_len(ex);
1353 seq_start = le32_to_cpu(ext_cur->ee_block); 615 /* Check hole before the start pos */
1354 else 616 if (cur_blk + cur_len - 1 < o_start) {
1355 seq_start = block_start; 617 if (next_blk == EXT_MAX_BLOCKS) {
1356 618 o_start = o_end;
1357 /* No blocks within the specified range. */ 619 ret = -ENODATA;
1358 if (le32_to_cpu(ext_cur->ee_block) > block_end) { 620 goto out;
1359 ext4_debug("ext4 move extent: The specified range of file " 621 }
1360 "may be the hole\n"); 622 d_start += next_blk - o_start;
1361 ret = -EINVAL; 623 o_start = next_blk;
1362 goto out;
1363 }
1364
1365 /* Adjust start blocks */
1366 add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1367 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1368 max(le32_to_cpu(ext_cur->ee_block), block_start);
1369
1370 while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1371 seq_blocks += add_blocks;
1372
1373 /* Adjust tail blocks */
1374 if (seq_start + seq_blocks - 1 > block_end)
1375 seq_blocks = block_end - seq_start + 1;
1376
1377 ext_prev = ext_cur;
1378 last_extent = mext_next_extent(orig_inode, holecheck_path,
1379 &ext_cur);
1380 if (last_extent < 0) {
1381 ret = last_extent;
1382 break;
1383 }
1384 add_blocks = ext4_ext_get_actual_len(ext_cur);
1385
1386 /*
1387 * Extend the length of contiguous block (seq_blocks)
1388 * if extents are contiguous.
1389 */
1390 if (ext4_can_extents_be_merged(orig_inode,
1391 ext_prev, ext_cur) &&
1392 block_end >= le32_to_cpu(ext_cur->ee_block) &&
1393 !last_extent)
1394 continue; 624 continue;
1395 625 /* Check hole after the start pos */
1396 /* Is original extent is unwritten */ 626 } else if (cur_blk > o_start) {
1397 unwritten = ext4_ext_is_unwritten(ext_prev); 627 /* Skip hole */
1398 628 d_start += cur_blk - o_start;
1399 data_offset_in_page = seq_start % blocks_per_page; 629 o_start = cur_blk;
1400 630 /* Extent inside requested range ?*/
1401 /* 631 if (cur_blk >= o_end)
1402 * Calculate data blocks count that should be swapped 632 goto out;
1403 * at the first page. 633 } else { /* in_range(o_start, o_blk, o_len) */
1404 */ 634 cur_len += cur_blk - o_start;
1405 if (data_offset_in_page + seq_blocks > blocks_per_page) {
1406 /* Swapped blocks are across pages */
1407 block_len_in_page =
1408 blocks_per_page - data_offset_in_page;
1409 } else {
1410 /* Swapped blocks are in a page */
1411 block_len_in_page = seq_blocks;
1412 } 635 }
1413 636 unwritten = ext4_ext_is_unwritten(ex);
1414 orig_page_offset = seq_start >> 637 if (o_end - o_start < cur_len)
1415 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); 638 cur_len = o_end - o_start;
1416 seq_end_page = (seq_start + seq_blocks - 1) >> 639
1417 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); 640 orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
1418 seq_start = le32_to_cpu(ext_cur->ee_block); 641 orig_inode->i_blkbits);
1419 rest_blocks = seq_blocks; 642 donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
1420 643 donor_inode->i_blkbits);
644 offset_in_page = o_start % blocks_per_page;
645 if (cur_len > blocks_per_page - offset_in_page)
646 cur_len = blocks_per_page - offset_in_page;
1421 /* 647 /*
1422 * Up semaphore to avoid following problems: 648 * Up semaphore to avoid following problems:
1423 * a. transaction deadlock among ext4_journal_start, 649 * a. transaction deadlock among ext4_journal_start,
@@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1426 * in move_extent_per_page 652 * in move_extent_per_page
1427 */ 653 */
1428 ext4_double_up_write_data_sem(orig_inode, donor_inode); 654 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1429 655 /* Swap original branches with new branches */
1430 while (orig_page_offset <= seq_end_page) { 656 move_extent_per_page(o_filp, donor_inode,
1431 657 orig_page_index, donor_page_index,
1432 /* Swap original branches with new branches */ 658 offset_in_page, cur_len,
1433 block_len_in_page = move_extent_per_page( 659 unwritten, &ret);
1434 o_filp, donor_inode,
1435 orig_page_offset,
1436 data_offset_in_page,
1437 block_len_in_page,
1438 unwritten, &ret);
1439
1440 /* Count how many blocks we have exchanged */
1441 *moved_len += block_len_in_page;
1442 if (ret < 0)
1443 break;
1444 if (*moved_len > len) {
1445 EXT4_ERROR_INODE(orig_inode,
1446 "We replaced blocks too much! "
1447 "sum of replaced: %llu requested: %llu",
1448 *moved_len, len);
1449 ret = -EIO;
1450 break;
1451 }
1452
1453 orig_page_offset++;
1454 data_offset_in_page = 0;
1455 rest_blocks -= block_len_in_page;
1456 if (rest_blocks > blocks_per_page)
1457 block_len_in_page = blocks_per_page;
1458 else
1459 block_len_in_page = rest_blocks;
1460 }
1461
1462 ext4_double_down_write_data_sem(orig_inode, donor_inode); 660 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1463 if (ret < 0) 661 if (ret < 0)
1464 break; 662 break;
1465 663 o_start += cur_len;
1466 /* Decrease buffer counter */ 664 d_start += cur_len;
1467 if (holecheck_path)
1468 ext4_ext_drop_refs(holecheck_path);
1469 ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
1470 if (ret)
1471 break;
1472 depth = holecheck_path->p_depth;
1473
1474 /* Decrease buffer counter */
1475 if (orig_path)
1476 ext4_ext_drop_refs(orig_path);
1477 ret = get_ext_path(orig_inode, seq_start, &orig_path);
1478 if (ret)
1479 break;
1480
1481 ext_cur = holecheck_path[depth].p_ext;
1482 add_blocks = ext4_ext_get_actual_len(ext_cur);
1483 seq_blocks = 0;
1484
1485 } 665 }
666 *moved_len = o_start - orig_blk;
667 if (*moved_len > len)
668 *moved_len = len;
669
1486out: 670out:
1487 if (*moved_len) { 671 if (*moved_len) {
1488 ext4_discard_preallocations(orig_inode); 672 ext4_discard_preallocations(orig_inode);
1489 ext4_discard_preallocations(donor_inode); 673 ext4_discard_preallocations(donor_inode);
1490 } 674 }
1491 675
1492 if (orig_path) { 676 ext4_ext_drop_refs(path);
1493 ext4_ext_drop_refs(orig_path); 677 kfree(path);
1494 kfree(orig_path);
1495 }
1496 if (holecheck_path) {
1497 ext4_ext_drop_refs(holecheck_path);
1498 kfree(holecheck_path);
1499 }
1500 ext4_double_up_write_data_sem(orig_inode, donor_inode); 678 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1501 ext4_inode_resume_unlocked_dio(orig_inode); 679 ext4_inode_resume_unlocked_dio(orig_inode);
1502 ext4_inode_resume_unlocked_dio(donor_inode); 680 ext4_inode_resume_unlocked_dio(donor_inode);
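The rewritten ext4_move_extents() above replaces the old holecheck_path/orig_path double walk with one linear scan: on a hole, advance both file offsets by the same amount; on an extent, clip it to the requested window and swap. A userspace model of just that walk over an invented extent map (the page-boundary clipping and the swap itself are left out):

#include <stdint.h>
#include <stdio.h>

struct ext { uint32_t block, len; };  /* hypothetical extent-map entry */

int main(void)
{
        /* Invented, sorted extent map of the original file, with holes. */
        struct ext map[] = { { 0, 4 }, { 8, 6 }, { 20, 2 } };
        uint32_t o_start = 2, o_end = 16, d_start = 2;

        for (unsigned int i = 0; i < 3 && o_start < o_end; i++) {
                uint32_t cur = map[i].block, len = map[i].len;

                if (cur + len - 1 < o_start)  /* wholly before the window */
                        continue;
                if (cur > o_start) {          /* hole: skip on both sides */
                        d_start += cur - o_start;
                        o_start = cur;
                        if (cur >= o_end)
                                break;
                } else {                      /* clip the head to o_start */
                        len -= o_start - cur;
                }
                if (o_end - o_start < len)    /* clip the tail to the window */
                        len = o_end - o_start;

                printf("swap %u blocks: orig %u <-> donor %u\n",
                       len, o_start, d_start);
                o_start += len;
                d_start += len;
        }
        return 0;
}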
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 603e4ebbd0ac..123798c5ac31 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
53 ext4_lblk_t *block) 53 ext4_lblk_t *block)
54{ 54{
55 struct buffer_head *bh; 55 struct buffer_head *bh;
56 int err = 0; 56 int err;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && 58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >= 59 ((inode->i_size >> 10) >=
@@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle,
62 62
63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
64 64
65 bh = ext4_bread(handle, inode, *block, 1, &err); 65 bh = ext4_bread(handle, inode, *block, 1);
66 if (!bh) 66 if (IS_ERR(bh))
67 return ERR_PTR(err); 67 return bh;
68 inode->i_size += inode->i_sb->s_blocksize; 68 inode->i_size += inode->i_sb->s_blocksize;
69 EXT4_I(inode)->i_disksize = inode->i_size; 69 EXT4_I(inode)->i_disksize = inode->i_size;
70 BUFFER_TRACE(bh, "get_write_access"); 70 BUFFER_TRACE(bh, "get_write_access");
@@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
94{ 94{
95 struct buffer_head *bh; 95 struct buffer_head *bh;
96 struct ext4_dir_entry *dirent; 96 struct ext4_dir_entry *dirent;
97 int err = 0, is_dx_block = 0; 97 int is_dx_block = 0;
98 98
99 bh = ext4_bread(NULL, inode, block, 0, &err); 99 bh = ext4_bread(NULL, inode, block, 0);
100 if (!bh) { 100 if (IS_ERR(bh)) {
101 if (err == 0) {
102 ext4_error_inode(inode, __func__, line, block,
103 "Directory hole found");
104 return ERR_PTR(-EIO);
105 }
106 __ext4_warning(inode->i_sb, __func__, line, 101 __ext4_warning(inode->i_sb, __func__, line,
107 "error reading directory block " 102 "error %ld reading directory block "
108 "(ino %lu, block %lu)", inode->i_ino, 103 "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino,
109 (unsigned long) block); 104 (unsigned long) block);
110 return ERR_PTR(err); 105
106 return bh;
107 }
108 if (!bh) {
109 ext4_error_inode(inode, __func__, line, block, "Directory hole found");
110 return ERR_PTR(-EIO);
111 } 111 }
112 dirent = (struct ext4_dir_entry *) bh->b_data; 112 dirent = (struct ext4_dir_entry *) bh->b_data;
113 /* Determine whether or not we have an index block */ 113 /* Determine whether or not we have an index block */
@@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
124 "directory leaf block found instead of index block"); 124 "directory leaf block found instead of index block");
125 return ERR_PTR(-EIO); 125 return ERR_PTR(-EIO);
126 } 126 }
127 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 127 if (!ext4_has_metadata_csum(inode->i_sb) ||
128 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
129 buffer_verified(bh)) 128 buffer_verified(bh))
130 return bh; 129 return bh;
131 130
@@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir);
253static struct dx_frame *dx_probe(const struct qstr *d_name, 252static struct dx_frame *dx_probe(const struct qstr *d_name,
254 struct inode *dir, 253 struct inode *dir,
255 struct dx_hash_info *hinfo, 254 struct dx_hash_info *hinfo,
256 struct dx_frame *frame, 255 struct dx_frame *frame);
257 int *err);
258static void dx_release(struct dx_frame *frames); 256static void dx_release(struct dx_frame *frames);
259static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, 257static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
260 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 258 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
@@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
270 __u32 *start_hash); 268 __u32 *start_hash);
271static struct buffer_head * ext4_dx_find_entry(struct inode *dir, 269static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
272 const struct qstr *d_name, 270 const struct qstr *d_name,
273 struct ext4_dir_entry_2 **res_dir, 271 struct ext4_dir_entry_2 **res_dir);
274 int *err);
275static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 272static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
276 struct inode *inode); 273 struct inode *inode);
277 274
@@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
340{ 337{
341 struct ext4_dir_entry_tail *t; 338 struct ext4_dir_entry_tail *t;
342 339
343 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 340 if (!ext4_has_metadata_csum(inode->i_sb))
344 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
345 return 1; 341 return 1;
346 342
347 t = get_dirent_tail(inode, dirent); 343 t = get_dirent_tail(inode, dirent);
@@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
362{ 358{
363 struct ext4_dir_entry_tail *t; 359 struct ext4_dir_entry_tail *t;
364 360
365 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 361 if (!ext4_has_metadata_csum(inode->i_sb))
366 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
367 return; 362 return;
368 363
369 t = get_dirent_tail(inode, dirent); 364 t = get_dirent_tail(inode, dirent);
@@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
438 struct dx_tail *t; 433 struct dx_tail *t;
439 int count_offset, limit, count; 434 int count_offset, limit, count;
440 435
441 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 436 if (!ext4_has_metadata_csum(inode->i_sb))
442 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
443 return 1; 437 return 1;
444 438
445 c = get_dx_countlimit(inode, dirent, &count_offset); 439 c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
468 struct dx_tail *t; 462 struct dx_tail *t;
469 int count_offset, limit, count; 463 int count_offset, limit, count;
470 464
471 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 465 if (!ext4_has_metadata_csum(inode->i_sb))
472 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
473 return; 466 return;
474 467
475 c = get_dx_countlimit(inode, dirent, &count_offset); 468 c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
557 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - 550 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
558 EXT4_DIR_REC_LEN(2) - infosize; 551 EXT4_DIR_REC_LEN(2) - infosize;
559 552
560 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 553 if (ext4_has_metadata_csum(dir->i_sb))
561 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
562 entry_space -= sizeof(struct dx_tail); 554 entry_space -= sizeof(struct dx_tail);
563 return entry_space / sizeof(struct dx_entry); 555 return entry_space / sizeof(struct dx_entry);
564} 556}
@@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir)
567{ 559{
568 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); 560 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
569 561
570 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 562 if (ext4_has_metadata_csum(dir->i_sb))
571 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
572 entry_space -= sizeof(struct dx_tail); 563 entry_space -= sizeof(struct dx_tail);
573 return entry_space / sizeof(struct dx_entry); 564 return entry_space / sizeof(struct dx_entry);
574} 565}
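Each EXT4_HAS_RO_COMPAT_FEATURE(..., METADATA_CSUM) pair in this file collapses into the single ext4_has_metadata_csum() predicate. A hypothetical userspace model of that cleanup pattern (not ext4's real definition, which also involves the checksum driver):

#include <stdio.h>

#define SKETCH_RO_COMPAT_METADATA_CSUM 0x0400  /* illustrative flag value */

struct sketch_sb {
        unsigned int ro_compat;
};

/* One readable predicate instead of a repeated two-line feature test. */
static inline int sketch_has_metadata_csum(const struct sketch_sb *sb)
{
        return (sb->ro_compat & SKETCH_RO_COMPAT_METADATA_CSUM) != 0;
}

int main(void)
{
        struct sketch_sb sb = { .ro_compat = SKETCH_RO_COMPAT_METADATA_CSUM };

        if (sketch_has_metadata_csum(&sb))
                puts("checksummed metadata: reserve room for the tail");
        return 0;
}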
@@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
641 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; 632 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
642 struct stats stats; 633 struct stats stats;
643 printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); 634 printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
644 if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; 635 bh = ext4_bread(NULL,dir, block, 0);
636 if (!bh || IS_ERR(bh))
637 continue;
645 stats = levels? 638 stats = levels?
646 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): 639 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
647 dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); 640 dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
@@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
669 */ 662 */
670static struct dx_frame * 663static struct dx_frame *
671dx_probe(const struct qstr *d_name, struct inode *dir, 664dx_probe(const struct qstr *d_name, struct inode *dir,
672 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) 665 struct dx_hash_info *hinfo, struct dx_frame *frame_in)
673{ 666{
674 unsigned count, indirect; 667 unsigned count, indirect;
675 struct dx_entry *at, *entries, *p, *q, *m; 668 struct dx_entry *at, *entries, *p, *q, *m;
676 struct dx_root *root; 669 struct dx_root *root;
677 struct buffer_head *bh;
678 struct dx_frame *frame = frame_in; 670 struct dx_frame *frame = frame_in;
671 struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
679 u32 hash; 672 u32 hash;
680 673
681 frame->bh = NULL; 674 frame->bh = ext4_read_dirblock(dir, 0, INDEX);
682 bh = ext4_read_dirblock(dir, 0, INDEX); 675 if (IS_ERR(frame->bh))
683 if (IS_ERR(bh)) { 676 return (struct dx_frame *) frame->bh;
684 *err = PTR_ERR(bh); 677
685 goto fail; 678 root = (struct dx_root *) frame->bh->b_data;
686 }
687 root = (struct dx_root *) bh->b_data;
688 if (root->info.hash_version != DX_HASH_TEA && 679 if (root->info.hash_version != DX_HASH_TEA &&
689 root->info.hash_version != DX_HASH_HALF_MD4 && 680 root->info.hash_version != DX_HASH_HALF_MD4 &&
690 root->info.hash_version != DX_HASH_LEGACY) { 681 root->info.hash_version != DX_HASH_LEGACY) {
691 ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", 682 ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
692 root->info.hash_version); 683 root->info.hash_version);
693 brelse(bh);
694 *err = ERR_BAD_DX_DIR;
695 goto fail; 684 goto fail;
696 } 685 }
697 hinfo->hash_version = root->info.hash_version; 686 hinfo->hash_version = root->info.hash_version;
@@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
705 if (root->info.unused_flags & 1) { 694 if (root->info.unused_flags & 1) {
706 ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", 695 ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
707 root->info.unused_flags); 696 root->info.unused_flags);
708 brelse(bh);
709 *err = ERR_BAD_DX_DIR;
710 goto fail; 697 goto fail;
711 } 698 }
712 699
713 if ((indirect = root->info.indirect_levels) > 1) { 700 if ((indirect = root->info.indirect_levels) > 1) {
714 ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", 701 ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
715 root->info.indirect_levels); 702 root->info.indirect_levels);
716 brelse(bh);
717 *err = ERR_BAD_DX_DIR;
718 goto fail; 703 goto fail;
719 } 704 }
720 705
@@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
724 if (dx_get_limit(entries) != dx_root_limit(dir, 709 if (dx_get_limit(entries) != dx_root_limit(dir,
725 root->info.info_length)) { 710 root->info.info_length)) {
726 ext4_warning(dir->i_sb, "dx entry: limit != root limit"); 711 ext4_warning(dir->i_sb, "dx entry: limit != root limit");
727 brelse(bh);
728 *err = ERR_BAD_DX_DIR;
729 goto fail; 712 goto fail;
730 } 713 }
731 714
732 dxtrace(printk("Look up %x", hash)); 715 dxtrace(printk("Look up %x", hash));
733 while (1) 716 while (1) {
734 {
735 count = dx_get_count(entries); 717 count = dx_get_count(entries);
736 if (!count || count > dx_get_limit(entries)) { 718 if (!count || count > dx_get_limit(entries)) {
737 ext4_warning(dir->i_sb, 719 ext4_warning(dir->i_sb,
738 "dx entry: no count or count > limit"); 720 "dx entry: no count or count > limit");
739 brelse(bh); 721 goto fail;
740 *err = ERR_BAD_DX_DIR;
741 goto fail2;
742 } 722 }
743 723
744 p = entries + 1; 724 p = entries + 1;
745 q = entries + count - 1; 725 q = entries + count - 1;
746 while (p <= q) 726 while (p <= q) {
747 {
748 m = p + (q - p)/2; 727 m = p + (q - p)/2;
749 dxtrace(printk(".")); 728 dxtrace(printk("."));
750 if (dx_get_hash(m) > hash) 729 if (dx_get_hash(m) > hash)
@@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
753 p = m + 1; 732 p = m + 1;
754 } 733 }
755 734
756 if (0) // linear search cross check 735 if (0) { // linear search cross check
757 {
758 unsigned n = count - 1; 736 unsigned n = count - 1;
759 at = entries; 737 at = entries;
760 while (n--) 738 while (n--)
@@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
771 749
772 at = p - 1; 750 at = p - 1;
773 dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); 751 dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
774 frame->bh = bh;
775 frame->entries = entries; 752 frame->entries = entries;
776 frame->at = at; 753 frame->at = at;
777 if (!indirect--) return frame; 754 if (!indirect--)
778 bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); 755 return frame;
779 if (IS_ERR(bh)) { 756 frame++;
780 *err = PTR_ERR(bh); 757 frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
781 goto fail2; 758 if (IS_ERR(frame->bh)) {
759 ret_err = (struct dx_frame *) frame->bh;
760 frame->bh = NULL;
761 goto fail;
782 } 762 }
783 entries = ((struct dx_node *) bh->b_data)->entries; 763 entries = ((struct dx_node *) frame->bh->b_data)->entries;
784 764
785 if (dx_get_limit(entries) != dx_node_limit (dir)) { 765 if (dx_get_limit(entries) != dx_node_limit (dir)) {
786 ext4_warning(dir->i_sb, 766 ext4_warning(dir->i_sb,
787 "dx entry: limit != node limit"); 767 "dx entry: limit != node limit");
788 brelse(bh); 768 goto fail;
789 *err = ERR_BAD_DX_DIR;
790 goto fail2;
791 } 769 }
792 frame++;
793 frame->bh = NULL;
794 } 770 }
795fail2: 771fail:
796 while (frame >= frame_in) { 772 while (frame >= frame_in) {
797 brelse(frame->bh); 773 brelse(frame->bh);
798 frame--; 774 frame--;
799 } 775 }
800fail: 776 if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
801 if (*err == ERR_BAD_DX_DIR)
802 ext4_warning(dir->i_sb, 777 ext4_warning(dir->i_sb,
803 "Corrupt dir inode %lu, running e2fsck is " 778 "Corrupt dir inode %lu, running e2fsck is "
804 "recommended.", dir->i_ino); 779 "recommended.", dir->i_ino);
805 return NULL; 780 return ret_err;
806} 781}
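dx_probe() now reports failure through the returned pointer itself rather than an int *err out-parameter, which is what lets the scattered brelse()+*err assignments above collapse into one fail path. A self-contained userspace sketch of the ERR_PTR encoding (simplified from the kernel's err.h):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Errno values live in the top page of the address space, which no
 * valid pointer occupies, so one pointer can carry a result or an error. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct frame { int depth; };

static struct frame *probe(int fail)
{
        static struct frame f = { .depth = 1 };

        if (fail)
                return ERR_PTR(-EIO);  /* no out-parameter needed */
        return &f;
}

int main(void)
{
        struct frame *fr = probe(1);

        if (IS_ERR(fr))
                printf("probe failed: %ld\n", PTR_ERR(fr));  /* -5 */
        return 0;
}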
807 782
808static void dx_release (struct dx_frame *frames) 783static void dx_release (struct dx_frame *frames)
@@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
988 } 963 }
989 hinfo.hash = start_hash; 964 hinfo.hash = start_hash;
990 hinfo.minor_hash = 0; 965 hinfo.minor_hash = 0;
991 frame = dx_probe(NULL, dir, &hinfo, frames, &err); 966 frame = dx_probe(NULL, dir, &hinfo, frames);
992 if (!frame) 967 if (IS_ERR(frame))
993 return err; 968 return PTR_ERR(frame);
994 969
995 /* Add '.' and '..' from the htree header */ 970 /* Add '.' and '..' from the htree header */
996 if (!start_hash && !start_minor_hash) { 971 if (!start_hash && !start_minor_hash) {
@@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1227 buffer */ 1202 buffer */
1228 int num = 0; 1203 int num = 0;
1229 ext4_lblk_t nblocks; 1204 ext4_lblk_t nblocks;
1230 int i, err = 0; 1205 int i, namelen;
1231 int namelen;
1232 1206
1233 *res_dir = NULL; 1207 *res_dir = NULL;
1234 sb = dir->i_sb; 1208 sb = dir->i_sb;
@@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1258 goto restart; 1232 goto restart;
1259 } 1233 }
1260 if (is_dx(dir)) { 1234 if (is_dx(dir)) {
1261 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); 1235 bh = ext4_dx_find_entry(dir, d_name, res_dir);
1262 /* 1236 /*
1263 * On success, or if the error was file not found, 1237 * On success, or if the error was file not found,
1264 * return. Otherwise, fall back to searching the 1238 * return. Otherwise, fall back to searching the
1265 * old-fashioned way. 1239 * old-fashioned way.
1266 */ 1240 */
1267 if (err == -ENOENT) 1241 if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
1268 return NULL;
1269 if (err && err != ERR_BAD_DX_DIR)
1270 return ERR_PTR(err);
1271 if (bh)
1272 return bh; 1242 return bh;
1273 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " 1243 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1274 "falling back\n")); 1244 "falling back\n"));
@@ -1298,10 +1268,10 @@ restart:
1298 break; 1268 break;
1299 } 1269 }
1300 num++; 1270 num++;
1301 bh = ext4_getblk(NULL, dir, b++, 0, &err); 1271 bh = ext4_getblk(NULL, dir, b++, 0);
1302 if (unlikely(err)) { 1272 if (unlikely(IS_ERR(bh))) {
1303 if (ra_max == 0) 1273 if (ra_max == 0)
1304 return ERR_PTR(err); 1274 return bh;
1305 break; 1275 break;
1306 } 1276 }
1307 bh_use[ra_max] = bh; 1277 bh_use[ra_max] = bh;
@@ -1366,7 +1336,7 @@ cleanup_and_exit:
1366} 1336}
1367 1337
1368static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, 1338static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1369 struct ext4_dir_entry_2 **res_dir, int *err) 1339 struct ext4_dir_entry_2 **res_dir)
1370{ 1340{
1371 struct super_block * sb = dir->i_sb; 1341 struct super_block * sb = dir->i_sb;
1372 struct dx_hash_info hinfo; 1342 struct dx_hash_info hinfo;
@@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1375 ext4_lblk_t block; 1345 ext4_lblk_t block;
1376 int retval; 1346 int retval;
1377 1347
1378 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) 1348 frame = dx_probe(d_name, dir, &hinfo, frames);
1379 return NULL; 1349 if (IS_ERR(frame))
1350 return (struct buffer_head *) frame;
1380 do { 1351 do {
1381 block = dx_get_block(frame->at); 1352 block = dx_get_block(frame->at);
1382 bh = ext4_read_dirblock(dir, block, DIRENT); 1353 bh = ext4_read_dirblock(dir, block, DIRENT);
1383 if (IS_ERR(bh)) { 1354 if (IS_ERR(bh))
1384 *err = PTR_ERR(bh);
1385 goto errout; 1355 goto errout;
1386 } 1356
1387 retval = search_dirblock(bh, dir, d_name, 1357 retval = search_dirblock(bh, dir, d_name,
1388 block << EXT4_BLOCK_SIZE_BITS(sb), 1358 block << EXT4_BLOCK_SIZE_BITS(sb),
1389 res_dir); 1359 res_dir);
1390 if (retval == 1) { /* Success! */ 1360 if (retval == 1)
1391 dx_release(frames); 1361 goto success;
1392 return bh;
1393 }
1394 brelse(bh); 1362 brelse(bh);
1395 if (retval == -1) { 1363 if (retval == -1) {
1396 *err = ERR_BAD_DX_DIR; 1364 bh = ERR_PTR(ERR_BAD_DX_DIR);
1397 goto errout; 1365 goto errout;
1398 } 1366 }
1399 1367
@@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1402 frames, NULL); 1370 frames, NULL);
1403 if (retval < 0) { 1371 if (retval < 0) {
1404 ext4_warning(sb, 1372 ext4_warning(sb,
1405 "error reading index page in directory #%lu", 1373 "error %d reading index page in directory #%lu",
1406 dir->i_ino); 1374 retval, dir->i_ino);
1407 *err = retval; 1375 bh = ERR_PTR(retval);
1408 goto errout; 1376 goto errout;
1409 } 1377 }
1410 } while (retval == 1); 1378 } while (retval == 1);
1411 1379
1412 *err = -ENOENT; 1380 bh = NULL;
1413errout: 1381errout:
1414 dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); 1382 dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1415 dx_release (frames); 1383success:
1416 return NULL; 1384 dx_release(frames);
1385 return bh;
1417} 1386}
1418 1387
1419static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 1388static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1441 dentry); 1410 dentry);
1442 return ERR_PTR(-EIO); 1411 return ERR_PTR(-EIO);
1443 } 1412 }
1444 inode = ext4_iget(dir->i_sb, ino); 1413 inode = ext4_iget_normal(dir->i_sb, ino);
1445 if (inode == ERR_PTR(-ESTALE)) { 1414 if (inode == ERR_PTR(-ESTALE)) {
1446 EXT4_ERROR_INODE(dir, 1415 EXT4_ERROR_INODE(dir,
1447 "deleted inode referenced: %u", 1416 "deleted inode referenced: %u",
@@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1474 return ERR_PTR(-EIO); 1443 return ERR_PTR(-EIO);
1475 } 1444 }
1476 1445
1477 return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); 1446 return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
1478} 1447}
1479 1448
1480/* 1449/*
@@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1533 */ 1502 */
1534static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, 1503static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1535 struct buffer_head **bh,struct dx_frame *frame, 1504 struct buffer_head **bh,struct dx_frame *frame,
1536 struct dx_hash_info *hinfo, int *error) 1505 struct dx_hash_info *hinfo)
1537{ 1506{
1538 unsigned blocksize = dir->i_sb->s_blocksize; 1507 unsigned blocksize = dir->i_sb->s_blocksize;
1539 unsigned count, continued; 1508 unsigned count, continued;
@@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1548 int csum_size = 0; 1517 int csum_size = 0;
1549 int err = 0, i; 1518 int err = 0, i;
1550 1519
1551 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 1520 if (ext4_has_metadata_csum(dir->i_sb))
1552 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1553 csum_size = sizeof(struct ext4_dir_entry_tail); 1521 csum_size = sizeof(struct ext4_dir_entry_tail);
1554 1522
1555 bh2 = ext4_append(handle, dir, &newblock); 1523 bh2 = ext4_append(handle, dir, &newblock);
1556 if (IS_ERR(bh2)) { 1524 if (IS_ERR(bh2)) {
1557 brelse(*bh); 1525 brelse(*bh);
1558 *bh = NULL; 1526 *bh = NULL;
1559 *error = PTR_ERR(bh2); 1527 return (struct ext4_dir_entry_2 *) bh2;
1560 return NULL;
1561 } 1528 }
1562 1529
1563 BUFFER_TRACE(*bh, "get_write_access"); 1530 BUFFER_TRACE(*bh, "get_write_access");
@@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1617 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1584 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1618 1585
1619 /* Which block gets the new entry? */ 1586 /* Which block gets the new entry? */
1620 if (hinfo->hash >= hash2) 1587 if (hinfo->hash >= hash2) {
1621 {
1622 swap(*bh, bh2); 1588 swap(*bh, bh2);
1623 de = de2; 1589 de = de2;
1624 } 1590 }
@@ -1638,8 +1604,7 @@ journal_error:
1638 brelse(bh2); 1604 brelse(bh2);
1639 *bh = NULL; 1605 *bh = NULL;
1640 ext4_std_error(dir->i_sb, err); 1606 ext4_std_error(dir->i_sb, err);
1641 *error = err; 1607 return ERR_PTR(err);
1642 return NULL;
1643} 1608}
1644 1609
1645int ext4_find_dest_de(struct inode *dir, struct inode *inode, 1610int ext4_find_dest_de(struct inode *dir, struct inode *inode,
@@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1718 int csum_size = 0; 1683 int csum_size = 0;
1719 int err; 1684 int err;
1720 1685
1721 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1686 if (ext4_has_metadata_csum(inode->i_sb))
1722 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1723 csum_size = sizeof(struct ext4_dir_entry_tail); 1687 csum_size = sizeof(struct ext4_dir_entry_tail);
1724 1688
1725 if (!de) { 1689 if (!de) {
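This is the first of many hunks in the section collapsing the two-line EXT4_HAS_RO_COMPAT_FEATURE(..., METADATA_CSUM) test into ext4_has_metadata_csum(). Going by its call sites, the helper introduced by this series is roughly:

/* Assumed definition (fs/ext4/ext4.h): keying off the loaded crc32c
 * driver instead of re-testing the feature bit also flags the case
 * where the feature is set but no checksum driver could be loaded. */
static inline int ext4_has_metadata_csum(struct super_block *sb)
{
	WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
		     !EXT4_SB(sb)->s_chksum_driver);

	return EXT4_SB(sb)->s_chksum_driver != NULL;
}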
@@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1786 struct fake_dirent *fde; 1750 struct fake_dirent *fde;
1787 int csum_size = 0; 1751 int csum_size = 0;
1788 1752
1789 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1753 if (ext4_has_metadata_csum(inode->i_sb))
1790 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1791 csum_size = sizeof(struct ext4_dir_entry_tail); 1754 csum_size = sizeof(struct ext4_dir_entry_tail);
1792 1755
1793 blocksize = dir->i_sb->s_blocksize; 1756 blocksize = dir->i_sb->s_blocksize;
@@ -1862,8 +1825,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1862 ext4_handle_dirty_dx_node(handle, dir, frame->bh); 1825 ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1863 ext4_handle_dirty_dirent_node(handle, dir, bh); 1826 ext4_handle_dirty_dirent_node(handle, dir, bh);
1864 1827
1865 de = do_split(handle,dir, &bh, frame, &hinfo, &retval); 1828 de = do_split(handle,dir, &bh, frame, &hinfo);
1866 if (!de) { 1829 if (IS_ERR(de)) {
1867 /* 1830 /*
1868 * Even if the block split failed, we have to properly write 1831 * Even if the block split failed, we have to properly write
1869 * out all the changes we did so far. Otherwise we can end up 1832 * out all the changes we did so far. Otherwise we can end up
@@ -1871,7 +1834,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1871 */ 1834 */
1872 ext4_mark_inode_dirty(handle, dir); 1835 ext4_mark_inode_dirty(handle, dir);
1873 dx_release(frames); 1836 dx_release(frames);
1874 return retval; 1837 return PTR_ERR(de);
1875 } 1838 }
1876 dx_release(frames); 1839 dx_release(frames);
1877 1840
@@ -1904,8 +1867,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1904 ext4_lblk_t block, blocks; 1867 ext4_lblk_t block, blocks;
1905 int csum_size = 0; 1868 int csum_size = 0;
1906 1869
1907 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1870 if (ext4_has_metadata_csum(inode->i_sb))
1908 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1909 csum_size = sizeof(struct ext4_dir_entry_tail); 1871 csum_size = sizeof(struct ext4_dir_entry_tail);
1910 1872
1911 sb = dir->i_sb; 1873 sb = dir->i_sb;
@@ -1982,9 +1944,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1982 struct ext4_dir_entry_2 *de; 1944 struct ext4_dir_entry_2 *de;
1983 int err; 1945 int err;
1984 1946
1985 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); 1947 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames);
1986 if (!frame) 1948 if (IS_ERR(frame))
1987 return err; 1949 return PTR_ERR(frame);
1988 entries = frame->entries; 1950 entries = frame->entries;
1989 at = frame->at; 1951 at = frame->at;
1990 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); 1952 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
@@ -2095,9 +2057,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2095 goto cleanup; 2057 goto cleanup;
2096 } 2058 }
2097 } 2059 }
2098 de = do_split(handle, dir, &bh, frame, &hinfo, &err); 2060 de = do_split(handle, dir, &bh, frame, &hinfo);
2099 if (!de) 2061 if (IS_ERR(de)) {
2062 err = PTR_ERR(de);
2100 goto cleanup; 2063 goto cleanup;
2064 }
2101 err = add_dirent_to_buf(handle, dentry, inode, de, bh); 2065 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
2102 goto cleanup; 2066 goto cleanup;
2103 2067
@@ -2167,8 +2131,7 @@ static int ext4_delete_entry(handle_t *handle,
2167 return err; 2131 return err;
2168 } 2132 }
2169 2133
2170 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 2134 if (ext4_has_metadata_csum(dir->i_sb))
2171 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2172 csum_size = sizeof(struct ext4_dir_entry_tail); 2135 csum_size = sizeof(struct ext4_dir_entry_tail);
2173 2136
2174 BUFFER_TRACE(bh, "get_write_access"); 2137 BUFFER_TRACE(bh, "get_write_access");
@@ -2387,8 +2350,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2387 int csum_size = 0; 2350 int csum_size = 0;
2388 int err; 2351 int err;
2389 2352
2390 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 2353 if (ext4_has_metadata_csum(dir->i_sb))
2391 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2392 csum_size = sizeof(struct ext4_dir_entry_tail); 2354 csum_size = sizeof(struct ext4_dir_entry_tail);
2393 2355
2394 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 2356 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2403,10 +2365,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2403 dir_block = ext4_append(handle, inode, &block); 2365 dir_block = ext4_append(handle, inode, &block);
2404 if (IS_ERR(dir_block)) 2366 if (IS_ERR(dir_block))
2405 return PTR_ERR(dir_block); 2367 return PTR_ERR(dir_block);
2406 BUFFER_TRACE(dir_block, "get_write_access");
2407 err = ext4_journal_get_write_access(handle, dir_block);
2408 if (err)
2409 goto out;
2410 de = (struct ext4_dir_entry_2 *)dir_block->b_data; 2368 de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2411 ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); 2369 ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2412 set_nlink(inode, 2); 2370 set_nlink(inode, 2);
@@ -2573,7 +2531,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2573 int err = 0, rc; 2531 int err = 0, rc;
2574 bool dirty = false; 2532 bool dirty = false;
2575 2533
2576 if (!sbi->s_journal) 2534 if (!sbi->s_journal || is_bad_inode(inode))
2577 return 0; 2535 return 0;
2578 2536
2579 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && 2537 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
@@ -3190,6 +3148,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3190 } 3148 }
3191} 3149}
3192 3150
3151static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
3152 int credits, handle_t **h)
3153{
3154 struct inode *wh;
3155 handle_t *handle;
3156 int retries = 0;
3157
3158 /*
3159 * for inode block, sb block, group summaries,
3160 * and inode bitmap
3161 */
3162 credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3163 EXT4_XATTR_TRANS_BLOCKS + 4);
3164retry:
3165 wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
3166 &ent->dentry->d_name, 0, NULL,
3167 EXT4_HT_DIR, credits);
3168
3169 handle = ext4_journal_current_handle();
3170 if (IS_ERR(wh)) {
3171 if (handle)
3172 ext4_journal_stop(handle);
3173 if (PTR_ERR(wh) == -ENOSPC &&
3174 ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3175 goto retry;
3176 } else {
3177 *h = handle;
3178 init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3179 wh->i_op = &ext4_special_inode_operations;
3180 }
3181 return wh;
3182}
3183
3193/* 3184/*
3194 * Anybody can rename anything with this: the permission checks are left to the 3185 * Anybody can rename anything with this: the permission checks are left to the
3195 * higher-level routines. 3186 * higher-level routines.
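ext4_whiteout_for_rename() is new here. A whiteout is the VFS/overlayfs marker for "this name is deleted in the upper layer": a character-device inode with device number 0:0. The constants it uses come from the VFS; values assumed below, mirroring include/linux/fs.h of this era:

/* Assumed VFS whiteout conventions: */
#define WHITEOUT_MODE 0	/* extra mode bits OR'd into S_IFCHR */
#define WHITEOUT_DEV  0	/* device 0:0 is what marks a whiteout */

/* So the allocation above amounts to creating an S_IFCHR inode and
 * running init_special_inode(wh, wh->i_mode, 0) on it; the extra
 * journal credits cover the inode block, superblock, group summaries
 * and inode bitmap that the allocation can touch, per the comment. */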
@@ -3199,7 +3190,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3199 * This comes from rename(const char *oldpath, const char *newpath) 3190 * This comes from rename(const char *oldpath, const char *newpath)
3200 */ 3191 */
3201static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, 3192static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3202 struct inode *new_dir, struct dentry *new_dentry) 3193 struct inode *new_dir, struct dentry *new_dentry,
3194 unsigned int flags)
3203{ 3195{
3204 handle_t *handle = NULL; 3196 handle_t *handle = NULL;
3205 struct ext4_renament old = { 3197 struct ext4_renament old = {
@@ -3214,6 +3206,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3214 }; 3206 };
3215 int force_reread; 3207 int force_reread;
3216 int retval; 3208 int retval;
3209 struct inode *whiteout = NULL;
3210 int credits;
3211 u8 old_file_type;
3217 3212
3218 dquot_initialize(old.dir); 3213 dquot_initialize(old.dir);
3219 dquot_initialize(new.dir); 3214 dquot_initialize(new.dir);
@@ -3252,11 +3247,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3252 if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) 3247 if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3253 ext4_alloc_da_blocks(old.inode); 3248 ext4_alloc_da_blocks(old.inode);
3254 3249
3255 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, 3250 credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3256 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + 3251 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3257 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); 3252 if (!(flags & RENAME_WHITEOUT)) {
3258 if (IS_ERR(handle)) 3253 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3259 return PTR_ERR(handle); 3254 if (IS_ERR(handle))
3255 return PTR_ERR(handle);
3256 } else {
3257 whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
3258 if (IS_ERR(whiteout))
3259 return PTR_ERR(whiteout);
3260 }
3260 3261
3261 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) 3262 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3262 ext4_handle_sync(handle); 3263 ext4_handle_sync(handle);
@@ -3284,13 +3285,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3284 */ 3285 */
3285 force_reread = (new.dir->i_ino == old.dir->i_ino && 3286 force_reread = (new.dir->i_ino == old.dir->i_ino &&
3286 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); 3287 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3288
3289 old_file_type = old.de->file_type;
3290 if (whiteout) {
3291 /*
3292 * Do this before adding a new entry, so the old entry is sure
3293 * to be still pointing to the valid old entry.
3294 */
3295 retval = ext4_setent(handle, &old, whiteout->i_ino,
3296 EXT4_FT_CHRDEV);
3297 if (retval)
3298 goto end_rename;
3299 ext4_mark_inode_dirty(handle, whiteout);
3300 }
3287 if (!new.bh) { 3301 if (!new.bh) {
3288 retval = ext4_add_entry(handle, new.dentry, old.inode); 3302 retval = ext4_add_entry(handle, new.dentry, old.inode);
3289 if (retval) 3303 if (retval)
3290 goto end_rename; 3304 goto end_rename;
3291 } else { 3305 } else {
3292 retval = ext4_setent(handle, &new, 3306 retval = ext4_setent(handle, &new,
3293 old.inode->i_ino, old.de->file_type); 3307 old.inode->i_ino, old_file_type);
3294 if (retval) 3308 if (retval)
3295 goto end_rename; 3309 goto end_rename;
3296 } 3310 }
@@ -3305,10 +3319,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3305 old.inode->i_ctime = ext4_current_time(old.inode); 3319 old.inode->i_ctime = ext4_current_time(old.inode);
3306 ext4_mark_inode_dirty(handle, old.inode); 3320 ext4_mark_inode_dirty(handle, old.inode);
3307 3321
3308 /* 3322 if (!whiteout) {
3309 * ok, that's it 3323 /*
3310 */ 3324 * ok, that's it
3311 ext4_rename_delete(handle, &old, force_reread); 3325 */
3326 ext4_rename_delete(handle, &old, force_reread);
3327 }
3312 3328
3313 if (new.inode) { 3329 if (new.inode) {
3314 ext4_dec_count(handle, new.inode); 3330 ext4_dec_count(handle, new.inode);
@@ -3344,6 +3360,12 @@ end_rename:
3344 brelse(old.dir_bh); 3360 brelse(old.dir_bh);
3345 brelse(old.bh); 3361 brelse(old.bh);
3346 brelse(new.bh); 3362 brelse(new.bh);
3363 if (whiteout) {
3364 if (retval)
3365 drop_nlink(whiteout);
3366 unlock_new_inode(whiteout);
3367 iput(whiteout);
3368 }
3347 if (handle) 3369 if (handle)
3348 ext4_journal_stop(handle); 3370 ext4_journal_stop(handle);
3349 return retval; 3371 return retval;
@@ -3476,18 +3498,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
3476 struct inode *new_dir, struct dentry *new_dentry, 3498 struct inode *new_dir, struct dentry *new_dentry,
3477 unsigned int flags) 3499 unsigned int flags)
3478{ 3500{
3479 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 3501 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
3480 return -EINVAL; 3502 return -EINVAL;
3481 3503
3482 if (flags & RENAME_EXCHANGE) { 3504 if (flags & RENAME_EXCHANGE) {
3483 return ext4_cross_rename(old_dir, old_dentry, 3505 return ext4_cross_rename(old_dir, old_dentry,
3484 new_dir, new_dentry); 3506 new_dir, new_dentry);
3485 } 3507 }
3486 /* 3508
3487 * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" 3509 return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
3488 * is equivalent to regular rename.
3489 */
3490 return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
3491} 3510}
3492 3511
3493/* 3512/*
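With ext4_rename2() now accepting RENAME_WHITEOUT, the feature is reachable from userspace through renameat2(). A small test program (raw syscall, since glibc of this era ships no wrapper; the flag value mirrors include/uapi/linux/fs.h, and SYS_renameat2 needs kernel headers from 3.15 or later):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)	/* whiteout the source */
#endif

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s OLD NEW\n", argv[0]);
		return 1;
	}
	/* Moves OLD to NEW and leaves a 0:0 chardev whiteout at OLD. */
	if (syscall(SYS_renameat2, AT_FDCWD, argv[1],
		    AT_FDCWD, argv[2], RENAME_WHITEOUT) == -1) {
		perror("renameat2");
		return 1;
	}
	return 0;
}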
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1e43b905ff98..f298c60f907d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
1212{ 1212{
1213 struct buffer_head *bh; 1213 struct buffer_head *bh;
1214 1214
1215 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 1215 if (!ext4_has_metadata_csum(sb))
1216 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1217 return 0; 1216 return 0;
1218 1217
1219 bh = ext4_get_bitmap(sb, group_data->inode_bitmap); 1218 bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 05c159218bc2..1eda6ab0ef9d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
70static void ext4_clear_journal_err(struct super_block *sb, 70static void ext4_clear_journal_err(struct super_block *sb,
71 struct ext4_super_block *es); 71 struct ext4_super_block *es);
72static int ext4_sync_fs(struct super_block *sb, int wait); 72static int ext4_sync_fs(struct super_block *sb, int wait);
73static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
74static int ext4_remount(struct super_block *sb, int *flags, char *data); 73static int ext4_remount(struct super_block *sb, int *flags, char *data);
75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 74static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
76static int ext4_unfreeze(struct super_block *sb); 75static int ext4_unfreeze(struct super_block *sb);
@@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
141static int ext4_superblock_csum_verify(struct super_block *sb, 140static int ext4_superblock_csum_verify(struct super_block *sb,
142 struct ext4_super_block *es) 141 struct ext4_super_block *es)
143{ 142{
144 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 143 if (!ext4_has_metadata_csum(sb))
145 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
146 return 1; 144 return 1;
147 145
148 return es->s_checksum == ext4_superblock_csum(sb, es); 146 return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
152{ 150{
153 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 151 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
154 152
155 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 153 if (!ext4_has_metadata_csum(sb))
156 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
157 return; 154 return;
158 155
159 es->s_checksum = ext4_superblock_csum(sb, es); 156 es->s_checksum = ext4_superblock_csum(sb, es);
@@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb)
820 percpu_counter_destroy(&sbi->s_freeinodes_counter); 817 percpu_counter_destroy(&sbi->s_freeinodes_counter);
821 percpu_counter_destroy(&sbi->s_dirs_counter); 818 percpu_counter_destroy(&sbi->s_dirs_counter);
822 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 819 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
823 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
824 brelse(sbi->s_sbh); 820 brelse(sbi->s_sbh);
825#ifdef CONFIG_QUOTA 821#ifdef CONFIG_QUOTA
826 for (i = 0; i < MAXQUOTAS; i++) 822 for (i = 0; i < EXT4_MAXQUOTAS; i++)
827 kfree(sbi->s_qf_names[i]); 823 kfree(sbi->s_qf_names[i]);
828#endif 824#endif
829 825
@@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
885 ext4_es_init_tree(&ei->i_es_tree); 881 ext4_es_init_tree(&ei->i_es_tree);
886 rwlock_init(&ei->i_es_lock); 882 rwlock_init(&ei->i_es_lock);
887 INIT_LIST_HEAD(&ei->i_es_lru); 883 INIT_LIST_HEAD(&ei->i_es_lru);
884 ei->i_es_all_nr = 0;
888 ei->i_es_lru_nr = 0; 885 ei->i_es_lru_nr = 0;
889 ei->i_touch_when = 0; 886 ei->i_touch_when = 0;
890 ei->i_reserved_data_blocks = 0; 887 ei->i_reserved_data_blocks = 0;
@@ -1002,7 +999,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1002 * Currently we don't know the generation for parent directory, so 999 * Currently we don't know the generation for parent directory, so
1003 * a generation of 0 means "accept any" 1000 * a generation of 0 means "accept any"
1004 */ 1001 */
1005 inode = ext4_iget(sb, ino); 1002 inode = ext4_iget_normal(sb, ino);
1006 if (IS_ERR(inode)) 1003 if (IS_ERR(inode))
1007 return ERR_CAST(inode); 1004 return ERR_CAST(inode);
1008 if (generation && inode->i_generation != generation) { 1005 if (generation && inode->i_generation != generation) {
@@ -1124,25 +1121,6 @@ static const struct super_operations ext4_sops = {
1124 .bdev_try_to_free_page = bdev_try_to_free_page, 1121 .bdev_try_to_free_page = bdev_try_to_free_page,
1125}; 1122};
1126 1123
1127static const struct super_operations ext4_nojournal_sops = {
1128 .alloc_inode = ext4_alloc_inode,
1129 .destroy_inode = ext4_destroy_inode,
1130 .write_inode = ext4_write_inode,
1131 .dirty_inode = ext4_dirty_inode,
1132 .drop_inode = ext4_drop_inode,
1133 .evict_inode = ext4_evict_inode,
1134 .sync_fs = ext4_sync_fs_nojournal,
1135 .put_super = ext4_put_super,
1136 .statfs = ext4_statfs,
1137 .remount_fs = ext4_remount,
1138 .show_options = ext4_show_options,
1139#ifdef CONFIG_QUOTA
1140 .quota_read = ext4_quota_read,
1141 .quota_write = ext4_quota_write,
1142#endif
1143 .bdev_try_to_free_page = bdev_try_to_free_page,
1144};
1145
1146static const struct export_operations ext4_export_ops = { 1124static const struct export_operations ext4_export_ops = {
1147 .fh_to_dentry = ext4_fh_to_dentry, 1125 .fh_to_dentry = ext4_fh_to_dentry,
1148 .fh_to_parent = ext4_fh_to_parent, 1126 .fh_to_parent = ext4_fh_to_parent,
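Dropping ext4_nojournal_sops works because every callback that used to differ now branches on sbi->s_journal at runtime; the ext4_sync_fs() and ext4_freeze() hunks further down in this file are the other half of the change. Schematically:

/* One super_operations table for both modes; journal-only work is
 * guarded at runtime.  Helper names here are hypothetical. */
static int sync_fs_sketch(struct super_block *sb, int wait)
{
	if (EXT4_SB(sb)->s_journal)
		return commit_journal_and_wait(sb, wait);
	if (wait && test_opt(sb, BARRIER))
		return flush_block_device(sb);
	return 0;
}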
@@ -1712,13 +1690,6 @@ static int parse_options(char *options, struct super_block *sb,
1712 "not specified"); 1690 "not specified");
1713 return 0; 1691 return 0;
1714 } 1692 }
1715 } else {
1716 if (sbi->s_jquota_fmt) {
1717 ext4_msg(sb, KERN_ERR, "journaled quota format "
1718 "specified with no journaling "
1719 "enabled");
1720 return 0;
1721 }
1722 } 1693 }
1723#endif 1694#endif
1724 if (test_opt(sb, DIOREAD_NOLOCK)) { 1695 if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -2016,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2016 __u16 crc = 0; 1987 __u16 crc = 0;
2017 __le32 le_group = cpu_to_le32(block_group); 1988 __le32 le_group = cpu_to_le32(block_group);
2018 1989
2019 if ((sbi->s_es->s_feature_ro_compat & 1990 if (ext4_has_metadata_csum(sbi->s_sb)) {
2020 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
2021 /* Use new metadata_csum algorithm */ 1991 /* Use new metadata_csum algorithm */
2022 __le16 save_csum; 1992 __le16 save_csum;
2023 __u32 csum32; 1993 __u32 csum32;
@@ -2035,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2035 } 2005 }
2036 2006
2037 /* old crc16 code */ 2007 /* old crc16 code */
2008 if (!(sbi->s_es->s_feature_ro_compat &
2009 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
2010 return 0;
2011
2038 offset = offsetof(struct ext4_group_desc, bg_checksum); 2012 offset = offsetof(struct ext4_group_desc, bg_checksum);
2039 2013
2040 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 2014 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -2191,7 +2165,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2191 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2165 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2192 /* don't clear list on RO mount w/ errors */ 2166 /* don't clear list on RO mount w/ errors */
2193 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 2167 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2194 jbd_debug(1, "Errors on filesystem, " 2168 ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2195 "clearing orphan list.\n"); 2169 "clearing orphan list.\n");
2196 es->s_last_orphan = 0; 2170 es->s_last_orphan = 0;
2197 } 2171 }
@@ -2207,7 +2181,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2207 /* Needed for iput() to work correctly and not trash data */ 2181 /* Needed for iput() to work correctly and not trash data */
2208 sb->s_flags |= MS_ACTIVE; 2182 sb->s_flags |= MS_ACTIVE;
2209 /* Turn on quotas so that they are updated correctly */ 2183 /* Turn on quotas so that they are updated correctly */
2210 for (i = 0; i < MAXQUOTAS; i++) { 2184 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2211 if (EXT4_SB(sb)->s_qf_names[i]) { 2185 if (EXT4_SB(sb)->s_qf_names[i]) {
2212 int ret = ext4_quota_on_mount(sb, i); 2186 int ret = ext4_quota_on_mount(sb, i);
2213 if (ret < 0) 2187 if (ret < 0)
@@ -2263,7 +2237,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2263 PLURAL(nr_truncates)); 2237 PLURAL(nr_truncates));
2264#ifdef CONFIG_QUOTA 2238#ifdef CONFIG_QUOTA
2265 /* Turn quotas off */ 2239 /* Turn quotas off */
2266 for (i = 0; i < MAXQUOTAS; i++) { 2240 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2267 if (sb_dqopt(sb)->files[i]) 2241 if (sb_dqopt(sb)->files[i])
2268 dquot_quota_off(sb, i); 2242 dquot_quota_off(sb, i);
2269 } 2243 }
@@ -2548,6 +2522,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
2548 return count; 2522 return count;
2549} 2523}
2550 2524
2525static ssize_t es_ui_show(struct ext4_attr *a,
2526 struct ext4_sb_info *sbi, char *buf)
2527{
2528
2529 unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
2530 a->u.offset);
2531
2532 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2533}
2534
2551static ssize_t reserved_clusters_show(struct ext4_attr *a, 2535static ssize_t reserved_clusters_show(struct ext4_attr *a,
2552 struct ext4_sb_info *sbi, char *buf) 2536 struct ext4_sb_info *sbi, char *buf)
2553{ 2537{
@@ -2601,14 +2585,29 @@ static struct ext4_attr ext4_attr_##_name = { \
2601 .offset = offsetof(struct ext4_sb_info, _elname),\ 2585 .offset = offsetof(struct ext4_sb_info, _elname),\
2602 }, \ 2586 }, \
2603} 2587}
2588
2589#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \
2590static struct ext4_attr ext4_attr_##_name = { \
2591 .attr = {.name = __stringify(_name), .mode = _mode }, \
2592 .show = _show, \
2593 .store = _store, \
2594 .u = { \
2595 .offset = offsetof(struct ext4_super_block, _elname), \
2596 }, \
2597}
2598
2604#define EXT4_ATTR(name, mode, show, store) \ 2599#define EXT4_ATTR(name, mode, show, store) \
2605static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2600static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2606 2601
2607#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) 2602#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
2608#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2603#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2609#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2604#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2605
2606#define EXT4_RO_ATTR_ES_UI(name, elname) \
2607 EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
2610#define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2608#define EXT4_RW_ATTR_SBI_UI(name, elname) \
2611 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2609 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
2610
2612#define ATTR_LIST(name) &ext4_attr_##name.attr 2611#define ATTR_LIST(name) &ext4_attr_##name.attr
2613#define EXT4_DEPRECATED_ATTR(_name, _val) \ 2612#define EXT4_DEPRECATED_ATTR(_name, _val) \
2614static struct ext4_attr ext4_attr_##_name = { \ 2613static struct ext4_attr ext4_attr_##_name = { \
@@ -2641,6 +2640,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int
2641EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); 2640EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
2642EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); 2641EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
2643EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); 2642EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
2643EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
2644EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
2645EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
2644 2646
2645static struct attribute *ext4_attrs[] = { 2647static struct attribute *ext4_attrs[] = {
2646 ATTR_LIST(delayed_allocation_blocks), 2648 ATTR_LIST(delayed_allocation_blocks),
@@ -2664,6 +2666,9 @@ static struct attribute *ext4_attrs[] = {
2664 ATTR_LIST(warning_ratelimit_burst), 2666 ATTR_LIST(warning_ratelimit_burst),
2665 ATTR_LIST(msg_ratelimit_interval_ms), 2667 ATTR_LIST(msg_ratelimit_interval_ms),
2666 ATTR_LIST(msg_ratelimit_burst), 2668 ATTR_LIST(msg_ratelimit_burst),
2669 ATTR_LIST(errors_count),
2670 ATTR_LIST(first_error_time),
2671 ATTR_LIST(last_error_time),
2667 NULL, 2672 NULL,
2668}; 2673};
2669 2674
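The three new read-only attributes surface the superblock's error bookkeeping under /sys/fs/ext4/<disk>/. Reading one from userspace (the device name in the path is an assumption; substitute your own):

#include <stdio.h>

int main(void)
{
	unsigned int count;
	FILE *f = fopen("/sys/fs/ext4/sda1/errors_count", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%u", &count) == 1)
		printf("recorded error count: %u\n", count);
	fclose(f);
	return 0;
}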
@@ -2723,9 +2728,25 @@ static void ext4_feat_release(struct kobject *kobj)
2723 complete(&ext4_feat->f_kobj_unregister); 2728 complete(&ext4_feat->f_kobj_unregister);
2724} 2729}
2725 2730
2731static ssize_t ext4_feat_show(struct kobject *kobj,
2732 struct attribute *attr, char *buf)
2733{
2734 return snprintf(buf, PAGE_SIZE, "supported\n");
2735}
2736
2737/*
2738 * We can not use ext4_attr_show/store because it relies on the kobject
2739 * being embedded in the ext4_sb_info structure which is definitely not
2740 * true in this case.
2741 */
2742static const struct sysfs_ops ext4_feat_ops = {
2743 .show = ext4_feat_show,
2744 .store = NULL,
2745};
2746
2726static struct kobj_type ext4_feat_ktype = { 2747static struct kobj_type ext4_feat_ktype = {
2727 .default_attrs = ext4_feat_attrs, 2748 .default_attrs = ext4_feat_attrs,
2728 .sysfs_ops = &ext4_attr_ops, 2749 .sysfs_ops = &ext4_feat_ops,
2729 .release = ext4_feat_release, 2750 .release = ext4_feat_release,
2730}; 2751};
2731 2752
@@ -3179,8 +3200,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3179 int compat, incompat; 3200 int compat, incompat;
3180 struct ext4_sb_info *sbi = EXT4_SB(sb); 3201 struct ext4_sb_info *sbi = EXT4_SB(sb);
3181 3202
3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3203 if (ext4_has_metadata_csum(sb)) {
3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3184 /* journal checksum v3 */ 3204 /* journal checksum v3 */
3185 compat = 0; 3205 compat = 0;
3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; 3206 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3190,6 +3210,10 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3190 incompat = 0; 3210 incompat = 0;
3191 } 3211 }
3192 3212
3213 jbd2_journal_clear_features(sbi->s_journal,
3214 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3215 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3216 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3193 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 3217 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3194 ret = jbd2_journal_set_features(sbi->s_journal, 3218 ret = jbd2_journal_set_features(sbi->s_journal,
3195 compat, 0, 3219 compat, 0,
@@ -3202,11 +3226,8 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3202 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3226 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3203 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3227 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3204 } else { 3228 } else {
3205 jbd2_journal_clear_features(sbi->s_journal, 3229 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3230 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3209 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3210 } 3231 }
3211 3232
3212 return ret; 3233 return ret;
@@ -3436,7 +3457,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3436 logical_sb_block = sb_block; 3457 logical_sb_block = sb_block;
3437 } 3458 }
3438 3459
3439 if (!(bh = sb_bread(sb, logical_sb_block))) { 3460 if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3440 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3461 ext4_msg(sb, KERN_ERR, "unable to read superblock");
3441 goto out_fail; 3462 goto out_fail;
3442 } 3463 }
@@ -3487,8 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3487 } 3508 }
3488 3509
3489 /* Precompute checksum seed for all metadata */ 3510 /* Precompute checksum seed for all metadata */
3490 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3511 if (ext4_has_metadata_csum(sb))
3491 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
3492 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3512 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3493 sizeof(es->s_uuid)); 3513 sizeof(es->s_uuid));
3494 3514
@@ -3519,8 +3539,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3519 set_opt(sb, ERRORS_CONT); 3539 set_opt(sb, ERRORS_CONT);
3520 else 3540 else
3521 set_opt(sb, ERRORS_RO); 3541 set_opt(sb, ERRORS_RO);
3522 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) 3542 /* block_validity enabled by default; disable with noblock_validity */
3523 set_opt(sb, BLOCK_VALIDITY); 3543 set_opt(sb, BLOCK_VALIDITY);
3524 if (def_mount_opts & EXT4_DEFM_DISCARD) 3544 if (def_mount_opts & EXT4_DEFM_DISCARD)
3525 set_opt(sb, DISCARD); 3545 set_opt(sb, DISCARD);
3526 3546
@@ -3646,7 +3666,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3646 brelse(bh); 3666 brelse(bh);
3647 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3667 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3648 offset = do_div(logical_sb_block, blocksize); 3668 offset = do_div(logical_sb_block, blocksize);
3649 bh = sb_bread(sb, logical_sb_block); 3669 bh = sb_bread_unmovable(sb, logical_sb_block);
3650 if (!bh) { 3670 if (!bh) {
3651 ext4_msg(sb, KERN_ERR, 3671 ext4_msg(sb, KERN_ERR,
3652 "Can't read superblock on 2nd try"); 3672 "Can't read superblock on 2nd try");
@@ -3868,7 +3888,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3868 3888
3869 for (i = 0; i < db_count; i++) { 3889 for (i = 0; i < db_count; i++) {
3870 block = descriptor_loc(sb, logical_sb_block, i); 3890 block = descriptor_loc(sb, logical_sb_block, i);
3871 sbi->s_group_desc[i] = sb_bread(sb, block); 3891 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
3872 if (!sbi->s_group_desc[i]) { 3892 if (!sbi->s_group_desc[i]) {
3873 ext4_msg(sb, KERN_ERR, 3893 ext4_msg(sb, KERN_ERR,
3874 "can't read group descriptor %d", i); 3894 "can't read group descriptor %d", i);
@@ -3890,13 +3910,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3890 sbi->s_err_report.data = (unsigned long) sb; 3910 sbi->s_err_report.data = (unsigned long) sb;
3891 3911
3892 /* Register extent status tree shrinker */ 3912 /* Register extent status tree shrinker */
3893 ext4_es_register_shrinker(sbi); 3913 if (ext4_es_register_shrinker(sbi))
3894
3895 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
3896 if (err) {
3897 ext4_msg(sb, KERN_ERR, "insufficient memory");
3898 goto failed_mount3; 3914 goto failed_mount3;
3899 }
3900 3915
3901 sbi->s_stripe = ext4_get_stripe_size(sbi); 3916 sbi->s_stripe = ext4_get_stripe_size(sbi);
3902 sbi->s_extent_max_zeroout_kb = 32; 3917 sbi->s_extent_max_zeroout_kb = 32;
@@ -3904,11 +3919,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3904 /* 3919 /*
3905 * set up enough so that it can read an inode 3920 * set up enough so that it can read an inode
3906 */ 3921 */
3907 if (!test_opt(sb, NOLOAD) && 3922 sb->s_op = &ext4_sops;
3908 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3909 sb->s_op = &ext4_sops;
3910 else
3911 sb->s_op = &ext4_nojournal_sops;
3912 sb->s_export_op = &ext4_export_ops; 3923 sb->s_export_op = &ext4_export_ops;
3913 sb->s_xattr = ext4_xattr_handlers; 3924 sb->s_xattr = ext4_xattr_handlers;
3914#ifdef CONFIG_QUOTA 3925#ifdef CONFIG_QUOTA
@@ -4229,10 +4240,9 @@ failed_mount_wq:
4229 jbd2_journal_destroy(sbi->s_journal); 4240 jbd2_journal_destroy(sbi->s_journal);
4230 sbi->s_journal = NULL; 4241 sbi->s_journal = NULL;
4231 } 4242 }
4232failed_mount3:
4233 ext4_es_unregister_shrinker(sbi); 4243 ext4_es_unregister_shrinker(sbi);
4244failed_mount3:
4234 del_timer_sync(&sbi->s_err_report); 4245 del_timer_sync(&sbi->s_err_report);
4235 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4236 if (sbi->s_mmp_tsk) 4246 if (sbi->s_mmp_tsk)
4237 kthread_stop(sbi->s_mmp_tsk); 4247 kthread_stop(sbi->s_mmp_tsk);
4238failed_mount2: 4248failed_mount2:
@@ -4247,7 +4257,7 @@ failed_mount:
4247 remove_proc_entry(sb->s_id, ext4_proc_root); 4257 remove_proc_entry(sb->s_id, ext4_proc_root);
4248 } 4258 }
4249#ifdef CONFIG_QUOTA 4259#ifdef CONFIG_QUOTA
4250 for (i = 0; i < MAXQUOTAS; i++) 4260 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4251 kfree(sbi->s_qf_names[i]); 4261 kfree(sbi->s_qf_names[i]);
4252#endif 4262#endif
4253 ext4_blkdev_remove(sbi); 4263 ext4_blkdev_remove(sbi);
@@ -4375,6 +4385,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
4375 goto out_bdev; 4385 goto out_bdev;
4376 } 4386 }
4377 4387
4388 if ((le32_to_cpu(es->s_feature_ro_compat) &
4389 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4390 es->s_checksum != ext4_superblock_csum(sb, es)) {
4391 ext4_msg(sb, KERN_ERR, "external journal has "
4392 "corrupt superblock");
4393 brelse(bh);
4394 goto out_bdev;
4395 }
4396
4378 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 4397 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4379 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 4398 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4380 brelse(bh); 4399 brelse(bh);
@@ -4677,15 +4696,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4677 * being sent at the end of the function. But we can skip it if 4696 * being sent at the end of the function. But we can skip it if
4678 * transaction_commit will do it for us. 4697 * transaction_commit will do it for us.
4679 */ 4698 */
4680 target = jbd2_get_latest_transaction(sbi->s_journal); 4699 if (sbi->s_journal) {
4681 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && 4700 target = jbd2_get_latest_transaction(sbi->s_journal);
4682 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) 4701 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4702 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4703 needs_barrier = true;
4704
4705 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4706 if (wait)
4707 ret = jbd2_log_wait_commit(sbi->s_journal,
4708 target);
4709 }
4710 } else if (wait && test_opt(sb, BARRIER))
4683 needs_barrier = true; 4711 needs_barrier = true;
4684
4685 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4686 if (wait)
4687 ret = jbd2_log_wait_commit(sbi->s_journal, target);
4688 }
4689 if (needs_barrier) { 4712 if (needs_barrier) {
4690 int err; 4713 int err;
4691 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); 4714 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
@@ -4696,19 +4719,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4696 return ret; 4719 return ret;
4697} 4720}
4698 4721
4699static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
4700{
4701 int ret = 0;
4702
4703 trace_ext4_sync_fs(sb, wait);
4704 flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4705 dquot_writeback_dquots(sb, -1);
4706 if (wait && test_opt(sb, BARRIER))
4707 ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4708
4709 return ret;
4710}
4711
4712/* 4722/*
4713 * LVM calls this function before a (read-only) snapshot is created. This 4723 * LVM calls this function before a (read-only) snapshot is created. This
4714 * gives us a chance to flush the journal completely and mark the fs clean. 4724 * gives us a chance to flush the journal completely and mark the fs clean.
@@ -4727,23 +4737,26 @@ static int ext4_freeze(struct super_block *sb)
4727 4737
4728 journal = EXT4_SB(sb)->s_journal; 4738 journal = EXT4_SB(sb)->s_journal;
4729 4739
4730 /* Now we set up the journal barrier. */ 4740 if (journal) {
4731 jbd2_journal_lock_updates(journal); 4741 /* Now we set up the journal barrier. */
4742 jbd2_journal_lock_updates(journal);
4732 4743
4733 /* 4744 /*
4734 * Don't clear the needs_recovery flag if we failed to flush 4745 * Don't clear the needs_recovery flag if we failed to
4735 * the journal. 4746 * flush the journal.
4736 */ 4747 */
4737 error = jbd2_journal_flush(journal); 4748 error = jbd2_journal_flush(journal);
4738 if (error < 0) 4749 if (error < 0)
4739 goto out; 4750 goto out;
4751 }
4740 4752
4741 /* Journal blocked and flushed, clear needs_recovery flag. */ 4753 /* Journal blocked and flushed, clear needs_recovery flag. */
4742 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4754 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4743 error = ext4_commit_super(sb, 1); 4755 error = ext4_commit_super(sb, 1);
4744out: 4756out:
4745 /* we rely on upper layer to stop further updates */ 4757 if (journal)
4746 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4758 /* we rely on upper layer to stop further updates */
4759 jbd2_journal_unlock_updates(journal);
4747 return error; 4760 return error;
4748} 4761}
4749 4762
@@ -4774,7 +4787,7 @@ struct ext4_mount_options {
4774 u32 s_min_batch_time, s_max_batch_time; 4787 u32 s_min_batch_time, s_max_batch_time;
4775#ifdef CONFIG_QUOTA 4788#ifdef CONFIG_QUOTA
4776 int s_jquota_fmt; 4789 int s_jquota_fmt;
4777 char *s_qf_names[MAXQUOTAS]; 4790 char *s_qf_names[EXT4_MAXQUOTAS];
4778#endif 4791#endif
4779}; 4792};
4780 4793
@@ -4804,7 +4817,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4804 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4817 old_opts.s_max_batch_time = sbi->s_max_batch_time;
4805#ifdef CONFIG_QUOTA 4818#ifdef CONFIG_QUOTA
4806 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4819 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4807 for (i = 0; i < MAXQUOTAS; i++) 4820 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4808 if (sbi->s_qf_names[i]) { 4821 if (sbi->s_qf_names[i]) {
4809 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], 4822 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4810 GFP_KERNEL); 4823 GFP_KERNEL);
@@ -4965,7 +4978,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4965 4978
4966#ifdef CONFIG_QUOTA 4979#ifdef CONFIG_QUOTA
4967 /* Release old quota file names */ 4980 /* Release old quota file names */
4968 for (i = 0; i < MAXQUOTAS; i++) 4981 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4969 kfree(old_opts.s_qf_names[i]); 4982 kfree(old_opts.s_qf_names[i]);
4970 if (enable_quota) { 4983 if (enable_quota) {
4971 if (sb_any_quota_suspended(sb)) 4984 if (sb_any_quota_suspended(sb))
@@ -4994,7 +5007,7 @@ restore_opts:
4994 sbi->s_max_batch_time = old_opts.s_max_batch_time; 5007 sbi->s_max_batch_time = old_opts.s_max_batch_time;
4995#ifdef CONFIG_QUOTA 5008#ifdef CONFIG_QUOTA
4996 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 5009 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4997 for (i = 0; i < MAXQUOTAS; i++) { 5010 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
4998 kfree(sbi->s_qf_names[i]); 5011 kfree(sbi->s_qf_names[i]);
4999 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 5012 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5000 } 5013 }
@@ -5197,7 +5210,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5197{ 5210{
5198 int err; 5211 int err;
5199 struct inode *qf_inode; 5212 struct inode *qf_inode;
5200 unsigned long qf_inums[MAXQUOTAS] = { 5213 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5201 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5214 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5202 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5215 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5203 }; 5216 };
@@ -5225,13 +5238,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5225static int ext4_enable_quotas(struct super_block *sb) 5238static int ext4_enable_quotas(struct super_block *sb)
5226{ 5239{
5227 int type, err = 0; 5240 int type, err = 0;
5228 unsigned long qf_inums[MAXQUOTAS] = { 5241 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5229 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5242 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5230 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5243 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5231 }; 5244 };
5232 5245
5233 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; 5246 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5234 for (type = 0; type < MAXQUOTAS; type++) { 5247 for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5235 if (qf_inums[type]) { 5248 if (qf_inums[type]) {
5236 err = ext4_quota_enable(sb, type, QFMT_VFS_V1, 5249 err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5237 DQUOT_USAGE_ENABLED); 5250 DQUOT_USAGE_ENABLED);
@@ -5309,7 +5322,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5309{ 5322{
5310 struct inode *inode = sb_dqopt(sb)->files[type]; 5323 struct inode *inode = sb_dqopt(sb)->files[type];
5311 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 5324 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5312 int err = 0;
5313 int offset = off & (sb->s_blocksize - 1); 5325 int offset = off & (sb->s_blocksize - 1);
5314 int tocopy; 5326 int tocopy;
5315 size_t toread; 5327 size_t toread;
@@ -5324,9 +5336,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5324 while (toread > 0) { 5336 while (toread > 0) {
5325 tocopy = sb->s_blocksize - offset < toread ? 5337 tocopy = sb->s_blocksize - offset < toread ?
5326 sb->s_blocksize - offset : toread; 5338 sb->s_blocksize - offset : toread;
5327 bh = ext4_bread(NULL, inode, blk, 0, &err); 5339 bh = ext4_bread(NULL, inode, blk, 0);
5328 if (err) 5340 if (IS_ERR(bh))
5329 return err; 5341 return PTR_ERR(bh);
5330 if (!bh) /* A hole? */ 5342 if (!bh) /* A hole? */
5331 memset(data, 0, tocopy); 5343 memset(data, 0, tocopy);
5332 else 5344 else
@@ -5347,8 +5359,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5347{ 5359{
5348 struct inode *inode = sb_dqopt(sb)->files[type]; 5360 struct inode *inode = sb_dqopt(sb)->files[type];
5349 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 5361 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5350 int err = 0; 5362 int err, offset = off & (sb->s_blocksize - 1);
5351 int offset = off & (sb->s_blocksize - 1);
5352 struct buffer_head *bh; 5363 struct buffer_head *bh;
5353 handle_t *handle = journal_current_handle(); 5364 handle_t *handle = journal_current_handle();
5354 5365
@@ -5369,14 +5380,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5369 return -EIO; 5380 return -EIO;
5370 } 5381 }
5371 5382
5372 bh = ext4_bread(handle, inode, blk, 1, &err); 5383 bh = ext4_bread(handle, inode, blk, 1);
5384 if (IS_ERR(bh))
5385 return PTR_ERR(bh);
5373 if (!bh) 5386 if (!bh)
5374 goto out; 5387 goto out;
5375 BUFFER_TRACE(bh, "get write access"); 5388 BUFFER_TRACE(bh, "get write access");
5376 err = ext4_journal_get_write_access(handle, bh); 5389 err = ext4_journal_get_write_access(handle, bh);
5377 if (err) { 5390 if (err) {
5378 brelse(bh); 5391 brelse(bh);
5379 goto out; 5392 return err;
5380 } 5393 }
5381 lock_buffer(bh); 5394 lock_buffer(bh);
5382 memcpy(bh->b_data+offset, data, len); 5395 memcpy(bh->b_data+offset, data, len);
@@ -5385,8 +5398,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5385 err = ext4_handle_dirty_metadata(handle, NULL, bh); 5398 err = ext4_handle_dirty_metadata(handle, NULL, bh);
5386 brelse(bh); 5399 brelse(bh);
5387out: 5400out:
5388 if (err)
5389 return err;
5390 if (inode->i_size < off + len) { 5401 if (inode->i_size < off + len) {
5391 i_size_write(inode, off + len); 5402 i_size_write(inode, off + len);
5392 EXT4_I(inode)->i_disksize = inode->i_size; 5403 EXT4_I(inode)->i_disksize = inode->i_size;
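Both quota hunks reflect ext4_bread() losing its "int *err" out-parameter: it now returns an ERR_PTR-encoded errno on failure while keeping NULL for the "hole, nothing mapped" case. The caller pattern, condensed from the read path above:

	/* Three-way result, condensed fragment: */
	bh = ext4_bread(NULL, inode, blk, 0);
	if (IS_ERR(bh))
		return PTR_ERR(bh);	/* real read/allocation failure */
	if (!bh)			/* a hole reads back as zeroes */
		memset(data, 0, tocopy);
	else
		memcpy(data, bh->b_data + offset, tocopy);
	brelse(bh);			/* safe no-op when bh is NULL */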
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e7387337060c..1e09fc77395c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
142 sector_t block_nr, 142 sector_t block_nr,
143 struct ext4_xattr_header *hdr) 143 struct ext4_xattr_header *hdr)
144{ 144{
145 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 145 if (ext4_has_metadata_csum(inode->i_sb) &&
146 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
147 (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) 146 (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
148 return 0; 147 return 0;
149 return 1; 148 return 1;
@@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode,
153 sector_t block_nr, 152 sector_t block_nr,
154 struct ext4_xattr_header *hdr) 153 struct ext4_xattr_header *hdr)
155{ 154{
156 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 155 if (!ext4_has_metadata_csum(inode->i_sb))
157 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
158 return; 156 return;
159 157
160 hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); 158 hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
@@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
190} 188}
191 189
192static int 190static int
193ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) 191ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
192 void *value_start)
194{ 193{
195 while (!IS_LAST_ENTRY(entry)) { 194 struct ext4_xattr_entry *e = entry;
196 struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); 195
196 while (!IS_LAST_ENTRY(e)) {
197 struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
197 if ((void *)next >= end) 198 if ((void *)next >= end)
198 return -EIO; 199 return -EIO;
199 entry = next; 200 e = next;
200 } 201 }
202
203 while (!IS_LAST_ENTRY(entry)) {
204 if (entry->e_value_size != 0 &&
205 (value_start + le16_to_cpu(entry->e_value_offs) <
206 (void *)e + sizeof(__u32) ||
207 value_start + le16_to_cpu(entry->e_value_offs) +
208 le32_to_cpu(entry->e_value_size) > end))
209 return -EIO;
210 entry = EXT4_XATTR_NEXT(entry);
211 }
212
201 return 0; 213 return 0;
202} 214}
203 215
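The second loop added to ext4_xattr_check_names() bounds-checks every entry's value region, not just the chain of entries. The layout it enforces, sketched:

/*
 * xattr region layout (illustrative):
 *
 *   value_start              e = end of entry table             end
 *      | entry | entry | 0 (u32) |    free    | value2 | value1 |
 *                                               values pack toward
 *                                               'end', growing down
 *
 * For each entry with a value: value_start + e_value_offs must not
 * fall before e + sizeof(__u32) (the table's terminator), and
 * offs + e_value_size must not run past 'end'.  Otherwise a crafted
 * image can make a value overlap the entry table, so rewriting the
 * value scribbles over the entries; hence the -EIO. */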
@@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
214 return -EIO; 226 return -EIO;
215 if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) 227 if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
216 return -EIO; 228 return -EIO;
217 error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 229 error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
230 bh->b_data);
218 if (!error) 231 if (!error)
219 set_buffer_verified(bh); 232 set_buffer_verified(bh);
220 return error; 233 return error;
@@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
331 header = IHDR(inode, raw_inode); 344 header = IHDR(inode, raw_inode);
332 entry = IFIRST(header); 345 entry = IFIRST(header);
333 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 346 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
334 error = ext4_xattr_check_names(entry, end); 347 error = ext4_xattr_check_names(entry, end, entry);
335 if (error) 348 if (error)
336 goto cleanup; 349 goto cleanup;
337 error = ext4_xattr_find_entry(&entry, name_index, name, 350 error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
463 raw_inode = ext4_raw_inode(&iloc); 476 raw_inode = ext4_raw_inode(&iloc);
464 header = IHDR(inode, raw_inode); 477 header = IHDR(inode, raw_inode);
465 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 478 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
466 error = ext4_xattr_check_names(IFIRST(header), end); 479 error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
467 if (error) 480 if (error)
468 goto cleanup; 481 goto cleanup;
469 error = ext4_xattr_list_entries(dentry, IFIRST(header), 482 error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -899,14 +912,8 @@ inserted:
899 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 912 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
900 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 913 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
901 914
902 /*
903 * take i_data_sem because we will test
904 * i_delalloc_reserved_flag in ext4_mb_new_blocks
905 */
906 down_read(&EXT4_I(inode)->i_data_sem);
907 block = ext4_new_meta_blocks(handle, inode, goal, 0, 915 block = ext4_new_meta_blocks(handle, inode, goal, 0,
908 NULL, &error); 916 NULL, &error);
909 up_read((&EXT4_I(inode)->i_data_sem));
910 if (error) 917 if (error)
911 goto cleanup; 918 goto cleanup;
912 919
@@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
986 is->s.here = is->s.first; 993 is->s.here = is->s.first;
987 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 994 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
988 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 995 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
989 error = ext4_xattr_check_names(IFIRST(header), is->s.end); 996 error = ext4_xattr_check_names(IFIRST(header), is->s.end,
997 IFIRST(header));
990 if (error) 998 if (error)
991 return error; 999 return error;
992 /* Find the named attribute. */ 1000 /* Find the named attribute. */
diff --git a/fs/internal.h b/fs/internal.h
index 9477f8f6aefc..757ba2abf21e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -47,7 +47,6 @@ extern void __init chrdev_init(void);
47/* 47/*
48 * namei.c 48 * namei.c
49 */ 49 */
50extern int __inode_permission(struct inode *, int);
51extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); 50extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
52extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 51extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
53 const char *, unsigned int, struct path *); 52 const char *, unsigned int, struct path *);
@@ -139,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
139extern int rw_verify_area(int, struct file *, const loff_t *, size_t); 138extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
140 139
141/* 140/*
142 * splice.c
143 */
144extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
145 loff_t *opos, size_t len, unsigned int flags);
146
147/*
148 * pipe.c 141 * pipe.c
149 */ 142 */
150extern const struct file_operations pipefifo_fops; 143extern const struct file_operations pipefifo_fops;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06fe11e0abfa..aab8549591e7 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -886,7 +886,7 @@ journal_t * journal_init_inode (struct inode *inode)
886 goto out_err; 886 goto out_err;
887 } 887 }
888 888
889 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 889 bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
890 if (!bh) { 890 if (!bh) {
891 printk(KERN_ERR 891 printk(KERN_ERR
892 "%s: Cannot get buffer for journal superblock\n", 892 "%s: Cannot get buffer for journal superblock\n",
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 7f34f4716165..988b32ed4c87 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
96 96
97 if (jh->b_transaction == NULL && !buffer_locked(bh) && 97 if (jh->b_transaction == NULL && !buffer_locked(bh) &&
98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
99 /*
100 * Get our reference so that bh cannot be freed before
101 * we unlock it
102 */
103 get_bh(bh);
104 JBUFFER_TRACE(jh, "remove from checkpoint list"); 99 JBUFFER_TRACE(jh, "remove from checkpoint list");
105 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 100 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
106 BUFFER_TRACE(bh, "release");
107 __brelse(bh);
108 } 101 }
109 return ret; 102 return ret;
110} 103}
@@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
122 115
123 nblocks = jbd2_space_needed(journal); 116 nblocks = jbd2_space_needed(journal);
124 while (jbd2_log_space_left(journal) < nblocks) { 117 while (jbd2_log_space_left(journal) < nblocks) {
125 if (journal->j_flags & JBD2_ABORT)
126 return;
127 write_unlock(&journal->j_state_lock); 118 write_unlock(&journal->j_state_lock);
128 mutex_lock(&journal->j_checkpoint_mutex); 119 mutex_lock(&journal->j_checkpoint_mutex);
129 120
@@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal)
139 * trace for forensic evidence. 130 * trace for forensic evidence.
140 */ 131 */
141 write_lock(&journal->j_state_lock); 132 write_lock(&journal->j_state_lock);
133 if (journal->j_flags & JBD2_ABORT) {
134 mutex_unlock(&journal->j_checkpoint_mutex);
135 return;
136 }
142 spin_lock(&journal->j_list_lock); 137 spin_lock(&journal->j_list_lock);
143 nblocks = jbd2_space_needed(journal); 138 nblocks = jbd2_space_needed(journal);
144 space_left = jbd2_log_space_left(journal); 139 space_left = jbd2_log_space_left(journal);
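Relocating the JBD2_ABORT test matters because the flag can be set while this thread sleeps on j_checkpoint_mutex: it has to be re-tested after j_state_lock is retaken, and the early return must now also drop the mutex acquired on the way in. The general shape (schematic names, not the jbd2 identifiers):

	/* Re-test state after reacquiring locks; unwind any lock taken
	 * since the last test before bailing out. */
	mutex_lock(&checkpoint_mutex);		/* may sleep a long time */
	write_lock(&state_lock);
	if (aborted) {				/* re-test under the lock */
		mutex_unlock(&checkpoint_mutex);
		return;	/* state_lock stays held, as callers expect */
	}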
@@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
183 } 178 }
184} 179}
185 180
186/*
187 * Clean up transaction's list of buffers submitted for io.
188 * We wait for any pending IO to complete and remove any clean
189 * buffers. Note that we take the buffers in the opposite ordering
190 * from the one in which they were submitted for IO.
191 *
192 * Return 0 on success, and return <0 if some buffers have failed
193 * to be written out.
194 *
195 * Called with j_list_lock held.
196 */
197static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
198{
199 struct journal_head *jh;
200 struct buffer_head *bh;
201 tid_t this_tid;
202 int released = 0;
203 int ret = 0;
204
205 this_tid = transaction->t_tid;
206restart:
207 /* Did somebody clean up the transaction in the meanwhile? */
208 if (journal->j_checkpoint_transactions != transaction ||
209 transaction->t_tid != this_tid)
210 return ret;
211 while (!released && transaction->t_checkpoint_io_list) {
212 jh = transaction->t_checkpoint_io_list;
213 bh = jh2bh(jh);
214 get_bh(bh);
215 if (buffer_locked(bh)) {
216 spin_unlock(&journal->j_list_lock);
217 wait_on_buffer(bh);
218 /* the journal_head may have gone by now */
219 BUFFER_TRACE(bh, "brelse");
220 __brelse(bh);
221 spin_lock(&journal->j_list_lock);
222 goto restart;
223 }
224 if (unlikely(buffer_write_io_error(bh)))
225 ret = -EIO;
226
227 /*
228 * Now in whatever state the buffer currently is, we know that
229 * it has been written out and so we can drop it from the list
230 */
231 released = __jbd2_journal_remove_checkpoint(jh);
232 __brelse(bh);
233 }
234
235 return ret;
236}
237
238static void 181static void
239__flush_batch(journal_t *journal, int *batch_count) 182__flush_batch(journal_t *journal, int *batch_count)
240{ 183{
@@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count)
255} 198}
256 199
257/* 200/*
258 * Try to flush one buffer from the checkpoint list to disk.
259 *
260 * Return 1 if something happened which requires us to abort the current
261 * scan of the checkpoint list. Return <0 if the buffer has failed to
262 * be written out.
263 *
264 * Called with j_list_lock held and drops it if 1 is returned
265 */
266static int __process_buffer(journal_t *journal, struct journal_head *jh,
267 int *batch_count, transaction_t *transaction)
268{
269 struct buffer_head *bh = jh2bh(jh);
270 int ret = 0;
271
272 if (buffer_locked(bh)) {
273 get_bh(bh);
274 spin_unlock(&journal->j_list_lock);
275 wait_on_buffer(bh);
276 /* the journal_head may have gone by now */
277 BUFFER_TRACE(bh, "brelse");
278 __brelse(bh);
279 ret = 1;
280 } else if (jh->b_transaction != NULL) {
281 transaction_t *t = jh->b_transaction;
282 tid_t tid = t->t_tid;
283
284 transaction->t_chp_stats.cs_forced_to_close++;
285 spin_unlock(&journal->j_list_lock);
286 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
287 /*
288 * The journal thread is dead; so starting and
289 * waiting for a commit to finish will cause
290 * us to wait for a _very_ long time.
291 */
292 printk(KERN_ERR "JBD2: %s: "
293 "Waiting for Godot: block %llu\n",
294 journal->j_devname,
295 (unsigned long long) bh->b_blocknr);
296 jbd2_log_start_commit(journal, tid);
297 jbd2_log_wait_commit(journal, tid);
298 ret = 1;
299 } else if (!buffer_dirty(bh)) {
300 ret = 1;
301 if (unlikely(buffer_write_io_error(bh)))
302 ret = -EIO;
303 get_bh(bh);
304 BUFFER_TRACE(bh, "remove from checkpoint");
305 __jbd2_journal_remove_checkpoint(jh);
306 spin_unlock(&journal->j_list_lock);
307 __brelse(bh);
308 } else {
309 /*
310 * Important: we are about to write the buffer, and
311 * possibly block, while still holding the journal lock.
312 * We cannot afford to let the transaction logic start
313 * messing around with this buffer before we write it to
314 * disk, as that would break recoverability.
315 */
316 BUFFER_TRACE(bh, "queue");
317 get_bh(bh);
318 J_ASSERT_BH(bh, !buffer_jwrite(bh));
319 journal->j_chkpt_bhs[*batch_count] = bh;
320 __buffer_relink_io(jh);
321 transaction->t_chp_stats.cs_written++;
322 (*batch_count)++;
323 if (*batch_count == JBD2_NR_BATCH) {
324 spin_unlock(&journal->j_list_lock);
325 __flush_batch(journal, batch_count);
326 ret = 1;
327 }
328 }
329 return ret;
330}
331
332/*
333 * Perform an actual checkpoint. We take the first transaction on the 201 * Perform an actual checkpoint. We take the first transaction on the
334 * list of transactions to be checkpointed and send all its buffers 202 * list of transactions to be checkpointed and send all its buffers
335 * to disk. We submit larger chunks of data at once. 203 * to disk. We submit larger chunks of data at once.
@@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
339 */ 207 */
340int jbd2_log_do_checkpoint(journal_t *journal) 208int jbd2_log_do_checkpoint(journal_t *journal)
341{ 209{
342 transaction_t *transaction; 210 struct journal_head *jh;
343 tid_t this_tid; 211 struct buffer_head *bh;
344 int result; 212 transaction_t *transaction;
213 tid_t this_tid;
214 int result, batch_count = 0;
345 215
346 jbd_debug(1, "Start checkpoint\n"); 216 jbd_debug(1, "Start checkpoint\n");
347 217
@@ -374,45 +244,117 @@ restart:
374 * done (maybe it's a new transaction, but it fell at the same 244 * done (maybe it's a new transaction, but it fell at the same
375 * address). 245 * address).
376 */ 246 */
377 if (journal->j_checkpoint_transactions == transaction && 247 if (journal->j_checkpoint_transactions != transaction ||
378 transaction->t_tid == this_tid) { 248 transaction->t_tid != this_tid)
379 int batch_count = 0; 249 goto out;
380 struct journal_head *jh; 250
381 int retry = 0, err; 251 /* checkpoint all of the transaction's buffers */
382 252 while (transaction->t_checkpoint_list) {
383 while (!retry && transaction->t_checkpoint_list) { 253 jh = transaction->t_checkpoint_list;
384 jh = transaction->t_checkpoint_list; 254 bh = jh2bh(jh);
385 retry = __process_buffer(journal, jh, &batch_count, 255
386 transaction); 256 if (buffer_locked(bh)) {
387 if (retry < 0 && !result) 257 spin_unlock(&journal->j_list_lock);
388 result = retry; 258 get_bh(bh);
389 if (!retry && (need_resched() || 259 wait_on_buffer(bh);
390 spin_needbreak(&journal->j_list_lock))) { 260 /* the journal_head may have gone by now */
391 spin_unlock(&journal->j_list_lock); 261 BUFFER_TRACE(bh, "brelse");
392 retry = 1; 262 __brelse(bh);
393 break; 263 goto retry;
394 }
395 } 264 }
265 if (jh->b_transaction != NULL) {
266 transaction_t *t = jh->b_transaction;
267 tid_t tid = t->t_tid;
396 268
397 if (batch_count) { 269 transaction->t_chp_stats.cs_forced_to_close++;
398 if (!retry) { 270 spin_unlock(&journal->j_list_lock);
399 spin_unlock(&journal->j_list_lock); 271 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
400 retry = 1; 272 /*
401 } 273 * The journal thread is dead; so
402 __flush_batch(journal, &batch_count); 274 * starting and waiting for a commit
275 * to finish will cause us to wait for
276 * a _very_ long time.
277 */
278 printk(KERN_ERR
279 "JBD2: %s: Waiting for Godot: block %llu\n",
280 journal->j_devname, (unsigned long long) bh->b_blocknr);
281
282 jbd2_log_start_commit(journal, tid);
283 jbd2_log_wait_commit(journal, tid);
284 goto retry;
285 }
286 if (!buffer_dirty(bh)) {
287 if (unlikely(buffer_write_io_error(bh)) && !result)
288 result = -EIO;
289 BUFFER_TRACE(bh, "remove from checkpoint");
290 if (__jbd2_journal_remove_checkpoint(jh))
291 /* The transaction was released; we're done */
292 goto out;
293 continue;
403 } 294 }
295 /*
296 * Important: we are about to write the buffer, and
297 * possibly block, while still holding the journal
298 * lock. We cannot afford to let the transaction
299 * logic start messing around with this buffer before
300 * we write it to disk, as that would break
301 * recoverability.
302 */
303 BUFFER_TRACE(bh, "queue");
304 get_bh(bh);
305 J_ASSERT_BH(bh, !buffer_jwrite(bh));
306 journal->j_chkpt_bhs[batch_count++] = bh;
307 __buffer_relink_io(jh);
308 transaction->t_chp_stats.cs_written++;
309 if ((batch_count == JBD2_NR_BATCH) ||
310 need_resched() ||
311 spin_needbreak(&journal->j_list_lock))
312 goto unlock_and_flush;
313 }
404 314
405 if (retry) { 315 if (batch_count) {
316 unlock_and_flush:
317 spin_unlock(&journal->j_list_lock);
318 retry:
319 if (batch_count)
320 __flush_batch(journal, &batch_count);
406 spin_lock(&journal->j_list_lock); 321 spin_lock(&journal->j_list_lock);
407 goto restart; 322 goto restart;
323 }
324
325 /*
 326 * Now that we have issued all of the transaction's buffers, let's deal
327 * with the buffers that are out for I/O.
328 */
329restart2:
330 /* Did somebody clean up the transaction in the meanwhile? */
331 if (journal->j_checkpoint_transactions != transaction ||
332 transaction->t_tid != this_tid)
333 goto out;
334
335 while (transaction->t_checkpoint_io_list) {
336 jh = transaction->t_checkpoint_io_list;
337 bh = jh2bh(jh);
338 if (buffer_locked(bh)) {
339 spin_unlock(&journal->j_list_lock);
340 get_bh(bh);
341 wait_on_buffer(bh);
342 /* the journal_head may have gone by now */
343 BUFFER_TRACE(bh, "brelse");
344 __brelse(bh);
345 spin_lock(&journal->j_list_lock);
346 goto restart2;
408 } 347 }
348 if (unlikely(buffer_write_io_error(bh)) && !result)
349 result = -EIO;
350
409 /* 351 /*
410 * Now we have cleaned up the first transaction's checkpoint 352 * Now in whatever state the buffer currently is, we
411 * list. Let's clean up the second one 353 * know that it has been written out and so we can
354 * drop it from the list
412 */ 355 */
413 err = __wait_cp_io(journal, transaction); 356 if (__jbd2_journal_remove_checkpoint(jh))
414 if (!result) 357 break;
415 result = err;
416 } 358 }
417out: 359out:
418 spin_unlock(&journal->j_list_lock); 360 spin_unlock(&journal->j_list_lock);
@@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
478 * Find all the written-back checkpoint buffers in the given list and 420 * Find all the written-back checkpoint buffers in the given list and
479 * release them. 421 * release them.
480 * 422 *
481 * Called with the journal locked.
482 * Called with j_list_lock held. 423 * Called with j_list_lock held.
483 * Returns number of buffers reaped (for debug) 424 * Returns 1 if we freed the transaction, 0 otherwise.
484 */ 425 */
485 426static int journal_clean_one_cp_list(struct journal_head *jh)
486static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
487{ 427{
488 struct journal_head *last_jh; 428 struct journal_head *last_jh;
489 struct journal_head *next_jh = jh; 429 struct journal_head *next_jh = jh;
490 int ret, freed = 0; 430 int ret;
431 int freed = 0;
491 432
492 *released = 0;
493 if (!jh) 433 if (!jh)
494 return 0; 434 return 0;
495 435
@@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
498 jh = next_jh; 438 jh = next_jh;
499 next_jh = jh->b_cpnext; 439 next_jh = jh->b_cpnext;
500 ret = __try_to_free_cp_buf(jh); 440 ret = __try_to_free_cp_buf(jh);
501 if (ret) { 441 if (!ret)
502 freed++; 442 return freed;
503 if (ret == 2) { 443 if (ret == 2)
504 *released = 1; 444 return 1;
505 return freed; 445 freed = 1;
506 }
507 }
508 /* 446 /*
509 * This function only frees up some memory 447 * This function only frees up some memory
 510 * if possible so we don't have an obligation 448 * if possible so we don't have an obligation
@@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
523 * 461 *
524 * Find all the written-back checkpoint buffers in the journal and release them. 462 * Find all the written-back checkpoint buffers in the journal and release them.
525 * 463 *
526 * Called with the journal locked.
527 * Called with j_list_lock held. 464 * Called with j_list_lock held.
528 * Returns number of buffers reaped (for debug)
529 */ 465 */
530 466void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
531int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
532{ 467{
533 transaction_t *transaction, *last_transaction, *next_transaction; 468 transaction_t *transaction, *last_transaction, *next_transaction;
534 int ret = 0; 469 int ret;
535 int released;
536 470
537 transaction = journal->j_checkpoint_transactions; 471 transaction = journal->j_checkpoint_transactions;
538 if (!transaction) 472 if (!transaction)
539 goto out; 473 return;
540 474
541 last_transaction = transaction->t_cpprev; 475 last_transaction = transaction->t_cpprev;
542 next_transaction = transaction; 476 next_transaction = transaction;
543 do { 477 do {
544 transaction = next_transaction; 478 transaction = next_transaction;
545 next_transaction = transaction->t_cpnext; 479 next_transaction = transaction->t_cpnext;
546 ret += journal_clean_one_cp_list(transaction-> 480 ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
547 t_checkpoint_list, &released);
548 /* 481 /*
549 * This function only frees up some memory if possible so we 482 * This function only frees up some memory if possible so we
 550 * don't have an obligation to finish processing. Bail out if 483 * don't have an obligation to finish processing. Bail out if
551 * preemption requested: 484 * preemption requested:
552 */ 485 */
553 if (need_resched()) 486 if (need_resched())
554 goto out; 487 return;
555 if (released) 488 if (ret)
556 continue; 489 continue;
557 /* 490 /*
558 * It is essential that we are as careful as in the case of 491 * It is essential that we are as careful as in the case of
559 * t_checkpoint_list with removing the buffer from the list as 492 * t_checkpoint_list with removing the buffer from the list as
560 * we can possibly see not yet submitted buffers on io_list 493 * we can possibly see not yet submitted buffers on io_list
561 */ 494 */
562 ret += journal_clean_one_cp_list(transaction-> 495 ret = journal_clean_one_cp_list(transaction->
563 t_checkpoint_io_list, &released); 496 t_checkpoint_io_list);
564 if (need_resched()) 497 if (need_resched())
565 goto out; 498 return;
499 /*
500 * Stop scanning if we couldn't free the transaction. This
501 * avoids pointless scanning of transactions which still
502 * weren't checkpointed.
503 */
504 if (!ret)
505 return;
566 } while (transaction != last_transaction); 506 } while (transaction != last_transaction);
567out:
568 return ret;
569} 507}
570 508
571/* 509/*
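The rewrite above folds __wait_cp_io() and __process_buffer() into jbd2_log_do_checkpoint() itself: buffers are queued into journal->j_chkpt_bhs[] while j_list_lock is held, and the lock is dropped only to flush a full batch, wait on a locked buffer, or wait for a commit, after which the scan restarts. A minimal userspace analogue of that batching idiom (illustrative names, pthreads standing in for the spinlock):

#include <pthread.h>
#include <stdio.h>

#define BATCH 8

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending[64], npending = 64;

static void flush(int *batch, int *count)
{
	for (int i = 0; i < *count; i++)
		printf("writing item %d\n", batch[i]);	/* one I/O burst */
	*count = 0;
}

int main(void)
{
	int batch[BATCH], count = 0;

	pthread_mutex_lock(&list_lock);
	while (npending > 0) {
		batch[count++] = pending[--npending];
		if (count == BATCH) {
			/* never do the I/O while holding the list lock */
			pthread_mutex_unlock(&list_lock);
			flush(batch, &count);
			pthread_mutex_lock(&list_lock);
		}
	}
	pthread_mutex_unlock(&list_lock);
	flush(batch, &count);	/* final partial batch */
	return 0;
}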
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 19d74d86d99c..e4dc74713a43 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1237 goto out_err; 1237 goto out_err;
1238 } 1238 }
1239 1239
1240 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 1240 bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
1241 if (!bh) { 1241 if (!bh) {
1242 printk(KERN_ERR 1242 printk(KERN_ERR
1243 "%s: Cannot get buffer for journal superblock\n", 1243 "%s: Cannot get buffer for journal superblock\n",
@@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal)
1522 goto out; 1522 goto out;
1523 } 1523 }
1524 1524
1525 if (jbd2_journal_has_csum_v2or3(journal) &&
1526 JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
1527 /* Can't have checksum v1 and v2 on at the same time! */
1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
1529 "at the same time!\n");
1530 goto out;
1531 }
1532
1533 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && 1525 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
1534 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 1526 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1535 /* Can't have checksum v2 and v3 at the same time! */ 1527 /* Can't have checksum v2 and v3 at the same time! */
@@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal)
1538 goto out; 1530 goto out;
1539 } 1531 }
1540 1532
1533 if (jbd2_journal_has_csum_v2or3(journal) &&
1534 JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
1535 /* Can't have checksum v1 and v2 on at the same time! */
1536 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
1537 "at the same time!\n");
1538 goto out;
1539 }
1540
1541 if (!jbd2_verify_csum_type(journal, sb)) { 1541 if (!jbd2_verify_csum_type(journal, sb)) {
1542 printk(KERN_ERR "JBD2: Unknown checksum type\n"); 1542 printk(KERN_ERR "JBD2: Unknown checksum type\n");
1543 goto out; 1543 goto out;
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 9b329b55ffe3..bcbef08a4d8f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal,
525 !jbd2_descr_block_csum_verify(journal, 525 !jbd2_descr_block_csum_verify(journal,
526 bh->b_data)) { 526 bh->b_data)) {
527 err = -EIO; 527 err = -EIO;
528 brelse(bh);
528 goto failed; 529 goto failed;
529 } 530 }
530 531
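The one-line fix above plugs a buffer_head leak in the recovery pass: the reference taken when the descriptor block was read was never dropped on the checksum-failure path. The rule being restored, as a hedged sketch (jread() is the recovery helper that takes the reference; checksum_ok() stands in for the verify call):

/* sketch, not the verbatim code: every bh reference taken during the
 * scan must be dropped on every exit path, or it leaks until unmount */
err = jread(&bh, journal, next_log_block);	/* takes a bh reference */
if (err)
	goto failed;
if (!checksum_ok(bh)) {
	err = -EIO;
	brelse(bh);	/* the release this hunk adds */
	goto failed;
}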
diff --git a/fs/namei.c b/fs/namei.c
index 43927d14db67..42df664e95e5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask)
416 416
417 return security_inode_permission(inode, mask); 417 return security_inode_permission(inode, mask);
418} 418}
419EXPORT_SYMBOL(__inode_permission);
419 420
420/** 421/**
421 * sb_permission - Check superblock-level permissions 422 * sb_permission - Check superblock-level permissions
@@ -2383,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path,
2383} 2384}
2384EXPORT_SYMBOL(kern_path_mountpoint); 2385EXPORT_SYMBOL(kern_path_mountpoint);
2385 2386
2386/* 2387int __check_sticky(struct inode *dir, struct inode *inode)
2387 * It's inline, so penalty for filesystems that don't use sticky bit is
2388 * minimal.
2389 */
2390static inline int check_sticky(struct inode *dir, struct inode *inode)
2391{ 2388{
2392 kuid_t fsuid = current_fsuid(); 2389 kuid_t fsuid = current_fsuid();
2393 2390
2394 if (!(dir->i_mode & S_ISVTX))
2395 return 0;
2396 if (uid_eq(inode->i_uid, fsuid)) 2391 if (uid_eq(inode->i_uid, fsuid))
2397 return 0; 2392 return 0;
2398 if (uid_eq(dir->i_uid, fsuid)) 2393 if (uid_eq(dir->i_uid, fsuid))
2399 return 0; 2394 return 0;
2400 return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); 2395 return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
2401} 2396}
2397EXPORT_SYMBOL(__check_sticky);
2402 2398
2403/* 2399/*
2404 * Check whether we can remove a link victim from directory dir, check 2400 * Check whether we can remove a link victim from directory dir, check
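check_sticky() is un-inlined here so that modules can use it (overlayfs, added below, needs it for sticky-directory permission checks): the uncommon slow path becomes __check_sticky() in namei.c, exported above, while the cheap S_ISVTX test presumably stays inline in include/linux/fs.h, along these lines:

/*
 * Sketch of the expected companion inline (an assumption about
 * include/linux/fs.h, not part of the hunk above): callers that never
 * see sticky directories only pay for one bit test.
 */
static inline int check_sticky(struct inode *dir, struct inode *inode)
{
	if (!(dir->i_mode & S_ISVTX))
		return 0;
	return __check_sticky(dir, inode);
}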
@@ -3064,9 +3060,12 @@ finish_open_created:
3064 error = may_open(&nd->path, acc_mode, open_flag); 3060 error = may_open(&nd->path, acc_mode, open_flag);
3065 if (error) 3061 if (error)
3066 goto out; 3062 goto out;
3067 file->f_path.mnt = nd->path.mnt; 3063
3068 error = finish_open(file, nd->path.dentry, NULL, opened); 3064 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
3069 if (error) { 3065 error = vfs_open(&nd->path, file, current_cred());
3066 if (!error) {
3067 *opened |= FILE_OPENED;
3068 } else {
3070 if (error == -EOPENSTALE) 3069 if (error == -EOPENSTALE)
3071 goto stale_open; 3070 goto stale_open;
3072 goto out; 3071 goto out;
@@ -4210,12 +4209,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
4210 bool should_retry = false; 4209 bool should_retry = false;
4211 int error; 4210 int error;
4212 4211
4213 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 4212 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
4214 return -EINVAL; 4213 return -EINVAL;
4215 4214
4216 if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) 4215 if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
4216 (flags & RENAME_EXCHANGE))
4217 return -EINVAL; 4217 return -EINVAL;
4218 4218
4219 if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
4220 return -EPERM;
4221
4219retry: 4222retry:
4220 from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); 4223 from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
4221 if (IS_ERR(from)) { 4224 if (IS_ERR(from)) {
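RENAME_WHITEOUT, admitted into the flag mask above, atomically renames the source and leaves a whiteout in its place; it is mutually exclusive with RENAME_EXCHANGE and requires CAP_MKNOD. A hypothetical userspace demo (glibc of this era has no renameat2() wrapper, so raw syscall(2) where <sys/syscall.h> defines SYS_renameat2; the flag value is defined defensively in case the headers predate it):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)	/* whiteout source */
#endif

int main(int argc, char *argv[])
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <old> <new>\n", argv[0]);
		return 1;
	}
	/* rename old -> new, atomically leaving a whiteout at old */
	if (syscall(SYS_renameat2, AT_FDCWD, argv[1],
		    AT_FDCWD, argv[2], RENAME_WHITEOUT) == -1) {
		perror("renameat2");
		return 1;
	}
	return 0;
}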
@@ -4347,6 +4350,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
4347 return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 4350 return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
4348} 4351}
4349 4352
4353int vfs_whiteout(struct inode *dir, struct dentry *dentry)
4354{
4355 int error = may_create(dir, dentry);
4356 if (error)
4357 return error;
4358
4359 if (!dir->i_op->mknod)
4360 return -EPERM;
4361
4362 return dir->i_op->mknod(dir, dentry,
4363 S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
4364}
4365EXPORT_SYMBOL(vfs_whiteout);
4366
4350int readlink_copy(char __user *buffer, int buflen, const char *link) 4367int readlink_copy(char __user *buffer, int buflen, const char *link)
4351{ 4368{
4352 int len = PTR_ERR(link); 4369 int len = PTR_ERR(link);
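vfs_whiteout() above makes a whiteout concrete: a character device created with WHITEOUT_MODE and WHITEOUT_DEV, both of which are defined as 0. That gives userspace a stable way to recognise whiteouts on an upper layer, e.g. (a sketch, assuming the 0/0 char-device convention shown above):

#include <stdio.h>
#include <sys/stat.h>

/* returns 1 if path is a whiteout, 0 if not, -1 on error */
static int is_whiteout(const char *path)
{
	struct stat st;

	if (lstat(path, &st) == -1)
		return -1;
	/* S_IFCHR | WHITEOUT_MODE, with WHITEOUT_DEV == 0 */
	return S_ISCHR(st.st_mode) && st.st_rdev == 0;
}

int main(int argc, char *argv[])
{
	if (argc != 2)
		return 1;
	printf("%d\n", is_whiteout(argv[1]));
	return 0;
}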
diff --git a/fs/namespace.c b/fs/namespace.c
index fbba8b17330d..5b66b2b3624d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt)
1686 namespace_unlock(); 1686 namespace_unlock();
1687} 1687}
1688 1688
1689/**
1690 * clone_private_mount - create a private clone of a path
1691 *
1692 * This creates a new vfsmount, which will be the clone of @path. The new will
1693 * not be attached anywhere in the namespace and will be private (i.e. changes
1694 * to the originating mount won't be propagated into this).
1695 *
1696 * Release with mntput().
1697 */
1698struct vfsmount *clone_private_mount(struct path *path)
1699{
1700 struct mount *old_mnt = real_mount(path->mnt);
1701 struct mount *new_mnt;
1702
1703 if (IS_MNT_UNBINDABLE(old_mnt))
1704 return ERR_PTR(-EINVAL);
1705
1706 down_read(&namespace_sem);
1707 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1708 up_read(&namespace_sem);
1709 if (IS_ERR(new_mnt))
1710 return ERR_CAST(new_mnt);
1711
1712 return &new_mnt->mnt;
1713}
1714EXPORT_SYMBOL_GPL(clone_private_mount);
1715
1689int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 1716int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1690 struct vfsmount *root) 1717 struct vfsmount *root)
1691{ 1718{
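clone_private_mount() gives a stacked filesystem its own unpropagated vfsmount over a path, pinned independently of the user-visible namespace. The intended caller pattern, sketched (kernel context, names illustrative; overlayfs below is the expected first user):

struct vfsmount *mnt = clone_private_mount(&lowerpath);

if (IS_ERR(mnt))
	return PTR_ERR(mnt);

/* ... lookups and opens against mnt, immune to later mount changes ... */

mntput(mnt);	/* release, per the comment above */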
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6e4bda63000..9e5bc42180e4 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -5,7 +5,7 @@
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Benny Halevy <bhalevy@panasas.com> 7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <bharrosh@panasas.com> 8 * Boaz Harrosh <ooo@electrozaur.com>
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 11 * it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index c89357c7a914..919efd4a1a23 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -5,7 +5,7 @@
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Benny Halevy <bhalevy@panasas.com> 7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <bharrosh@panasas.com> 8 * Boaz Harrosh <ooo@electrozaur.com>
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 11 * it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 3a0828d57339..2641dbad345c 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -6,7 +6,7 @@
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Benny Halevy <bhalevy@panasas.com> 8 * Benny Halevy <bhalevy@panasas.com>
9 * Boaz Harrosh <bharrosh@panasas.com> 9 * Boaz Harrosh <ooo@electrozaur.com>
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 12 * it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index b3918f7ac34d..f093c7ec983b 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -5,7 +5,7 @@
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Benny Halevy <bhalevy@panasas.com> 7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <bharrosh@panasas.com> 8 * Boaz Harrosh <ooo@electrozaur.com>
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 11 * it under the terms of the GNU General Public License version 2
diff --git a/fs/open.c b/fs/open.c
index d6fd3acde134..de92c13b58be 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,8 +823,7 @@ struct file *dentry_open(const struct path *path, int flags,
823 f = get_empty_filp(); 823 f = get_empty_filp();
824 if (!IS_ERR(f)) { 824 if (!IS_ERR(f)) {
825 f->f_flags = flags; 825 f->f_flags = flags;
826 f->f_path = *path; 826 error = vfs_open(path, f, cred);
827 error = do_dentry_open(f, NULL, cred);
828 if (!error) { 827 if (!error) {
829 /* from now on we need fput() to dispose of f */ 828 /* from now on we need fput() to dispose of f */
830 error = open_check_o_direct(f); 829 error = open_check_o_direct(f);
@@ -841,6 +840,26 @@ struct file *dentry_open(const struct path *path, int flags,
841} 840}
842EXPORT_SYMBOL(dentry_open); 841EXPORT_SYMBOL(dentry_open);
843 842
843/**
844 * vfs_open - open the file at the given path
845 * @path: path to open
846 * @filp: newly allocated file with f_flag initialized
847 * @cred: credentials to use
848 */
849int vfs_open(const struct path *path, struct file *filp,
850 const struct cred *cred)
851{
852 struct inode *inode = path->dentry->d_inode;
853
854 if (inode->i_op->dentry_open)
855 return inode->i_op->dentry_open(path->dentry, filp, cred);
856 else {
857 filp->f_path = *path;
858 return do_dentry_open(filp, NULL, cred);
859 }
860}
861EXPORT_SYMBOL(vfs_open);
862
844static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 863static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
845{ 864{
846 int lookup_flags = 0; 865 int lookup_flags = 0;
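With dentry_open() now funnelled through vfs_open(), an inode can supply i_op->dentry_open to redirect the open to a different file entirely; that is the hook a stacking filesystem needs. A hedged sketch of such an implementation (modelled on what overlayfs is expected to do; example_path_real() is a hypothetical helper that returns the backing path):

static int example_dentry_open(struct dentry *dentry, struct file *file,
			       const struct cred *cred)
{
	struct path realpath;

	example_path_real(dentry, &realpath);	/* hypothetical helper */
	return vfs_open(&realpath, file, cred);	/* open the real file */
}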
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
new file mode 100644
index 000000000000..e60125976873
--- /dev/null
+++ b/fs/overlayfs/Kconfig
@@ -0,0 +1,10 @@
1config OVERLAYFS_FS
2 tristate "Overlay filesystem support"
3 help
4 An overlay filesystem combines two filesystems - an 'upper' filesystem
5 and a 'lower' filesystem. When a name exists in both filesystems, the
6 object in the 'upper' filesystem is visible while the object in the
7 'lower' filesystem is either hidden or, in the case of directories,
8 merged with the 'upper' object.
9
10 For more information see Documentation/filesystems/overlayfs.txt
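For completeness, a hypothetical userspace sketch of mounting the result via mount(2); the type string is "overlayfs" as merged here (later kernels rename it to "overlay"), and all paths are placeholders:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	const char *opts =
		"lowerdir=/lower,upperdir=/upper,workdir=/work";

	/* /merged shows /upper over /lower; /work must be an empty
	 * directory on the same filesystem as /upper */
	if (mount("overlayfs", "/merged", "overlayfs", 0, opts) == -1) {
		perror("mount");
		return 1;
	}
	return 0;
}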
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
new file mode 100644
index 000000000000..8f91889480d0
--- /dev/null
+++ b/fs/overlayfs/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the overlay filesystem.
3#
4
5obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
6
7overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
new file mode 100644
index 000000000000..ea10a8719107
--- /dev/null
+++ b/fs/overlayfs/copy_up.c
@@ -0,0 +1,414 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/file.h>
13#include <linux/splice.h>
14#include <linux/xattr.h>
15#include <linux/security.h>
16#include <linux/uaccess.h>
17#include <linux/sched.h>
18#include <linux/namei.h>
19#include "overlayfs.h"
20
21#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
22
23int ovl_copy_xattr(struct dentry *old, struct dentry *new)
24{
25 ssize_t list_size, size;
26 char *buf, *name, *value;
27 int error;
28
29 if (!old->d_inode->i_op->getxattr ||
30 !new->d_inode->i_op->getxattr)
31 return 0;
32
33 list_size = vfs_listxattr(old, NULL, 0);
34 if (list_size <= 0) {
35 if (list_size == -EOPNOTSUPP)
36 return 0;
37 return list_size;
38 }
39
40 buf = kzalloc(list_size, GFP_KERNEL);
41 if (!buf)
42 return -ENOMEM;
43
44 error = -ENOMEM;
45 value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
46 if (!value)
47 goto out;
48
49 list_size = vfs_listxattr(old, buf, list_size);
50 if (list_size <= 0) {
51 error = list_size;
52 goto out_free_value;
53 }
54
55 for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
56 size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
57 if (size <= 0) {
58 error = size;
59 goto out_free_value;
60 }
61 error = vfs_setxattr(new, name, value, size, 0);
62 if (error)
63 goto out_free_value;
64 }
65
66out_free_value:
67 kfree(value);
68out:
69 kfree(buf);
70 return error;
71}
72
73static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
74{
75 struct file *old_file;
76 struct file *new_file;
77 loff_t old_pos = 0;
78 loff_t new_pos = 0;
79 int error = 0;
80
81 if (len == 0)
82 return 0;
83
84 old_file = ovl_path_open(old, O_RDONLY);
85 if (IS_ERR(old_file))
86 return PTR_ERR(old_file);
87
88 new_file = ovl_path_open(new, O_WRONLY);
89 if (IS_ERR(new_file)) {
90 error = PTR_ERR(new_file);
91 goto out_fput;
92 }
93
94 /* FIXME: copy up sparse files efficiently */
95 while (len) {
96 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
97 long bytes;
98
99 if (len < this_len)
100 this_len = len;
101
102 if (signal_pending_state(TASK_KILLABLE, current)) {
103 error = -EINTR;
104 break;
105 }
106
107 bytes = do_splice_direct(old_file, &old_pos,
108 new_file, &new_pos,
109 this_len, SPLICE_F_MOVE);
110 if (bytes <= 0) {
111 error = bytes;
112 break;
113 }
114 WARN_ON(old_pos != new_pos);
115
116 len -= bytes;
117 }
118
119 fput(new_file);
120out_fput:
121 fput(old_file);
122 return error;
123}
124
125static char *ovl_read_symlink(struct dentry *realdentry)
126{
127 int res;
128 char *buf;
129 struct inode *inode = realdentry->d_inode;
130 mm_segment_t old_fs;
131
132 res = -EINVAL;
133 if (!inode->i_op->readlink)
134 goto err;
135
136 res = -ENOMEM;
137 buf = (char *) __get_free_page(GFP_KERNEL);
138 if (!buf)
139 goto err;
140
141 old_fs = get_fs();
142 set_fs(get_ds());
143 /* The cast to a user pointer is valid due to the set_fs() */
144 res = inode->i_op->readlink(realdentry,
145 (char __user *)buf, PAGE_SIZE - 1);
146 set_fs(old_fs);
147 if (res < 0) {
148 free_page((unsigned long) buf);
149 goto err;
150 }
151 buf[res] = '\0';
152
153 return buf;
154
155err:
156 return ERR_PTR(res);
157}
158
159static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
160{
161 struct iattr attr = {
162 .ia_valid =
163 ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
164 .ia_atime = stat->atime,
165 .ia_mtime = stat->mtime,
166 };
167
168 return notify_change(upperdentry, &attr, NULL);
169}
170
171int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
172{
173 int err = 0;
174
175 if (!S_ISLNK(stat->mode)) {
176 struct iattr attr = {
177 .ia_valid = ATTR_MODE,
178 .ia_mode = stat->mode,
179 };
180 err = notify_change(upperdentry, &attr, NULL);
181 }
182 if (!err) {
183 struct iattr attr = {
184 .ia_valid = ATTR_UID | ATTR_GID,
185 .ia_uid = stat->uid,
186 .ia_gid = stat->gid,
187 };
188 err = notify_change(upperdentry, &attr, NULL);
189 }
190 if (!err)
191 ovl_set_timestamps(upperdentry, stat);
192
193 return err;
194
195}
196
197static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
198 struct dentry *dentry, struct path *lowerpath,
199 struct kstat *stat, struct iattr *attr,
200 const char *link)
201{
202 struct inode *wdir = workdir->d_inode;
203 struct inode *udir = upperdir->d_inode;
204 struct dentry *newdentry = NULL;
205 struct dentry *upper = NULL;
206 umode_t mode = stat->mode;
207 int err;
208
209 newdentry = ovl_lookup_temp(workdir, dentry);
210 err = PTR_ERR(newdentry);
211 if (IS_ERR(newdentry))
212 goto out;
213
214 upper = lookup_one_len(dentry->d_name.name, upperdir,
215 dentry->d_name.len);
216 err = PTR_ERR(upper);
217 if (IS_ERR(upper))
218 goto out1;
219
220 /* Can't properly set mode on creation because of the umask */
221 stat->mode &= S_IFMT;
222 err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
223 stat->mode = mode;
224 if (err)
225 goto out2;
226
227 if (S_ISREG(stat->mode)) {
228 struct path upperpath;
229 ovl_path_upper(dentry, &upperpath);
230 BUG_ON(upperpath.dentry != NULL);
231 upperpath.dentry = newdentry;
232
233 err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
234 if (err)
235 goto out_cleanup;
236 }
237
238 err = ovl_copy_xattr(lowerpath->dentry, newdentry);
239 if (err)
240 goto out_cleanup;
241
242 mutex_lock(&newdentry->d_inode->i_mutex);
243 err = ovl_set_attr(newdentry, stat);
244 if (!err && attr)
245 err = notify_change(newdentry, attr, NULL);
246 mutex_unlock(&newdentry->d_inode->i_mutex);
247 if (err)
248 goto out_cleanup;
249
250 err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
251 if (err)
252 goto out_cleanup;
253
254 ovl_dentry_update(dentry, newdentry);
255 newdentry = NULL;
256
257 /*
 258 * Non-directories become opaque when copied up.
259 */
260 if (!S_ISDIR(stat->mode))
261 ovl_dentry_set_opaque(dentry, true);
262out2:
263 dput(upper);
264out1:
265 dput(newdentry);
266out:
267 return err;
268
269out_cleanup:
270 ovl_cleanup(wdir, newdentry);
271 goto out;
272}
273
274/*
275 * Copy up a single dentry
276 *
 277 * Directory renames are only allowed on "pure upper" (already created on
278 * upper filesystem, never copied up). Directories which are on lower or
279 * are merged may not be renamed. For these -EXDEV is returned and
 280 * userspace has to deal with it. This means that, when copying up a
 281 * directory, we can rely on it and its ancestors being stable.
282 *
283 * Non-directory renames start with copy up of source if necessary. The
284 * actual rename will only proceed once the copy up was successful. Copy
285 * up uses upper parent i_mutex for exclusion. Since rename can change
286 * d_parent it is possible that the copy up will lock the old parent. At
287 * that point the file will have already been copied up anyway.
288 */
289int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
290 struct path *lowerpath, struct kstat *stat,
291 struct iattr *attr)
292{
293 struct dentry *workdir = ovl_workdir(dentry);
294 int err;
295 struct kstat pstat;
296 struct path parentpath;
297 struct dentry *upperdir;
298 struct dentry *upperdentry;
299 const struct cred *old_cred;
300 struct cred *override_cred;
301 char *link = NULL;
302
303 ovl_path_upper(parent, &parentpath);
304 upperdir = parentpath.dentry;
305
306 err = vfs_getattr(&parentpath, &pstat);
307 if (err)
308 return err;
309
310 if (S_ISLNK(stat->mode)) {
311 link = ovl_read_symlink(lowerpath->dentry);
312 if (IS_ERR(link))
313 return PTR_ERR(link);
314 }
315
316 err = -ENOMEM;
317 override_cred = prepare_creds();
318 if (!override_cred)
319 goto out_free_link;
320
321 override_cred->fsuid = stat->uid;
322 override_cred->fsgid = stat->gid;
323 /*
324 * CAP_SYS_ADMIN for copying up extended attributes
325 * CAP_DAC_OVERRIDE for create
326 * CAP_FOWNER for chmod, timestamp update
327 * CAP_FSETID for chmod
328 * CAP_CHOWN for chown
329 * CAP_MKNOD for mknod
330 */
331 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
332 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
333 cap_raise(override_cred->cap_effective, CAP_FOWNER);
334 cap_raise(override_cred->cap_effective, CAP_FSETID);
335 cap_raise(override_cred->cap_effective, CAP_CHOWN);
336 cap_raise(override_cred->cap_effective, CAP_MKNOD);
337 old_cred = override_creds(override_cred);
338
339 err = -EIO;
340 if (lock_rename(workdir, upperdir) != NULL) {
341 pr_err("overlayfs: failed to lock workdir+upperdir\n");
342 goto out_unlock;
343 }
344 upperdentry = ovl_dentry_upper(dentry);
345 if (upperdentry) {
346 unlock_rename(workdir, upperdir);
347 err = 0;
348 /* Raced with another copy-up? Do the setattr here */
349 if (attr) {
350 mutex_lock(&upperdentry->d_inode->i_mutex);
351 err = notify_change(upperdentry, attr, NULL);
352 mutex_unlock(&upperdentry->d_inode->i_mutex);
353 }
354 goto out_put_cred;
355 }
356
357 err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
358 stat, attr, link);
359 if (!err) {
360 /* Restore timestamps on parent (best effort) */
361 ovl_set_timestamps(upperdir, &pstat);
362 }
363out_unlock:
364 unlock_rename(workdir, upperdir);
365out_put_cred:
366 revert_creds(old_cred);
367 put_cred(override_cred);
368
369out_free_link:
370 if (link)
371 free_page((unsigned long) link);
372
373 return err;
374}
375
376int ovl_copy_up(struct dentry *dentry)
377{
378 int err;
379
380 err = 0;
381 while (!err) {
382 struct dentry *next;
383 struct dentry *parent;
384 struct path lowerpath;
385 struct kstat stat;
386 enum ovl_path_type type = ovl_path_type(dentry);
387
388 if (type != OVL_PATH_LOWER)
389 break;
390
391 next = dget(dentry);
392 /* find the topmost dentry not yet copied up */
393 for (;;) {
394 parent = dget_parent(next);
395
396 type = ovl_path_type(parent);
397 if (type != OVL_PATH_LOWER)
398 break;
399
400 dput(next);
401 next = parent;
402 }
403
404 ovl_path_lower(next, &lowerpath);
405 err = vfs_getattr(&lowerpath, &stat);
406 if (!err)
407 err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
408
409 dput(parent);
410 dput(next);
411 }
412
413 return err;
414}
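From userspace, all of copy_up.c is invisible except for its effect: the first operation that needs a writable copy of a lower-only file (open for write, chmod, setxattr, ...) pays for a full copy-up of the file and of any not-yet-copied ancestors, via the loop in ovl_copy_up() above. A sketch of the cheapest trigger (paths are placeholders):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* before: "file" exists only in the lower layer; after this
	 * open succeeds, ovl_copy_up() has materialised a full copy in
	 * the upper layer */
	int fd = open("/merged/file", O_WRONLY | O_APPEND);

	if (fd == -1) {
		perror("open");
		return 1;
	}
	close(fd);
	return 0;
}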
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
new file mode 100644
index 000000000000..15cd91ad9940
--- /dev/null
+++ b/fs/overlayfs/dir.c
@@ -0,0 +1,921 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/namei.h>
12#include <linux/xattr.h>
13#include <linux/security.h>
14#include <linux/cred.h>
15#include "overlayfs.h"
16
17void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
18{
19 int err;
20
21 dget(wdentry);
22 if (S_ISDIR(wdentry->d_inode->i_mode))
23 err = ovl_do_rmdir(wdir, wdentry);
24 else
25 err = ovl_do_unlink(wdir, wdentry);
26 dput(wdentry);
27
28 if (err) {
29 pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
30 wdentry, err);
31 }
32}
33
34struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
35{
36 struct dentry *temp;
37 char name[20];
38
39 snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
40
41 temp = lookup_one_len(name, workdir, strlen(name));
42 if (!IS_ERR(temp) && temp->d_inode) {
43 pr_err("overlayfs: workdir/%s already exists\n", name);
44 dput(temp);
45 temp = ERR_PTR(-EIO);
46 }
47
48 return temp;
49}
50
51/* caller holds i_mutex on workdir */
52static struct dentry *ovl_whiteout(struct dentry *workdir,
53 struct dentry *dentry)
54{
55 int err;
56 struct dentry *whiteout;
57 struct inode *wdir = workdir->d_inode;
58
59 whiteout = ovl_lookup_temp(workdir, dentry);
60 if (IS_ERR(whiteout))
61 return whiteout;
62
63 err = ovl_do_whiteout(wdir, whiteout);
64 if (err) {
65 dput(whiteout);
66 whiteout = ERR_PTR(err);
67 }
68
69 return whiteout;
70}
71
72int ovl_create_real(struct inode *dir, struct dentry *newdentry,
73 struct kstat *stat, const char *link,
74 struct dentry *hardlink, bool debug)
75{
76 int err;
77
78 if (newdentry->d_inode)
79 return -ESTALE;
80
81 if (hardlink) {
82 err = ovl_do_link(hardlink, dir, newdentry, debug);
83 } else {
84 switch (stat->mode & S_IFMT) {
85 case S_IFREG:
86 err = ovl_do_create(dir, newdentry, stat->mode, debug);
87 break;
88
89 case S_IFDIR:
90 err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
91 break;
92
93 case S_IFCHR:
94 case S_IFBLK:
95 case S_IFIFO:
96 case S_IFSOCK:
97 err = ovl_do_mknod(dir, newdentry,
98 stat->mode, stat->rdev, debug);
99 break;
100
101 case S_IFLNK:
102 err = ovl_do_symlink(dir, newdentry, link, debug);
103 break;
104
105 default:
106 err = -EPERM;
107 }
108 }
109 if (!err && WARN_ON(!newdentry->d_inode)) {
110 /*
111 * Not quite sure if non-instantiated dentry is legal or not.
112 * VFS doesn't seem to care so check and warn here.
113 */
114 err = -ENOENT;
115 }
116 return err;
117}
118
119static int ovl_set_opaque(struct dentry *upperdentry)
120{
121 return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
122}
123
124static void ovl_remove_opaque(struct dentry *upperdentry)
125{
126 int err;
127
128 err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr);
129 if (err) {
130 pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
131 upperdentry->d_name.name, err);
132 }
133}
134
135static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
136 struct kstat *stat)
137{
138 int err;
139 enum ovl_path_type type;
140 struct path realpath;
141
142 type = ovl_path_real(dentry, &realpath);
143 err = vfs_getattr(&realpath, stat);
144 if (err)
145 return err;
146
147 stat->dev = dentry->d_sb->s_dev;
148 stat->ino = dentry->d_inode->i_ino;
149
150 /*
151 * It's probably not worth it to count subdirs to get the
152 * correct link count. nlink=1 seems to pacify 'find' and
153 * other utilities.
154 */
155 if (type == OVL_PATH_MERGE)
156 stat->nlink = 1;
157
158 return 0;
159}
160
161static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
162 struct kstat *stat, const char *link,
163 struct dentry *hardlink)
164{
165 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
166 struct inode *udir = upperdir->d_inode;
167 struct dentry *newdentry;
168 int err;
169
170 mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
171 newdentry = lookup_one_len(dentry->d_name.name, upperdir,
172 dentry->d_name.len);
173 err = PTR_ERR(newdentry);
174 if (IS_ERR(newdentry))
175 goto out_unlock;
176 err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
177 if (err)
178 goto out_dput;
179
180 ovl_dentry_version_inc(dentry->d_parent);
181 ovl_dentry_update(dentry, newdentry);
182 ovl_copyattr(newdentry->d_inode, inode);
183 d_instantiate(dentry, inode);
184 newdentry = NULL;
185out_dput:
186 dput(newdentry);
187out_unlock:
188 mutex_unlock(&udir->i_mutex);
189 return err;
190}
191
192static int ovl_lock_rename_workdir(struct dentry *workdir,
193 struct dentry *upperdir)
194{
195 /* Workdir should not be the same as upperdir */
196 if (workdir == upperdir)
197 goto err;
198
199 /* Workdir should not be subdir of upperdir and vice versa */
200 if (lock_rename(workdir, upperdir) != NULL)
201 goto err_unlock;
202
203 return 0;
204
205err_unlock:
206 unlock_rename(workdir, upperdir);
207err:
208 pr_err("overlayfs: failed to lock workdir+upperdir\n");
209 return -EIO;
210}
211
212static struct dentry *ovl_clear_empty(struct dentry *dentry,
213 struct list_head *list)
214{
215 struct dentry *workdir = ovl_workdir(dentry);
216 struct inode *wdir = workdir->d_inode;
217 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
218 struct inode *udir = upperdir->d_inode;
219 struct path upperpath;
220 struct dentry *upper;
221 struct dentry *opaquedir;
222 struct kstat stat;
223 int err;
224
225 err = ovl_lock_rename_workdir(workdir, upperdir);
226 if (err)
227 goto out;
228
229 ovl_path_upper(dentry, &upperpath);
230 err = vfs_getattr(&upperpath, &stat);
231 if (err)
232 goto out_unlock;
233
234 err = -ESTALE;
235 if (!S_ISDIR(stat.mode))
236 goto out_unlock;
237 upper = upperpath.dentry;
238 if (upper->d_parent->d_inode != udir)
239 goto out_unlock;
240
241 opaquedir = ovl_lookup_temp(workdir, dentry);
242 err = PTR_ERR(opaquedir);
243 if (IS_ERR(opaquedir))
244 goto out_unlock;
245
246 err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
247 if (err)
248 goto out_dput;
249
250 err = ovl_copy_xattr(upper, opaquedir);
251 if (err)
252 goto out_cleanup;
253
254 err = ovl_set_opaque(opaquedir);
255 if (err)
256 goto out_cleanup;
257
258 mutex_lock(&opaquedir->d_inode->i_mutex);
259 err = ovl_set_attr(opaquedir, &stat);
260 mutex_unlock(&opaquedir->d_inode->i_mutex);
261 if (err)
262 goto out_cleanup;
263
264 err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
265 if (err)
266 goto out_cleanup;
267
268 ovl_cleanup_whiteouts(upper, list);
269 ovl_cleanup(wdir, upper);
270 unlock_rename(workdir, upperdir);
271
272 /* dentry's upper doesn't match now, get rid of it */
273 d_drop(dentry);
274
275 return opaquedir;
276
277out_cleanup:
278 ovl_cleanup(wdir, opaquedir);
279out_dput:
280 dput(opaquedir);
281out_unlock:
282 unlock_rename(workdir, upperdir);
283out:
284 return ERR_PTR(err);
285}
286
287static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry,
288 enum ovl_path_type type)
289{
290 int err;
291 struct dentry *ret = NULL;
292 LIST_HEAD(list);
293
294 err = ovl_check_empty_dir(dentry, &list);
295 if (err)
296 ret = ERR_PTR(err);
297 else if (type == OVL_PATH_MERGE)
298 ret = ovl_clear_empty(dentry, &list);
299
300 ovl_cache_free(&list);
301
302 return ret;
303}
304
305static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
306 struct kstat *stat, const char *link,
307 struct dentry *hardlink)
308{
309 struct dentry *workdir = ovl_workdir(dentry);
310 struct inode *wdir = workdir->d_inode;
311 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
312 struct inode *udir = upperdir->d_inode;
313 struct dentry *upper;
314 struct dentry *newdentry;
315 int err;
316
317 err = ovl_lock_rename_workdir(workdir, upperdir);
318 if (err)
319 goto out;
320
321 newdentry = ovl_lookup_temp(workdir, dentry);
322 err = PTR_ERR(newdentry);
323 if (IS_ERR(newdentry))
324 goto out_unlock;
325
326 upper = lookup_one_len(dentry->d_name.name, upperdir,
327 dentry->d_name.len);
328 err = PTR_ERR(upper);
329 if (IS_ERR(upper))
330 goto out_dput;
331
332 err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
333 if (err)
334 goto out_dput2;
335
336 if (S_ISDIR(stat->mode)) {
337 err = ovl_set_opaque(newdentry);
338 if (err)
339 goto out_cleanup;
340
341 err = ovl_do_rename(wdir, newdentry, udir, upper,
342 RENAME_EXCHANGE);
343 if (err)
344 goto out_cleanup;
345
346 ovl_cleanup(wdir, upper);
347 } else {
348 err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
349 if (err)
350 goto out_cleanup;
351 }
352 ovl_dentry_version_inc(dentry->d_parent);
353 ovl_dentry_update(dentry, newdentry);
354 ovl_copyattr(newdentry->d_inode, inode);
355 d_instantiate(dentry, inode);
356 newdentry = NULL;
357out_dput2:
358 dput(upper);
359out_dput:
360 dput(newdentry);
361out_unlock:
362 unlock_rename(workdir, upperdir);
363out:
364 return err;
365
366out_cleanup:
367 ovl_cleanup(wdir, newdentry);
368 goto out_dput2;
369}
370
371static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
372 const char *link, struct dentry *hardlink)
373{
374 int err;
375 struct inode *inode;
376 struct kstat stat = {
377 .mode = mode,
378 .rdev = rdev,
379 };
380
381 err = -ENOMEM;
382 inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
383 if (!inode)
384 goto out;
385
386 err = ovl_copy_up(dentry->d_parent);
387 if (err)
388 goto out_iput;
389
390 if (!ovl_dentry_is_opaque(dentry)) {
391 err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
392 } else {
393 const struct cred *old_cred;
394 struct cred *override_cred;
395
396 err = -ENOMEM;
397 override_cred = prepare_creds();
398 if (!override_cred)
399 goto out_iput;
400
401 /*
402 * CAP_SYS_ADMIN for setting opaque xattr
403 * CAP_DAC_OVERRIDE for create in workdir, rename
404 * CAP_FOWNER for removing whiteout from sticky dir
405 */
406 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
407 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
408 cap_raise(override_cred->cap_effective, CAP_FOWNER);
409 old_cred = override_creds(override_cred);
410
411 err = ovl_create_over_whiteout(dentry, inode, &stat, link,
412 hardlink);
413
414 revert_creds(old_cred);
415 put_cred(override_cred);
416 }
417
418 if (!err)
419 inode = NULL;
420out_iput:
421 iput(inode);
422out:
423 return err;
424}
425
426static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
427 const char *link)
428{
429 int err;
430
431 err = ovl_want_write(dentry);
432 if (!err) {
433 err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
434 ovl_drop_write(dentry);
435 }
436
437 return err;
438}
439
440static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
441 bool excl)
442{
443 return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
444}
445
446static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
447{
448 return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
449}
450
451static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
452 dev_t rdev)
453{
454 /* Don't allow creation of "whiteout" on overlay */
455 if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
456 return -EPERM;
457
458 return ovl_create_object(dentry, mode, rdev, NULL);
459}
460
461static int ovl_symlink(struct inode *dir, struct dentry *dentry,
462 const char *link)
463{
464 return ovl_create_object(dentry, S_IFLNK, 0, link);
465}
466
467static int ovl_link(struct dentry *old, struct inode *newdir,
468 struct dentry *new)
469{
470 int err;
471 struct dentry *upper;
472
473 err = ovl_want_write(old);
474 if (err)
475 goto out;
476
477 err = ovl_copy_up(old);
478 if (err)
479 goto out_drop_write;
480
481 upper = ovl_dentry_upper(old);
482 err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
483
484out_drop_write:
485 ovl_drop_write(old);
486out:
487 return err;
488}
489
490static int ovl_remove_and_whiteout(struct dentry *dentry,
491 enum ovl_path_type type, bool is_dir)
492{
493 struct dentry *workdir = ovl_workdir(dentry);
494 struct inode *wdir = workdir->d_inode;
495 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
496 struct inode *udir = upperdir->d_inode;
497 struct dentry *whiteout;
498 struct dentry *upper;
499 struct dentry *opaquedir = NULL;
500 int err;
501
502 if (is_dir) {
503 opaquedir = ovl_check_empty_and_clear(dentry, type);
504 err = PTR_ERR(opaquedir);
505 if (IS_ERR(opaquedir))
506 goto out;
507 }
508
509 err = ovl_lock_rename_workdir(workdir, upperdir);
510 if (err)
511 goto out_dput;
512
513 whiteout = ovl_whiteout(workdir, dentry);
514 err = PTR_ERR(whiteout);
515 if (IS_ERR(whiteout))
516 goto out_unlock;
517
518 if (type == OVL_PATH_LOWER) {
519 upper = lookup_one_len(dentry->d_name.name, upperdir,
520 dentry->d_name.len);
521 err = PTR_ERR(upper);
522 if (IS_ERR(upper))
523 goto kill_whiteout;
524
525 err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
526 dput(upper);
527 if (err)
528 goto kill_whiteout;
529 } else {
530 int flags = 0;
531
532 upper = ovl_dentry_upper(dentry);
533 if (opaquedir)
534 upper = opaquedir;
535 err = -ESTALE;
536 if (upper->d_parent != upperdir)
537 goto kill_whiteout;
538
539 if (is_dir)
540 flags |= RENAME_EXCHANGE;
541
542 err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
543 if (err)
544 goto kill_whiteout;
545
546 if (is_dir)
547 ovl_cleanup(wdir, upper);
548 }
549 ovl_dentry_version_inc(dentry->d_parent);
550out_d_drop:
551 d_drop(dentry);
552 dput(whiteout);
553out_unlock:
554 unlock_rename(workdir, upperdir);
555out_dput:
556 dput(opaquedir);
557out:
558 return err;
559
560kill_whiteout:
561 ovl_cleanup(wdir, whiteout);
562 goto out_d_drop;
563}
564
565static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
566{
567 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
568 struct inode *dir = upperdir->d_inode;
569 struct dentry *upper = ovl_dentry_upper(dentry);
570 int err;
571
572 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
573 err = -ESTALE;
574 if (upper->d_parent == upperdir) {
575 /* Don't let d_delete() think it can reset d_inode */
576 dget(upper);
577 if (is_dir)
578 err = vfs_rmdir(dir, upper);
579 else
580 err = vfs_unlink(dir, upper, NULL);
581 dput(upper);
582 ovl_dentry_version_inc(dentry->d_parent);
583 }
584
585 /*
586 * Keeping this dentry hashed would mean having to release
587 * upperpath/lowerpath, which could only be done if we are the
588 * sole user of this dentry. Too tricky... Just unhash for
589 * now.
590 */
591 d_drop(dentry);
592 mutex_unlock(&dir->i_mutex);
593
594 return err;
595}
596
597static inline int ovl_check_sticky(struct dentry *dentry)
598{
599 struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
600 struct inode *inode = ovl_dentry_real(dentry)->d_inode;
601
602 if (check_sticky(dir, inode))
603 return -EPERM;
604
605 return 0;
606}
607
608static int ovl_do_remove(struct dentry *dentry, bool is_dir)
609{
610 enum ovl_path_type type;
611 int err;
612
613 err = ovl_check_sticky(dentry);
614 if (err)
615 goto out;
616
617 err = ovl_want_write(dentry);
618 if (err)
619 goto out;
620
621 err = ovl_copy_up(dentry->d_parent);
622 if (err)
623 goto out_drop_write;
624
625 type = ovl_path_type(dentry);
626 if (type == OVL_PATH_PURE_UPPER) {
627 err = ovl_remove_upper(dentry, is_dir);
628 } else {
629 const struct cred *old_cred;
630 struct cred *override_cred;
631
632 err = -ENOMEM;
633 override_cred = prepare_creds();
634 if (!override_cred)
635 goto out_drop_write;
636
637 /*
638 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
639 * CAP_DAC_OVERRIDE for create in workdir, rename
640 * CAP_FOWNER for removing whiteout from sticky dir
641 * CAP_FSETID for chmod of opaque dir
642 * CAP_CHOWN for chown of opaque dir
643 */
644 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
645 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
646 cap_raise(override_cred->cap_effective, CAP_FOWNER);
647 cap_raise(override_cred->cap_effective, CAP_FSETID);
648 cap_raise(override_cred->cap_effective, CAP_CHOWN);
649 old_cred = override_creds(override_cred);
650
651 err = ovl_remove_and_whiteout(dentry, type, is_dir);
652
653 revert_creds(old_cred);
654 put_cred(override_cred);
655 }
656out_drop_write:
657 ovl_drop_write(dentry);
658out:
659 return err;
660}
661
662static int ovl_unlink(struct inode *dir, struct dentry *dentry)
663{
664 return ovl_do_remove(dentry, false);
665}
666
667static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
668{
669 return ovl_do_remove(dentry, true);
670}
671
672static int ovl_rename2(struct inode *olddir, struct dentry *old,
673 struct inode *newdir, struct dentry *new,
674 unsigned int flags)
675{
676 int err;
677 enum ovl_path_type old_type;
678 enum ovl_path_type new_type;
679 struct dentry *old_upperdir;
680 struct dentry *new_upperdir;
681 struct dentry *olddentry;
682 struct dentry *newdentry;
683 struct dentry *trap;
684 bool old_opaque;
685 bool new_opaque;
686 bool new_create = false;
687 bool cleanup_whiteout = false;
688 bool overwrite = !(flags & RENAME_EXCHANGE);
689 bool is_dir = S_ISDIR(old->d_inode->i_mode);
690 bool new_is_dir = false;
691 struct dentry *opaquedir = NULL;
692 const struct cred *old_cred = NULL;
693 struct cred *override_cred = NULL;
694
695 err = -EINVAL;
696 if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
697 goto out;
698
699 flags &= ~RENAME_NOREPLACE;
700
701 err = ovl_check_sticky(old);
702 if (err)
703 goto out;
704
705 /* Don't copy up directory trees */
706 old_type = ovl_path_type(old);
707 err = -EXDEV;
708 if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir)
709 goto out;
710
711 if (new->d_inode) {
712 err = ovl_check_sticky(new);
713 if (err)
714 goto out;
715
716 if (S_ISDIR(new->d_inode->i_mode))
717 new_is_dir = true;
718
719 new_type = ovl_path_type(new);
720 err = -EXDEV;
721 if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir)
722 goto out;
723
724 err = 0;
725 if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
726 if (ovl_dentry_lower(old)->d_inode ==
727 ovl_dentry_lower(new)->d_inode)
728 goto out;
729 }
730 if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
731 if (ovl_dentry_upper(old)->d_inode ==
732 ovl_dentry_upper(new)->d_inode)
733 goto out;
734 }
735 } else {
736 if (ovl_dentry_is_opaque(new))
737 new_type = OVL_PATH_UPPER;
738 else
739 new_type = OVL_PATH_PURE_UPPER;
740 }
741
742 err = ovl_want_write(old);
743 if (err)
744 goto out;
745
746 err = ovl_copy_up(old);
747 if (err)
748 goto out_drop_write;
749
750 err = ovl_copy_up(new->d_parent);
751 if (err)
752 goto out_drop_write;
753 if (!overwrite) {
754 err = ovl_copy_up(new);
755 if (err)
756 goto out_drop_write;
757 }
758
759 old_opaque = old_type != OVL_PATH_PURE_UPPER;
760 new_opaque = new_type != OVL_PATH_PURE_UPPER;
761
762 if (old_opaque || new_opaque) {
763 err = -ENOMEM;
764 override_cred = prepare_creds();
765 if (!override_cred)
766 goto out_drop_write;
767
768 /*
769 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
770 * CAP_DAC_OVERRIDE for create in workdir
771 * CAP_FOWNER for removing whiteout from sticky dir
772 * CAP_FSETID for chmod of opaque dir
773 * CAP_CHOWN for chown of opaque dir
774 */
775 cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
776 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
777 cap_raise(override_cred->cap_effective, CAP_FOWNER);
778 cap_raise(override_cred->cap_effective, CAP_FSETID);
779 cap_raise(override_cred->cap_effective, CAP_CHOWN);
780 old_cred = override_creds(override_cred);
781 }
782
783 if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
784 opaquedir = ovl_check_empty_and_clear(new, new_type);
785 err = PTR_ERR(opaquedir);
786 if (IS_ERR(opaquedir)) {
787 opaquedir = NULL;
788 goto out_revert_creds;
789 }
790 }
791
792 if (overwrite) {
793 if (old_opaque) {
794 if (new->d_inode || !new_opaque) {
795 /* Whiteout source */
796 flags |= RENAME_WHITEOUT;
797 } else {
798 /* Switch whiteouts */
799 flags |= RENAME_EXCHANGE;
800 }
801 } else if (is_dir && !new->d_inode && new_opaque) {
802 flags |= RENAME_EXCHANGE;
803 cleanup_whiteout = true;
804 }
805 }
806
807 old_upperdir = ovl_dentry_upper(old->d_parent);
808 new_upperdir = ovl_dentry_upper(new->d_parent);
809
810 trap = lock_rename(new_upperdir, old_upperdir);
811
812 olddentry = ovl_dentry_upper(old);
813 newdentry = ovl_dentry_upper(new);
814 if (newdentry) {
815 if (opaquedir) {
816 newdentry = opaquedir;
817 opaquedir = NULL;
818 } else {
819 dget(newdentry);
820 }
821 } else {
822 new_create = true;
823 newdentry = lookup_one_len(new->d_name.name, new_upperdir,
824 new->d_name.len);
825 err = PTR_ERR(newdentry);
826 if (IS_ERR(newdentry))
827 goto out_unlock;
828 }
829
830 err = -ESTALE;
831 if (olddentry->d_parent != old_upperdir)
832 goto out_dput;
833 if (newdentry->d_parent != new_upperdir)
834 goto out_dput;
835 if (olddentry == trap)
836 goto out_dput;
837 if (newdentry == trap)
838 goto out_dput;
839
840 if (is_dir && !old_opaque && new_opaque) {
841 err = ovl_set_opaque(olddentry);
842 if (err)
843 goto out_dput;
844 }
845 if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
846 err = ovl_set_opaque(newdentry);
847 if (err)
848 goto out_dput;
849 }
850
851 if (old_opaque || new_opaque) {
852 err = ovl_do_rename(old_upperdir->d_inode, olddentry,
853 new_upperdir->d_inode, newdentry,
854 flags);
855 } else {
856 /* No debug for the plain case */
857 BUG_ON(flags & ~RENAME_EXCHANGE);
858 err = vfs_rename(old_upperdir->d_inode, olddentry,
859 new_upperdir->d_inode, newdentry,
860 NULL, flags);
861 }
862
863 if (err) {
864 if (is_dir && !old_opaque && new_opaque)
865 ovl_remove_opaque(olddentry);
866 if (!overwrite && new_is_dir && old_opaque && !new_opaque)
867 ovl_remove_opaque(newdentry);
868 goto out_dput;
869 }
870
871 if (is_dir && old_opaque && !new_opaque)
872 ovl_remove_opaque(olddentry);
873 if (!overwrite && new_is_dir && !old_opaque && new_opaque)
874 ovl_remove_opaque(newdentry);
875
876 if (old_opaque != new_opaque) {
877 ovl_dentry_set_opaque(old, new_opaque);
878 if (!overwrite)
879 ovl_dentry_set_opaque(new, old_opaque);
880 }
881
882 if (cleanup_whiteout)
883 ovl_cleanup(old_upperdir->d_inode, newdentry);
884
885 ovl_dentry_version_inc(old->d_parent);
886 ovl_dentry_version_inc(new->d_parent);
887
888out_dput:
889 dput(newdentry);
890out_unlock:
891 unlock_rename(new_upperdir, old_upperdir);
892out_revert_creds:
893 if (old_opaque || new_opaque) {
894 revert_creds(old_cred);
895 put_cred(override_cred);
896 }
897out_drop_write:
898 ovl_drop_write(old);
899out:
900 dput(opaquedir);
901 return err;
902}
903
904const struct inode_operations ovl_dir_inode_operations = {
905 .lookup = ovl_lookup,
906 .mkdir = ovl_mkdir,
907 .symlink = ovl_symlink,
908 .unlink = ovl_unlink,
909 .rmdir = ovl_rmdir,
910 .rename2 = ovl_rename2,
911 .link = ovl_link,
912 .setattr = ovl_setattr,
913 .create = ovl_create,
914 .mknod = ovl_mknod,
915 .permission = ovl_permission,
916 .getattr = ovl_dir_getattr,
917 .setxattr = ovl_setxattr,
918 .getxattr = ovl_getxattr,
919 .listxattr = ovl_listxattr,
920 .removexattr = ovl_removexattr,
921};
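
The directory operations above never modify the lower layer: ovl_do_remove() and ovl_rename2() record deletions as whiteouts in the upper layer instead. A whiteout in this implementation is a character device with device number 0/0, which is what ovl_is_whiteout() tests via IS_WHITEOUT(). The following userspace sketch shows how that can be observed; it assumes an overlay is already mounted with upperdir=/upper and mountpoint /merged, and that /merged/file originates in the lower layer (all paths are illustrative).

/* whiteout_demo.c - watch a whiteout appear in the upper layer. */
#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <unistd.h>

int main(void)
{
	struct stat st;

	/* Goes through ovl_unlink() -> ovl_do_remove(), which leaves
	 * a whiteout in the upper layer rather than touching lower. */
	if (unlink("/merged/file") == -1) {
		perror("unlink");
		return 1;
	}

	/* Inspect the upper layer directly, bypassing the overlay. */
	if (lstat("/upper/file", &st) == -1) {
		perror("lstat");
		return 1;
	}
	if (S_ISCHR(st.st_mode) &&
	    major(st.st_rdev) == 0 && minor(st.st_rdev) == 0)
		printf("whiteout left in upper layer\n");
	return 0;
}
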
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
new file mode 100644
index 000000000000..af2d18c9fcee
--- /dev/null
+++ b/fs/overlayfs/inode.c
@@ -0,0 +1,425 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/xattr.h>
13#include "overlayfs.h"
14
15static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
16 bool no_data)
17{
18 int err;
19 struct dentry *parent;
20 struct kstat stat;
21 struct path lowerpath;
22
23 parent = dget_parent(dentry);
24 err = ovl_copy_up(parent);
25 if (err)
26 goto out_dput_parent;
27
28 ovl_path_lower(dentry, &lowerpath);
29 err = vfs_getattr(&lowerpath, &stat);
30 if (err)
31 goto out_dput_parent;
32
33 if (no_data)
34 stat.size = 0;
35
36 err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
37
38out_dput_parent:
39 dput(parent);
40 return err;
41}
42
43int ovl_setattr(struct dentry *dentry, struct iattr *attr)
44{
45 int err;
46 struct dentry *upperdentry;
47
48 err = ovl_want_write(dentry);
49 if (err)
50 goto out;
51
52 upperdentry = ovl_dentry_upper(dentry);
53 if (upperdentry) {
54 mutex_lock(&upperdentry->d_inode->i_mutex);
55 err = notify_change(upperdentry, attr, NULL);
56 mutex_unlock(&upperdentry->d_inode->i_mutex);
57 } else {
58 err = ovl_copy_up_last(dentry, attr, false);
59 }
60 ovl_drop_write(dentry);
61out:
62 return err;
63}
64
65static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
66 struct kstat *stat)
67{
68 struct path realpath;
69
70 ovl_path_real(dentry, &realpath);
71 return vfs_getattr(&realpath, stat);
72}
73
74int ovl_permission(struct inode *inode, int mask)
75{
76 struct ovl_entry *oe;
77 struct dentry *alias = NULL;
78 struct inode *realinode;
79 struct dentry *realdentry;
80 bool is_upper;
81 int err;
82
83 if (S_ISDIR(inode->i_mode)) {
84 oe = inode->i_private;
85 } else if (mask & MAY_NOT_BLOCK) {
86 return -ECHILD;
87 } else {
88 /*
89 * For non-directories find an alias and get the info
90 * from there.
91 */
92 alias = d_find_any_alias(inode);
93 if (WARN_ON(!alias))
94 return -ENOENT;
95
96 oe = alias->d_fsdata;
97 }
98
99 realdentry = ovl_entry_real(oe, &is_upper);
100
101 /* Careful in RCU walk mode */
102 realinode = ACCESS_ONCE(realdentry->d_inode);
103 if (!realinode) {
104 WARN_ON(!(mask & MAY_NOT_BLOCK));
105 err = -ENOENT;
106 goto out_dput;
107 }
108
109 if (mask & MAY_WRITE) {
110 umode_t mode = realinode->i_mode;
111
112 /*
113 * Writes will always be redirected to upper layer, so
114 * ignore lower layer being read-only.
115 *
116 * If the overlay itself is read-only then proceed
117 * with the permission check, don't return EROFS.
118 * This will only happen if this is the lower layer of
119 * another overlayfs.
120 *
121 * If upper fs becomes read-only after the overlay was
122 * constructed return EROFS to prevent modification of
123 * upper layer.
124 */
125 err = -EROFS;
126 if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
127 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
128 goto out_dput;
129 }
130
131 err = __inode_permission(realinode, mask);
132out_dput:
133 dput(alias);
134 return err;
135}
136
137
138struct ovl_link_data {
139 struct dentry *realdentry;
140 void *cookie;
141};
142
143static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
144{
145 void *ret;
146 struct dentry *realdentry;
147 struct inode *realinode;
148
149 realdentry = ovl_dentry_real(dentry);
150 realinode = realdentry->d_inode;
151
152 if (WARN_ON(!realinode->i_op->follow_link))
153 return ERR_PTR(-EPERM);
154
155 ret = realinode->i_op->follow_link(realdentry, nd);
156 if (IS_ERR(ret))
157 return ret;
158
159 if (realinode->i_op->put_link) {
160 struct ovl_link_data *data;
161
162 data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
163 if (!data) {
164 realinode->i_op->put_link(realdentry, nd, ret);
165 return ERR_PTR(-ENOMEM);
166 }
167 data->realdentry = realdentry;
168 data->cookie = ret;
169
170 return data;
171 } else {
172 return NULL;
173 }
174}
175
176static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
177{
178 struct inode *realinode;
179 struct ovl_link_data *data = c;
180
181 if (!data)
182 return;
183
184 realinode = data->realdentry->d_inode;
185 realinode->i_op->put_link(data->realdentry, nd, data->cookie);
186 kfree(data);
187}
188
189static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
190{
191 struct path realpath;
192 struct inode *realinode;
193
194 ovl_path_real(dentry, &realpath);
195 realinode = realpath.dentry->d_inode;
196
197 if (!realinode->i_op->readlink)
198 return -EINVAL;
199
200 touch_atime(&realpath);
201
202 return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
203}
204
205
206static bool ovl_is_private_xattr(const char *name)
207{
208	return strncmp(name, "trusted.overlay.", 16) == 0;
209}
210
211int ovl_setxattr(struct dentry *dentry, const char *name,
212 const void *value, size_t size, int flags)
213{
214 int err;
215 struct dentry *upperdentry;
216
217 err = ovl_want_write(dentry);
218 if (err)
219 goto out;
220
221 err = -EPERM;
222 if (ovl_is_private_xattr(name))
223 goto out_drop_write;
224
225 err = ovl_copy_up(dentry);
226 if (err)
227 goto out_drop_write;
228
229 upperdentry = ovl_dentry_upper(dentry);
230 err = vfs_setxattr(upperdentry, name, value, size, flags);
231
232out_drop_write:
233 ovl_drop_write(dentry);
234out:
235 return err;
236}
237
238ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
239 void *value, size_t size)
240{
241 if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
242 ovl_is_private_xattr(name))
243 return -ENODATA;
244
245 return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
246}
247
248ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
249{
250 ssize_t res;
251 int off;
252
253 res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
254 if (res <= 0 || size == 0)
255 return res;
256
257 if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
258 return res;
259
260 /* filter out private xattrs */
261 for (off = 0; off < res;) {
262 char *s = list + off;
263 size_t slen = strlen(s) + 1;
264
265 BUG_ON(off + slen > res);
266
267 if (ovl_is_private_xattr(s)) {
268 res -= slen;
269 memmove(s, s + slen, res - off);
270 } else {
271 off += slen;
272 }
273 }
274
275 return res;
276}
277
278int ovl_removexattr(struct dentry *dentry, const char *name)
279{
280 int err;
281 struct path realpath;
282 enum ovl_path_type type;
283
284 err = ovl_want_write(dentry);
285 if (err)
286 goto out;
287
288 if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
289 ovl_is_private_xattr(name))
290 goto out_drop_write;
291
292 type = ovl_path_real(dentry, &realpath);
293 if (type == OVL_PATH_LOWER) {
294 err = vfs_getxattr(realpath.dentry, name, NULL, 0);
295 if (err < 0)
296 goto out_drop_write;
297
298 err = ovl_copy_up(dentry);
299 if (err)
300 goto out_drop_write;
301
302 ovl_path_upper(dentry, &realpath);
303 }
304
305 err = vfs_removexattr(realpath.dentry, name);
306out_drop_write:
307 ovl_drop_write(dentry);
308out:
309 return err;
310}
311
312static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
313 struct dentry *realdentry)
314{
315 if (type != OVL_PATH_LOWER)
316 return false;
317
318 if (special_file(realdentry->d_inode->i_mode))
319 return false;
320
321 if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
322 return false;
323
324 return true;
325}
326
327static int ovl_dentry_open(struct dentry *dentry, struct file *file,
328 const struct cred *cred)
329{
330 int err;
331 struct path realpath;
332 enum ovl_path_type type;
333 bool want_write = false;
334
335 type = ovl_path_real(dentry, &realpath);
336 if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
337 want_write = true;
338 err = ovl_want_write(dentry);
339 if (err)
340 goto out;
341
342 if (file->f_flags & O_TRUNC)
343 err = ovl_copy_up_last(dentry, NULL, true);
344 else
345 err = ovl_copy_up(dentry);
346 if (err)
347 goto out_drop_write;
348
349 ovl_path_upper(dentry, &realpath);
350 }
351
352 err = vfs_open(&realpath, file, cred);
353out_drop_write:
354 if (want_write)
355 ovl_drop_write(dentry);
356out:
357 return err;
358}
359
360static const struct inode_operations ovl_file_inode_operations = {
361 .setattr = ovl_setattr,
362 .permission = ovl_permission,
363 .getattr = ovl_getattr,
364 .setxattr = ovl_setxattr,
365 .getxattr = ovl_getxattr,
366 .listxattr = ovl_listxattr,
367 .removexattr = ovl_removexattr,
368 .dentry_open = ovl_dentry_open,
369};
370
371static const struct inode_operations ovl_symlink_inode_operations = {
372 .setattr = ovl_setattr,
373 .follow_link = ovl_follow_link,
374 .put_link = ovl_put_link,
375 .readlink = ovl_readlink,
376 .getattr = ovl_getattr,
377 .setxattr = ovl_setxattr,
378 .getxattr = ovl_getxattr,
379 .listxattr = ovl_listxattr,
380 .removexattr = ovl_removexattr,
381};
382
383struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
384 struct ovl_entry *oe)
385{
386 struct inode *inode;
387
388 inode = new_inode(sb);
389 if (!inode)
390 return NULL;
391
392 mode &= S_IFMT;
393
394 inode->i_ino = get_next_ino();
395 inode->i_mode = mode;
396 inode->i_flags |= S_NOATIME | S_NOCMTIME;
397
398 switch (mode) {
399 case S_IFDIR:
400 inode->i_private = oe;
401 inode->i_op = &ovl_dir_inode_operations;
402 inode->i_fop = &ovl_dir_operations;
403 break;
404
405 case S_IFLNK:
406 inode->i_op = &ovl_symlink_inode_operations;
407 break;
408
409 case S_IFREG:
410 case S_IFSOCK:
411 case S_IFBLK:
412 case S_IFCHR:
413 case S_IFIFO:
414 inode->i_op = &ovl_file_inode_operations;
415 break;
416
417 default:
418 WARN(1, "illegal file type: %i\n", mode);
419 iput(inode);
420 inode = NULL;
421 }
422
423 return inode;
424
425}
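
ovl_setxattr() and ovl_getxattr() above make "trusted.overlay." a private namespace: setting such an attribute through the overlay fails with EPERM, and on entries under a merged directory the attribute is reported as absent even when the upper inode carries it. A hedged sketch of both behaviours, assuming a merged directory at the hypothetical path /merged/dir and CAP_SYS_ADMIN (required for trusted.* xattrs):

/* private_xattr_demo.c - the overlay hides its own xattr namespace. */
#include <errno.h>
#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	char buf[8];
	ssize_t n;

	/* Rejected by ovl_setxattr() before reaching the upper fs. */
	if (setxattr("/merged/dir", "trusted.overlay.opaque",
		     "y", 1, 0) == -1 && errno == EPERM)
		printf("set refused: private namespace\n");

	/* Hidden by ovl_getxattr() when the parent is a merged dir. */
	n = getxattr("/merged/dir", "trusted.overlay.opaque",
		     buf, sizeof(buf));
	if (n == -1 && errno == ENODATA)
		printf("get hidden: reported as absent\n");
	return 0;
}
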
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
new file mode 100644
index 000000000000..814bed33dd07
--- /dev/null
+++ b/fs/overlayfs/overlayfs.h
@@ -0,0 +1,191 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11
12struct ovl_entry;
13
14enum ovl_path_type {
15 OVL_PATH_PURE_UPPER,
16 OVL_PATH_UPPER,
17 OVL_PATH_MERGE,
18 OVL_PATH_LOWER,
19};
20
21extern const char *ovl_opaque_xattr;
22
23static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
24{
25 int err = vfs_rmdir(dir, dentry);
26 pr_debug("rmdir(%pd2) = %i\n", dentry, err);
27 return err;
28}
29
30static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
31{
32 int err = vfs_unlink(dir, dentry, NULL);
33 pr_debug("unlink(%pd2) = %i\n", dentry, err);
34 return err;
35}
36
37static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
38 struct dentry *new_dentry, bool debug)
39{
40 int err = vfs_link(old_dentry, dir, new_dentry, NULL);
41 if (debug) {
42 pr_debug("link(%pd2, %pd2) = %i\n",
43 old_dentry, new_dentry, err);
44 }
45 return err;
46}
47
48static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
49 umode_t mode, bool debug)
50{
51 int err = vfs_create(dir, dentry, mode, true);
52 if (debug)
53 pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
54 return err;
55}
56
57static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
58 umode_t mode, bool debug)
59{
60 int err = vfs_mkdir(dir, dentry, mode);
61 if (debug)
62 pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
63 return err;
64}
65
66static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
67 umode_t mode, dev_t dev, bool debug)
68{
69 int err = vfs_mknod(dir, dentry, mode, dev);
70 if (debug) {
71 pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
72 dentry, mode, dev, err);
73 }
74 return err;
75}
76
77static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
78 const char *oldname, bool debug)
79{
80 int err = vfs_symlink(dir, dentry, oldname);
81 if (debug)
82 pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
83 return err;
84}
85
86static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
87 const void *value, size_t size, int flags)
88{
89 int err = vfs_setxattr(dentry, name, value, size, flags);
90 pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
91 dentry, name, (int) size, (char *) value, flags, err);
92 return err;
93}
94
95static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
96{
97 int err = vfs_removexattr(dentry, name);
98 pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
99 return err;
100}
101
102static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
103 struct inode *newdir, struct dentry *newdentry,
104 unsigned int flags)
105{
106 int err;
107
108 pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
109 olddentry, newdentry, flags);
110
111 err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
112
113 if (err) {
114 pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
115 olddentry, newdentry, err);
116 }
117 return err;
118}
119
120static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
121{
122 int err = vfs_whiteout(dir, dentry);
123 pr_debug("whiteout(%pd2) = %i\n", dentry, err);
124 return err;
125}
126
127enum ovl_path_type ovl_path_type(struct dentry *dentry);
128u64 ovl_dentry_version_get(struct dentry *dentry);
129void ovl_dentry_version_inc(struct dentry *dentry);
130void ovl_path_upper(struct dentry *dentry, struct path *path);
131void ovl_path_lower(struct dentry *dentry, struct path *path);
132enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
133struct dentry *ovl_dentry_upper(struct dentry *dentry);
134struct dentry *ovl_dentry_lower(struct dentry *dentry);
135struct dentry *ovl_dentry_real(struct dentry *dentry);
136struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
137struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
138void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
139struct dentry *ovl_workdir(struct dentry *dentry);
140int ovl_want_write(struct dentry *dentry);
141void ovl_drop_write(struct dentry *dentry);
142bool ovl_dentry_is_opaque(struct dentry *dentry);
143void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
144bool ovl_is_whiteout(struct dentry *dentry);
145void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
146struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
147 unsigned int flags);
148struct file *ovl_path_open(struct path *path, int flags);
149
150struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
151 struct kstat *stat, const char *link);
152
153/* readdir.c */
154extern const struct file_operations ovl_dir_operations;
155int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
156void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
157void ovl_cache_free(struct list_head *list);
158
159/* inode.c */
160int ovl_setattr(struct dentry *dentry, struct iattr *attr);
161int ovl_permission(struct inode *inode, int mask);
162int ovl_setxattr(struct dentry *dentry, const char *name,
163 const void *value, size_t size, int flags);
164ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
165 void *value, size_t size);
166ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
167int ovl_removexattr(struct dentry *dentry, const char *name);
168
169struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
170 struct ovl_entry *oe);
171static inline void ovl_copyattr(struct inode *from, struct inode *to)
172{
173 to->i_uid = from->i_uid;
174 to->i_gid = from->i_gid;
175}
176
177/* dir.c */
178extern const struct inode_operations ovl_dir_inode_operations;
179struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
180int ovl_create_real(struct inode *dir, struct dentry *newdentry,
181 struct kstat *stat, const char *link,
182 struct dentry *hardlink, bool debug);
183void ovl_cleanup(struct inode *dir, struct dentry *dentry);
184
185/* copy_up.c */
186int ovl_copy_up(struct dentry *dentry);
187int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
188 struct path *lowerpath, struct kstat *stat,
189 struct iattr *attr);
190int ovl_copy_xattr(struct dentry *old, struct dentry *new);
191int ovl_set_attr(struct dentry *upper, struct kstat *stat);
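
The ovl_opaque_xattr declared above names the on-disk marker for opaque directories; ovl_is_opaquedir() in super.c accepts it only when the value is the single byte 'y'. A directory can therefore be marked opaque by writing that attribute directly on the upper filesystem, outside the overlay, which forbids the namespace (sketch only; /upper/dir is a placeholder and CAP_SYS_ADMIN is required):

/* mark_opaque.c - set the opaque marker on an upper directory. */
#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	/* ovl_is_opaquedir() checks for exactly one byte, 'y'. */
	if (setxattr("/upper/dir", "trusted.overlay.opaque",
		     "y", 1, 0) == -1) {
		perror("setxattr");
		return 1;
	}
	/* Lower entries under this directory are now masked. */
	return 0;
}
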
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
new file mode 100644
index 000000000000..910553f37aca
--- /dev/null
+++ b/fs/overlayfs/readdir.c
@@ -0,0 +1,590 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/slab.h>
12#include <linux/namei.h>
13#include <linux/file.h>
14#include <linux/xattr.h>
15#include <linux/rbtree.h>
16#include <linux/security.h>
17#include <linux/cred.h>
18#include "overlayfs.h"
19
20struct ovl_cache_entry {
21 unsigned int len;
22 unsigned int type;
23 u64 ino;
24 bool is_whiteout;
25 struct list_head l_node;
26 struct rb_node node;
27 char name[];
28};
29
30struct ovl_dir_cache {
31 long refcount;
32 u64 version;
33 struct list_head entries;
34};
35
36struct ovl_readdir_data {
37 struct dir_context ctx;
38 bool is_merge;
39 struct rb_root root;
40 struct list_head *list;
41 struct list_head middle;
42 int count;
43 int err;
44};
45
46struct ovl_dir_file {
47 bool is_real;
48 bool is_upper;
49 struct ovl_dir_cache *cache;
50 struct ovl_cache_entry cursor;
51 struct file *realfile;
52 struct file *upperfile;
53};
54
55static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
56{
57 return container_of(n, struct ovl_cache_entry, node);
58}
59
60static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
61 const char *name, int len)
62{
63 struct rb_node *node = root->rb_node;
64 int cmp;
65
66 while (node) {
67 struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
68
69 cmp = strncmp(name, p->name, len);
70 if (cmp > 0)
71 node = p->node.rb_right;
72 else if (cmp < 0 || len < p->len)
73 node = p->node.rb_left;
74 else
75 return p;
76 }
77
78 return NULL;
79}
80
81static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
82 u64 ino, unsigned int d_type)
83{
84 struct ovl_cache_entry *p;
85 size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
86
87 p = kmalloc(size, GFP_KERNEL);
88 if (p) {
89 memcpy(p->name, name, len);
90 p->name[len] = '\0';
91 p->len = len;
92 p->type = d_type;
93 p->ino = ino;
94 p->is_whiteout = false;
95 }
96
97 return p;
98}
99
100static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
101 const char *name, int len, u64 ino,
102 unsigned int d_type)
103{
104 struct rb_node **newp = &rdd->root.rb_node;
105 struct rb_node *parent = NULL;
106 struct ovl_cache_entry *p;
107
108 while (*newp) {
109 int cmp;
110 struct ovl_cache_entry *tmp;
111
112 parent = *newp;
113 tmp = ovl_cache_entry_from_node(*newp);
114 cmp = strncmp(name, tmp->name, len);
115 if (cmp > 0)
116 newp = &tmp->node.rb_right;
117 else if (cmp < 0 || len < tmp->len)
118 newp = &tmp->node.rb_left;
119 else
120 return 0;
121 }
122
123 p = ovl_cache_entry_new(name, len, ino, d_type);
124 if (p == NULL)
125 return -ENOMEM;
126
127 list_add_tail(&p->l_node, rdd->list);
128 rb_link_node(&p->node, parent, newp);
129 rb_insert_color(&p->node, &rdd->root);
130
131 return 0;
132}
133
134static int ovl_fill_lower(struct ovl_readdir_data *rdd,
135 const char *name, int namelen,
136 loff_t offset, u64 ino, unsigned int d_type)
137{
138 struct ovl_cache_entry *p;
139
140 p = ovl_cache_entry_find(&rdd->root, name, namelen);
141 if (p) {
142 list_move_tail(&p->l_node, &rdd->middle);
143 } else {
144 p = ovl_cache_entry_new(name, namelen, ino, d_type);
145 if (p == NULL)
146 rdd->err = -ENOMEM;
147 else
148 list_add_tail(&p->l_node, &rdd->middle);
149 }
150
151 return rdd->err;
152}
153
154void ovl_cache_free(struct list_head *list)
155{
156 struct ovl_cache_entry *p;
157 struct ovl_cache_entry *n;
158
159 list_for_each_entry_safe(p, n, list, l_node)
160 kfree(p);
161
162 INIT_LIST_HEAD(list);
163}
164
165static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
166{
167 struct ovl_dir_cache *cache = od->cache;
168
169 list_del(&od->cursor.l_node);
170 WARN_ON(cache->refcount <= 0);
171 cache->refcount--;
172 if (!cache->refcount) {
173 if (ovl_dir_cache(dentry) == cache)
174 ovl_set_dir_cache(dentry, NULL);
175
176 ovl_cache_free(&cache->entries);
177 kfree(cache);
178 }
179}
180
181static int ovl_fill_merge(void *buf, const char *name, int namelen,
182 loff_t offset, u64 ino, unsigned int d_type)
183{
184 struct ovl_readdir_data *rdd = buf;
185
186 rdd->count++;
187 if (!rdd->is_merge)
188 return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
189 else
190 return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
191}
192
193static inline int ovl_dir_read(struct path *realpath,
194 struct ovl_readdir_data *rdd)
195{
196 struct file *realfile;
197 int err;
198
199 realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
200 if (IS_ERR(realfile))
201 return PTR_ERR(realfile);
202
203 rdd->ctx.pos = 0;
204 do {
205 rdd->count = 0;
206 rdd->err = 0;
207 err = iterate_dir(realfile, &rdd->ctx);
208 if (err >= 0)
209 err = rdd->err;
210 } while (!err && rdd->count);
211 fput(realfile);
212
213 return err;
214}
215
216static void ovl_dir_reset(struct file *file)
217{
218 struct ovl_dir_file *od = file->private_data;
219 struct ovl_dir_cache *cache = od->cache;
220 struct dentry *dentry = file->f_path.dentry;
221 enum ovl_path_type type = ovl_path_type(dentry);
222
223 if (cache && ovl_dentry_version_get(dentry) != cache->version) {
224 ovl_cache_put(od, dentry);
225 od->cache = NULL;
226 }
227 WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
228 if (od->is_real && type == OVL_PATH_MERGE)
229 od->is_real = false;
230}
231
232static int ovl_dir_mark_whiteouts(struct dentry *dir,
233 struct ovl_readdir_data *rdd)
234{
235 struct ovl_cache_entry *p;
236 struct dentry *dentry;
237 const struct cred *old_cred;
238 struct cred *override_cred;
239
240 override_cred = prepare_creds();
241 if (!override_cred) {
242 ovl_cache_free(rdd->list);
243 return -ENOMEM;
244 }
245
246 /*
247 * CAP_DAC_OVERRIDE for lookup
248 */
249 cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
250 old_cred = override_creds(override_cred);
251
252 mutex_lock(&dir->d_inode->i_mutex);
253 list_for_each_entry(p, rdd->list, l_node) {
254 if (!p->name)
255 continue;
256
257 if (p->type != DT_CHR)
258 continue;
259
260 dentry = lookup_one_len(p->name, dir, p->len);
261 if (IS_ERR(dentry))
262 continue;
263
264 p->is_whiteout = ovl_is_whiteout(dentry);
265 dput(dentry);
266 }
267 mutex_unlock(&dir->d_inode->i_mutex);
268
269 revert_creds(old_cred);
270 put_cred(override_cred);
271
272 return 0;
273}
274
275static inline int ovl_dir_read_merged(struct path *upperpath,
276 struct path *lowerpath,
277 struct list_head *list)
278{
279 int err;
280 struct ovl_readdir_data rdd = {
281 .ctx.actor = ovl_fill_merge,
282 .list = list,
283 .root = RB_ROOT,
284 .is_merge = false,
285 };
286
287 if (upperpath->dentry) {
288 err = ovl_dir_read(upperpath, &rdd);
289 if (err)
290 goto out;
291
292 if (lowerpath->dentry) {
293 err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
294 if (err)
295 goto out;
296 }
297 }
298 if (lowerpath->dentry) {
299 /*
300		 * Insert lowerpath entries before upperpath ones; this keeps
301		 * offsets reasonably constant
302 */
303 list_add(&rdd.middle, rdd.list);
304 rdd.is_merge = true;
305 err = ovl_dir_read(lowerpath, &rdd);
306 list_del(&rdd.middle);
307 }
308out:
309 return err;
310
311}
312
313static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
314{
315 struct ovl_cache_entry *p;
316 loff_t off = 0;
317
318 list_for_each_entry(p, &od->cache->entries, l_node) {
319 if (!p->name)
320 continue;
321 if (off >= pos)
322 break;
323 off++;
324 }
325 list_move_tail(&od->cursor.l_node, &p->l_node);
326}
327
328static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
329{
330 int res;
331 struct path lowerpath;
332 struct path upperpath;
333 struct ovl_dir_cache *cache;
334
335 cache = ovl_dir_cache(dentry);
336 if (cache && ovl_dentry_version_get(dentry) == cache->version) {
337 cache->refcount++;
338 return cache;
339 }
340 ovl_set_dir_cache(dentry, NULL);
341
342 cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
343 if (!cache)
344 return ERR_PTR(-ENOMEM);
345
346 cache->refcount = 1;
347 INIT_LIST_HEAD(&cache->entries);
348
349 ovl_path_lower(dentry, &lowerpath);
350 ovl_path_upper(dentry, &upperpath);
351
352 res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
353 if (res) {
354 ovl_cache_free(&cache->entries);
355 kfree(cache);
356 return ERR_PTR(res);
357 }
358
359 cache->version = ovl_dentry_version_get(dentry);
360 ovl_set_dir_cache(dentry, cache);
361
362 return cache;
363}
364
365static int ovl_iterate(struct file *file, struct dir_context *ctx)
366{
367 struct ovl_dir_file *od = file->private_data;
368 struct dentry *dentry = file->f_path.dentry;
369
370 if (!ctx->pos)
371 ovl_dir_reset(file);
372
373 if (od->is_real)
374 return iterate_dir(od->realfile, ctx);
375
376 if (!od->cache) {
377 struct ovl_dir_cache *cache;
378
379 cache = ovl_cache_get(dentry);
380 if (IS_ERR(cache))
381 return PTR_ERR(cache);
382
383 od->cache = cache;
384 ovl_seek_cursor(od, ctx->pos);
385 }
386
387 while (od->cursor.l_node.next != &od->cache->entries) {
388 struct ovl_cache_entry *p;
389
390 p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
391 /* Skip cursors */
392 if (p->name) {
393 if (!p->is_whiteout) {
394 if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
395 break;
396 }
397 ctx->pos++;
398 }
399 list_move(&od->cursor.l_node, &p->l_node);
400 }
401 return 0;
402}
403
404static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
405{
406 loff_t res;
407 struct ovl_dir_file *od = file->private_data;
408
409 mutex_lock(&file_inode(file)->i_mutex);
410 if (!file->f_pos)
411 ovl_dir_reset(file);
412
413 if (od->is_real) {
414 res = vfs_llseek(od->realfile, offset, origin);
415 file->f_pos = od->realfile->f_pos;
416 } else {
417 res = -EINVAL;
418
419 switch (origin) {
420 case SEEK_CUR:
421 offset += file->f_pos;
422 break;
423 case SEEK_SET:
424 break;
425 default:
426 goto out_unlock;
427 }
428 if (offset < 0)
429 goto out_unlock;
430
431 if (offset != file->f_pos) {
432 file->f_pos = offset;
433 if (od->cache)
434 ovl_seek_cursor(od, offset);
435 }
436 res = offset;
437 }
438out_unlock:
439 mutex_unlock(&file_inode(file)->i_mutex);
440
441 return res;
442}
443
444static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
445 int datasync)
446{
447 struct ovl_dir_file *od = file->private_data;
448 struct dentry *dentry = file->f_path.dentry;
449 struct file *realfile = od->realfile;
450
451 /*
452 * Need to check if we started out being a lower dir, but got copied up
453 */
454 if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) {
455 struct inode *inode = file_inode(file);
456
457 realfile = od->upperfile;
458 if (!realfile) {
459 struct path upperpath;
460
461 ovl_path_upper(dentry, &upperpath);
462 realfile = ovl_path_open(&upperpath, O_RDONLY);
463 mutex_lock(&inode->i_mutex);
464 if (!od->upperfile) {
465 if (IS_ERR(realfile)) {
466 mutex_unlock(&inode->i_mutex);
467 return PTR_ERR(realfile);
468 }
469 od->upperfile = realfile;
470 } else {
471 /* somebody has beaten us to it */
472 if (!IS_ERR(realfile))
473 fput(realfile);
474 realfile = od->upperfile;
475 }
476 mutex_unlock(&inode->i_mutex);
477 }
478 }
479
480 return vfs_fsync_range(realfile, start, end, datasync);
481}
482
483static int ovl_dir_release(struct inode *inode, struct file *file)
484{
485 struct ovl_dir_file *od = file->private_data;
486
487 if (od->cache) {
488 mutex_lock(&inode->i_mutex);
489 ovl_cache_put(od, file->f_path.dentry);
490 mutex_unlock(&inode->i_mutex);
491 }
492 fput(od->realfile);
493 if (od->upperfile)
494 fput(od->upperfile);
495 kfree(od);
496
497 return 0;
498}
499
500static int ovl_dir_open(struct inode *inode, struct file *file)
501{
502 struct path realpath;
503 struct file *realfile;
504 struct ovl_dir_file *od;
505 enum ovl_path_type type;
506
507 od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
508 if (!od)
509 return -ENOMEM;
510
511 type = ovl_path_real(file->f_path.dentry, &realpath);
512 realfile = ovl_path_open(&realpath, file->f_flags);
513 if (IS_ERR(realfile)) {
514 kfree(od);
515 return PTR_ERR(realfile);
516 }
517 INIT_LIST_HEAD(&od->cursor.l_node);
518 od->realfile = realfile;
519 od->is_real = (type != OVL_PATH_MERGE);
520 od->is_upper = (type != OVL_PATH_LOWER);
521 file->private_data = od;
522
523 return 0;
524}
525
526const struct file_operations ovl_dir_operations = {
527 .read = generic_read_dir,
528 .open = ovl_dir_open,
529 .iterate = ovl_iterate,
530 .llseek = ovl_dir_llseek,
531 .fsync = ovl_dir_fsync,
532 .release = ovl_dir_release,
533};
534
535int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
536{
537 int err;
538 struct path lowerpath;
539 struct path upperpath;
540 struct ovl_cache_entry *p;
541
542 ovl_path_upper(dentry, &upperpath);
543 ovl_path_lower(dentry, &lowerpath);
544
545 err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
546 if (err)
547 return err;
548
549 err = 0;
550
551 list_for_each_entry(p, list, l_node) {
552 if (p->is_whiteout)
553 continue;
554
555 if (p->name[0] == '.') {
556 if (p->len == 1)
557 continue;
558 if (p->len == 2 && p->name[1] == '.')
559 continue;
560 }
561 err = -ENOTEMPTY;
562 break;
563 }
564
565 return err;
566}
567
568void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
569{
570 struct ovl_cache_entry *p;
571
572 mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT);
573 list_for_each_entry(p, list, l_node) {
574 struct dentry *dentry;
575
576 if (!p->is_whiteout)
577 continue;
578
579 dentry = lookup_one_len(p->name, upper, p->len);
580 if (IS_ERR(dentry)) {
581 pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
582 upper->d_name.name, p->len, p->name,
583 (int) PTR_ERR(dentry));
584 continue;
585 }
586 ovl_cleanup(upper->d_inode, dentry);
587 dput(dentry);
588 }
589 mutex_unlock(&upper->d_inode->i_mutex);
590}
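
The effect of ovl_dir_read_merged() and the whiteout marking above is that a plain readdir over a merged directory yields the union of both layers, de-duplicated through the rbtree, with whiteouts suppressed by the is_whiteout check in ovl_iterate(). A minimal illustration, with /merged/dir as a hypothetical merged directory:

/* list_merged.c - enumerate a merged overlay directory. */
#include <dirent.h>
#include <stdio.h>

int main(void)
{
	DIR *d = opendir("/merged/dir");
	struct dirent *de;

	if (!d) {
		perror("opendir");
		return 1;
	}
	/* Upper names shadow same-named lower ones; whiteouts and the
	 * overlay's internal cursor entries never show up here. */
	while ((de = readdir(d)) != NULL)
		printf("%s\n", de->d_name);
	closedir(d);
	return 0;
}
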
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
new file mode 100644
index 000000000000..08b704cebfc4
--- /dev/null
+++ b/fs/overlayfs/super.c
@@ -0,0 +1,796 @@
1/*
2 *
3 * Copyright (C) 2011 Novell Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/fs.h>
11#include <linux/namei.h>
12#include <linux/xattr.h>
13#include <linux/security.h>
14#include <linux/mount.h>
15#include <linux/slab.h>
16#include <linux/parser.h>
17#include <linux/module.h>
18#include <linux/sched.h>
19#include <linux/statfs.h>
20#include <linux/seq_file.h>
21#include "overlayfs.h"
22
23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
24MODULE_DESCRIPTION("Overlay filesystem");
25MODULE_LICENSE("GPL");
26
27#define OVERLAYFS_SUPER_MAGIC 0x794c764f
28
29struct ovl_config {
30 char *lowerdir;
31 char *upperdir;
32 char *workdir;
33};
34
35/* private information held for overlayfs's superblock */
36struct ovl_fs {
37 struct vfsmount *upper_mnt;
38 struct vfsmount *lower_mnt;
39 struct dentry *workdir;
40 long lower_namelen;
41 /* pathnames of lower and upper dirs, for show_options */
42 struct ovl_config config;
43};
44
45struct ovl_dir_cache;
46
47/* private information held for every overlayfs dentry */
48struct ovl_entry {
49 struct dentry *__upperdentry;
50 struct dentry *lowerdentry;
51 struct ovl_dir_cache *cache;
52 union {
53 struct {
54 u64 version;
55 bool opaque;
56 };
57 struct rcu_head rcu;
58 };
59};
60
61const char *ovl_opaque_xattr = "trusted.overlay.opaque";
62
63
64enum ovl_path_type ovl_path_type(struct dentry *dentry)
65{
66 struct ovl_entry *oe = dentry->d_fsdata;
67
68 if (oe->__upperdentry) {
69 if (oe->lowerdentry) {
70 if (S_ISDIR(dentry->d_inode->i_mode))
71 return OVL_PATH_MERGE;
72 else
73 return OVL_PATH_UPPER;
74 } else {
75 if (oe->opaque)
76 return OVL_PATH_UPPER;
77 else
78 return OVL_PATH_PURE_UPPER;
79 }
80 } else {
81 return OVL_PATH_LOWER;
82 }
83}
84
85static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
86{
87 struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
88 /*
89 * Make sure to order reads to upperdentry wrt ovl_dentry_update()
90 */
91 smp_read_barrier_depends();
92 return upperdentry;
93}
94
95void ovl_path_upper(struct dentry *dentry, struct path *path)
96{
97 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
98 struct ovl_entry *oe = dentry->d_fsdata;
99
100 path->mnt = ofs->upper_mnt;
101 path->dentry = ovl_upperdentry_dereference(oe);
102}
103
104enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
105{
106
107 enum ovl_path_type type = ovl_path_type(dentry);
108
109 if (type == OVL_PATH_LOWER)
110 ovl_path_lower(dentry, path);
111 else
112 ovl_path_upper(dentry, path);
113
114 return type;
115}
116
117struct dentry *ovl_dentry_upper(struct dentry *dentry)
118{
119 struct ovl_entry *oe = dentry->d_fsdata;
120
121 return ovl_upperdentry_dereference(oe);
122}
123
124struct dentry *ovl_dentry_lower(struct dentry *dentry)
125{
126 struct ovl_entry *oe = dentry->d_fsdata;
127
128 return oe->lowerdentry;
129}
130
131struct dentry *ovl_dentry_real(struct dentry *dentry)
132{
133 struct ovl_entry *oe = dentry->d_fsdata;
134 struct dentry *realdentry;
135
136 realdentry = ovl_upperdentry_dereference(oe);
137 if (!realdentry)
138 realdentry = oe->lowerdentry;
139
140 return realdentry;
141}
142
143struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
144{
145 struct dentry *realdentry;
146
147 realdentry = ovl_upperdentry_dereference(oe);
148 if (realdentry) {
149 *is_upper = true;
150 } else {
151 realdentry = oe->lowerdentry;
152 *is_upper = false;
153 }
154 return realdentry;
155}
156
157struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
158{
159 struct ovl_entry *oe = dentry->d_fsdata;
160
161 return oe->cache;
162}
163
164void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
165{
166 struct ovl_entry *oe = dentry->d_fsdata;
167
168 oe->cache = cache;
169}
170
171void ovl_path_lower(struct dentry *dentry, struct path *path)
172{
173 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
174 struct ovl_entry *oe = dentry->d_fsdata;
175
176 path->mnt = ofs->lower_mnt;
177 path->dentry = oe->lowerdentry;
178}
179
180int ovl_want_write(struct dentry *dentry)
181{
182 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
183 return mnt_want_write(ofs->upper_mnt);
184}
185
186void ovl_drop_write(struct dentry *dentry)
187{
188 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
189 mnt_drop_write(ofs->upper_mnt);
190}
191
192struct dentry *ovl_workdir(struct dentry *dentry)
193{
194 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
195 return ofs->workdir;
196}
197
198bool ovl_dentry_is_opaque(struct dentry *dentry)
199{
200 struct ovl_entry *oe = dentry->d_fsdata;
201 return oe->opaque;
202}
203
204void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
205{
206 struct ovl_entry *oe = dentry->d_fsdata;
207 oe->opaque = opaque;
208}
209
210void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
211{
212 struct ovl_entry *oe = dentry->d_fsdata;
213
214 WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
215 WARN_ON(oe->__upperdentry);
216 BUG_ON(!upperdentry->d_inode);
217 /*
218 * Make sure upperdentry is consistent before making it visible to
219 * ovl_upperdentry_dereference().
220 */
221 smp_wmb();
222 oe->__upperdentry = upperdentry;
223}
224
225void ovl_dentry_version_inc(struct dentry *dentry)
226{
227 struct ovl_entry *oe = dentry->d_fsdata;
228
229 WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
230 oe->version++;
231}
232
233u64 ovl_dentry_version_get(struct dentry *dentry)
234{
235 struct ovl_entry *oe = dentry->d_fsdata;
236
237 WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
238 return oe->version;
239}
240
241bool ovl_is_whiteout(struct dentry *dentry)
242{
243 struct inode *inode = dentry->d_inode;
244
245 return inode && IS_WHITEOUT(inode);
246}
247
248static bool ovl_is_opaquedir(struct dentry *dentry)
249{
250 int res;
251 char val;
252 struct inode *inode = dentry->d_inode;
253
254 if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr)
255 return false;
256
257 res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1);
258 if (res == 1 && val == 'y')
259 return true;
260
261 return false;
262}
263
264static void ovl_dentry_release(struct dentry *dentry)
265{
266 struct ovl_entry *oe = dentry->d_fsdata;
267
268 if (oe) {
269 dput(oe->__upperdentry);
270 dput(oe->lowerdentry);
271 kfree_rcu(oe, rcu);
272 }
273}
274
275static const struct dentry_operations ovl_dentry_operations = {
276 .d_release = ovl_dentry_release,
277};
278
279static struct ovl_entry *ovl_alloc_entry(void)
280{
281 return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
282}
283
284static inline struct dentry *ovl_lookup_real(struct dentry *dir,
285 struct qstr *name)
286{
287 struct dentry *dentry;
288
289 mutex_lock(&dir->d_inode->i_mutex);
290 dentry = lookup_one_len(name->name, dir, name->len);
291 mutex_unlock(&dir->d_inode->i_mutex);
292
293 if (IS_ERR(dentry)) {
294 if (PTR_ERR(dentry) == -ENOENT)
295 dentry = NULL;
296 } else if (!dentry->d_inode) {
297 dput(dentry);
298 dentry = NULL;
299 }
300 return dentry;
301}
302
303struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
304 unsigned int flags)
305{
306 struct ovl_entry *oe;
307 struct dentry *upperdir;
308 struct dentry *lowerdir;
309 struct dentry *upperdentry = NULL;
310 struct dentry *lowerdentry = NULL;
311 struct inode *inode = NULL;
312 int err;
313
314 err = -ENOMEM;
315 oe = ovl_alloc_entry();
316 if (!oe)
317 goto out;
318
319 upperdir = ovl_dentry_upper(dentry->d_parent);
320 lowerdir = ovl_dentry_lower(dentry->d_parent);
321
322 if (upperdir) {
323 upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
324 err = PTR_ERR(upperdentry);
325 if (IS_ERR(upperdentry))
326 goto out_put_dir;
327
328 if (lowerdir && upperdentry) {
329 if (ovl_is_whiteout(upperdentry)) {
330 dput(upperdentry);
331 upperdentry = NULL;
332 oe->opaque = true;
333 } else if (ovl_is_opaquedir(upperdentry)) {
334 oe->opaque = true;
335 }
336 }
337 }
338 if (lowerdir && !oe->opaque) {
339 lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
340 err = PTR_ERR(lowerdentry);
341 if (IS_ERR(lowerdentry))
342 goto out_dput_upper;
343 }
344
345 if (lowerdentry && upperdentry &&
346 (!S_ISDIR(upperdentry->d_inode->i_mode) ||
347 !S_ISDIR(lowerdentry->d_inode->i_mode))) {
348 dput(lowerdentry);
349 lowerdentry = NULL;
350 oe->opaque = true;
351 }
352
353 if (lowerdentry || upperdentry) {
354 struct dentry *realdentry;
355
356 realdentry = upperdentry ? upperdentry : lowerdentry;
357 err = -ENOMEM;
358 inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
359 oe);
360 if (!inode)
361 goto out_dput;
362 ovl_copyattr(realdentry->d_inode, inode);
363 }
364
365 oe->__upperdentry = upperdentry;
366 oe->lowerdentry = lowerdentry;
367
368 dentry->d_fsdata = oe;
369 d_add(dentry, inode);
370
371 return NULL;
372
373out_dput:
374 dput(lowerdentry);
375out_dput_upper:
376 dput(upperdentry);
377out_put_dir:
378 kfree(oe);
379out:
380 return ERR_PTR(err);
381}
382
383struct file *ovl_path_open(struct path *path, int flags)
384{
385 return dentry_open(path, flags, current_cred());
386}
387
388static void ovl_put_super(struct super_block *sb)
389{
390 struct ovl_fs *ufs = sb->s_fs_info;
391
392 dput(ufs->workdir);
393 mntput(ufs->upper_mnt);
394 mntput(ufs->lower_mnt);
395
396 kfree(ufs->config.lowerdir);
397 kfree(ufs->config.upperdir);
398 kfree(ufs->config.workdir);
399 kfree(ufs);
400}
401
402/**
403 * ovl_statfs
404 * @dentry: The overlayfs dentry to report on
405 * @buf: The struct kstatfs to fill in with stats
406 *
407 * Get the filesystem statistics. As writes always target the upper layer
408 * filesystem, pass the statfs call through to that filesystem.
409 */
410static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
411{
412 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
413 struct dentry *root_dentry = dentry->d_sb->s_root;
414 struct path path;
415 int err;
416
417 ovl_path_upper(root_dentry, &path);
418
419 err = vfs_statfs(&path, buf);
420 if (!err) {
421 buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
422 buf->f_type = OVERLAYFS_SUPER_MAGIC;
423 }
424
425 return err;
426}
427
428/**
429 * ovl_show_options
430 *
431 * Prints the mount options for a given superblock.
432 * Returns zero; does not fail.
433 */
434static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
435{
436 struct super_block *sb = dentry->d_sb;
437 struct ovl_fs *ufs = sb->s_fs_info;
438
439 seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
440 seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
441 seq_printf(m, ",workdir=%s", ufs->config.workdir);
442 return 0;
443}
444
445static const struct super_operations ovl_super_operations = {
446 .put_super = ovl_put_super,
447 .statfs = ovl_statfs,
448 .show_options = ovl_show_options,
449};
450
451enum {
452 OPT_LOWERDIR,
453 OPT_UPPERDIR,
454 OPT_WORKDIR,
455 OPT_ERR,
456};
457
458static const match_table_t ovl_tokens = {
459 {OPT_LOWERDIR, "lowerdir=%s"},
460 {OPT_UPPERDIR, "upperdir=%s"},
461 {OPT_WORKDIR, "workdir=%s"},
462 {OPT_ERR, NULL}
463};
464
465static int ovl_parse_opt(char *opt, struct ovl_config *config)
466{
467 char *p;
468
469 while ((p = strsep(&opt, ",")) != NULL) {
470 int token;
471 substring_t args[MAX_OPT_ARGS];
472
473 if (!*p)
474 continue;
475
476 token = match_token(p, ovl_tokens, args);
477 switch (token) {
478 case OPT_UPPERDIR:
479 kfree(config->upperdir);
480 config->upperdir = match_strdup(&args[0]);
481 if (!config->upperdir)
482 return -ENOMEM;
483 break;
484
485 case OPT_LOWERDIR:
486 kfree(config->lowerdir);
487 config->lowerdir = match_strdup(&args[0]);
488 if (!config->lowerdir)
489 return -ENOMEM;
490 break;
491
492 case OPT_WORKDIR:
493 kfree(config->workdir);
494 config->workdir = match_strdup(&args[0]);
495 if (!config->workdir)
496 return -ENOMEM;
497 break;
498
499 default:
500 return -EINVAL;
501 }
502 }
503 return 0;
504}
505
506#define OVL_WORKDIR_NAME "work"
507
508static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
509 struct dentry *dentry)
510{
511 struct inode *dir = dentry->d_inode;
512 struct dentry *work;
513 int err;
514 bool retried = false;
515
516 err = mnt_want_write(mnt);
517 if (err)
518 return ERR_PTR(err);
519
520 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
521retry:
522 work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
523 strlen(OVL_WORKDIR_NAME));
524
525 if (!IS_ERR(work)) {
526 struct kstat stat = {
527 .mode = S_IFDIR | 0,
528 };
529
530 if (work->d_inode) {
531 err = -EEXIST;
532 if (retried)
533 goto out_dput;
534
535 retried = true;
536 ovl_cleanup(dir, work);
537 dput(work);
538 goto retry;
539 }
540
541 err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
542 if (err)
543 goto out_dput;
544 }
545out_unlock:
546 mutex_unlock(&dir->i_mutex);
547 mnt_drop_write(mnt);
548
549 return work;
550
551out_dput:
552 dput(work);
553 work = ERR_PTR(err);
554 goto out_unlock;
555}
556
557static int ovl_mount_dir(const char *name, struct path *path)
558{
559 int err;
560
561 err = kern_path(name, LOOKUP_FOLLOW, path);
562 if (err) {
563 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
564 err = -EINVAL;
565 }
566 return err;
567}
568
569static bool ovl_is_allowed_fs_type(struct dentry *root)
570{
571 const struct dentry_operations *dop = root->d_op;
572
573 /*
574 * We don't support:
575 * - automount filesystems
576 * - filesystems with revalidate (FIXME for lower layer)
577 * - filesystems with case insensitive names
578 */
579 if (dop &&
580 (dop->d_manage || dop->d_automount ||
581 dop->d_revalidate || dop->d_weak_revalidate ||
582 dop->d_compare || dop->d_hash)) {
583 return false;
584 }
585 return true;
586}
587
588/* Workdir should not be subdir of upperdir and vice versa */
589static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
590{
591 bool ok = false;
592
593 if (workdir != upperdir) {
594 ok = (lock_rename(workdir, upperdir) == NULL);
595 unlock_rename(workdir, upperdir);
596 }
597 return ok;
598}
599
600static int ovl_fill_super(struct super_block *sb, void *data, int silent)
601{
602 struct path lowerpath;
603 struct path upperpath;
604 struct path workpath;
605 struct inode *root_inode;
606 struct dentry *root_dentry;
607 struct ovl_entry *oe;
608 struct ovl_fs *ufs;
609 struct kstatfs statfs;
610 int err;
611
612 err = -ENOMEM;
613 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
614 if (!ufs)
615 goto out;
616
617 err = ovl_parse_opt((char *) data, &ufs->config);
618 if (err)
619 goto out_free_config;
620
621 /* FIXME: workdir is not needed for a R/O mount */
622 err = -EINVAL;
623 if (!ufs->config.upperdir || !ufs->config.lowerdir ||
624 !ufs->config.workdir) {
625 pr_err("overlayfs: missing upperdir or lowerdir or workdir\n");
626 goto out_free_config;
627 }
628
629 err = -ENOMEM;
630 oe = ovl_alloc_entry();
631 if (oe == NULL)
632 goto out_free_config;
633
634 err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
635 if (err)
636 goto out_free_oe;
637
638 err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath);
639 if (err)
640 goto out_put_upperpath;
641
642 err = ovl_mount_dir(ufs->config.workdir, &workpath);
643 if (err)
644 goto out_put_lowerpath;
645
646 err = -EINVAL;
647 if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
648 !S_ISDIR(lowerpath.dentry->d_inode->i_mode) ||
649 !S_ISDIR(workpath.dentry->d_inode->i_mode)) {
650 pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n");
651 goto out_put_workpath;
652 }
653
654 if (upperpath.mnt != workpath.mnt) {
655 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
656 goto out_put_workpath;
657 }
658 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
659 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
660 goto out_put_workpath;
661 }
662
663 if (!ovl_is_allowed_fs_type(upperpath.dentry)) {
664 pr_err("overlayfs: filesystem of upperdir is not supported\n");
665 goto out_put_workpath;
666 }
667
668 if (!ovl_is_allowed_fs_type(lowerpath.dentry)) {
669 pr_err("overlayfs: filesystem of lowerdir is not supported\n");
670 goto out_put_workpath;
671 }
672
673 err = vfs_statfs(&lowerpath, &statfs);
674 if (err) {
675 pr_err("overlayfs: statfs failed on lowerpath\n");
676 goto out_put_workpath;
677 }
678 ufs->lower_namelen = statfs.f_namelen;
679
680 sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
681 lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
682
683 err = -EINVAL;
684 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
685 pr_err("overlayfs: maximum fs stacking depth exceeded\n");
686 goto out_put_workpath;
687 }
688
689 ufs->upper_mnt = clone_private_mount(&upperpath);
690 err = PTR_ERR(ufs->upper_mnt);
691 if (IS_ERR(ufs->upper_mnt)) {
692 pr_err("overlayfs: failed to clone upperpath\n");
693 goto out_put_workpath;
694 }
695
696 ufs->lower_mnt = clone_private_mount(&lowerpath);
697 err = PTR_ERR(ufs->lower_mnt);
698 if (IS_ERR(ufs->lower_mnt)) {
699 pr_err("overlayfs: failed to clone lowerpath\n");
700 goto out_put_upper_mnt;
701 }
702
703 ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
704 err = PTR_ERR(ufs->workdir);
705 if (IS_ERR(ufs->workdir)) {
706 pr_err("overlayfs: failed to create directory %s/%s\n",
707 ufs->config.workdir, OVL_WORKDIR_NAME);
708 goto out_put_lower_mnt;
709 }
710
711 /*
712 * Make lower_mnt R/O. That way fchmod/fchown on lower file
713 * will fail instead of modifying lower fs.
714 */
715 ufs->lower_mnt->mnt_flags |= MNT_READONLY;
716
717 /* If the upper fs is r/o, we mark overlayfs r/o too */
718 if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
719 sb->s_flags |= MS_RDONLY;
720
721 sb->s_d_op = &ovl_dentry_operations;
722
723 err = -ENOMEM;
724 root_inode = ovl_new_inode(sb, S_IFDIR, oe);
725 if (!root_inode)
726 goto out_put_workdir;
727
728 root_dentry = d_make_root(root_inode);
729 if (!root_dentry)
730 goto out_put_workdir;
731
732 mntput(upperpath.mnt);
733 mntput(lowerpath.mnt);
734 path_put(&workpath);
735
736 oe->__upperdentry = upperpath.dentry;
737 oe->lowerdentry = lowerpath.dentry;
738
739 root_dentry->d_fsdata = oe;
740
741 sb->s_magic = OVERLAYFS_SUPER_MAGIC;
742 sb->s_op = &ovl_super_operations;
743 sb->s_root = root_dentry;
744 sb->s_fs_info = ufs;
745
746 return 0;
747
748out_put_workdir:
749 dput(ufs->workdir);
750out_put_lower_mnt:
751 mntput(ufs->lower_mnt);
752out_put_upper_mnt:
753 mntput(ufs->upper_mnt);
754out_put_workpath:
755 path_put(&workpath);
756out_put_lowerpath:
757 path_put(&lowerpath);
758out_put_upperpath:
759 path_put(&upperpath);
760out_free_oe:
761 kfree(oe);
762out_free_config:
763 kfree(ufs->config.lowerdir);
764 kfree(ufs->config.upperdir);
765 kfree(ufs->config.workdir);
766 kfree(ufs);
767out:
768 return err;
769}
770
771static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
772 const char *dev_name, void *raw_data)
773{
774 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
775}
776
777static struct file_system_type ovl_fs_type = {
778 .owner = THIS_MODULE,
779 .name = "overlayfs",
780 .mount = ovl_mount,
781 .kill_sb = kill_anon_super,
782};
783MODULE_ALIAS_FS("overlayfs");
784
785static int __init ovl_init(void)
786{
787 return register_filesystem(&ovl_fs_type);
788}
789
790static void __exit ovl_exit(void)
791{
792 unregister_filesystem(&ovl_fs_type);
793}
794
795module_init(ovl_init);
796module_exit(ovl_exit);
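
Everything above comes together at mount time: ovl_parse_opt() consumes the lowerdir/upperdir/workdir triple and ovl_fill_super() enforces that workdir and upperdir share a mount without being subtrees of one another. A hedged sketch of the equivalent mount(2) call; note the filesystem type registered here is "overlayfs", and all four paths are placeholders:

/* mount_overlay.c - mount an overlay as ovl_fill_super() expects. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	const char *opts =
		"lowerdir=/lower,upperdir=/upper,workdir=/work";

	/* The source string is ignored by mount_nodev() filesystems. */
	if (mount("overlay", "/merged", "overlayfs", 0, opts) == -1) {
		perror("mount");
		return 1;
	}
	printf("overlay mounted on /merged\n");
	return 0;
}
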
diff --git a/fs/splice.c b/fs/splice.c
index f5cb9ba84510..75c6058eabf2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 	return ret;
 }
+EXPORT_SYMBOL(do_splice_direct);
 
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 			       struct pipe_inode_info *opipe,
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index f97804bdf1ff..7461327e14e4 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -52,6 +52,7 @@
 #define METHOD_NAME__CBA        "_CBA"
 #define METHOD_NAME__CID        "_CID"
 #define METHOD_NAME__CRS        "_CRS"
+#define METHOD_NAME__DDN        "_DDN"
 #define METHOD_NAME__HID        "_HID"
 #define METHOD_NAME__INI        "_INI"
 #define METHOD_NAME__PLD        "_PLD"
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 57ee0528aacb..f34a0835aa4f 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -433,6 +433,7 @@ int acpi_device_set_power(struct acpi_device *device, int state);
 int acpi_bus_init_power(struct acpi_device *device);
 int acpi_device_fix_up_power(struct acpi_device *device);
 int acpi_bus_update_power(acpi_handle handle, int *state_p);
+int acpi_device_update_power(struct acpi_device *device, int *state_p);
 bool acpi_bus_power_manageable(acpi_handle handle);
 
 #ifdef CONFIG_PM
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 9fc1d71c82bc..ab2acf629a64 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20140828
+#define ACPI_CA_VERSION                 0x20140926
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index ac03ec81d342..7000e66f768e 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -721,7 +721,7 @@ typedef u32 acpi_event_type;
  * |  |  |  +--- Enabled for wake?
  * |  |  +----- Set?
  * |  +------- Has a handler?
- * +----------- <Reserved>
+ * +------------- <Reserved>
  */
 typedef u32 acpi_event_status;
 
@@ -729,7 +729,7 @@ typedef u32 acpi_event_status;
 #define ACPI_EVENT_FLAG_ENABLED         (acpi_event_status) 0x01
 #define ACPI_EVENT_FLAG_WAKE_ENABLED    (acpi_event_status) 0x02
 #define ACPI_EVENT_FLAG_SET             (acpi_event_status) 0x04
-#define ACPI_EVENT_FLAG_HANDLE          (acpi_event_status) 0x08
+#define ACPI_EVENT_FLAG_HAS_HANDLER     (acpi_event_status) 0x08
 
 /* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b7926bb9b444..407a12f663eb 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -432,6 +432,7 @@ static inline bool acpi_driver_match_device(struct device *dev,
 int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
 
+struct platform_device *acpi_create_platform_device(struct acpi_device *);
 #define ACPI_PTR(_ptr) (_ptr)
 
 #else	/* !CONFIG_ACPI */
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 36dffeccebdb..e58fe7df8b9c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -90,7 +90,7 @@ extern unsigned compat_dir_class[];
 extern unsigned compat_chattr_class[];
 extern unsigned compat_signal_class[];
 
-extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall);
+extern int audit_classify_compat_syscall(int abi, unsigned syscall);
 
 /* audit_names->type values */
 #define	AUDIT_TYPE_UNKNOWN	0	/* we don't know yet */
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 324329ceea1e..73b45225a7ca 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
 			unsigned size);
-struct buffer_head *__getblk(struct block_device *bdev, sector_t block,
-			unsigned size);
+struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block,
+			unsigned size, gfp_t gfp);
 void __brelse(struct buffer_head *);
 void __bforget(struct buffer_head *);
 void __breadahead(struct block_device *, sector_t block, unsigned int size);
-struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size);
+struct buffer_head *__bread_gfp(struct block_device *,
+				sector_t block, unsigned size, gfp_t gfp);
 void invalidate_bh_lrus(void);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
@@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh)
 static inline struct buffer_head *
 sb_bread(struct super_block *sb, sector_t block)
 {
-	return __bread(sb->s_bdev, block, sb->s_blocksize);
+	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
+}
+
+static inline struct buffer_head *
+sb_bread_unmovable(struct super_block *sb, sector_t block)
+{
+	return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0);
 }
 
 static inline void
@@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block)
 static inline struct buffer_head *
 sb_getblk(struct super_block *sb, sector_t block)
 {
-	return __getblk(sb->s_bdev, block, sb->s_blocksize);
+	return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE);
 }
 
 static inline struct buffer_head *
@@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh)
 	__lock_buffer(bh);
 }
 
+static inline struct buffer_head *getblk_unmovable(struct block_device *bdev,
+						   sector_t block,
+						   unsigned size)
+{
+	return __getblk_gfp(bdev, block, size, 0);
+}
+
+static inline struct buffer_head *__getblk(struct block_device *bdev,
+					   sector_t block,
+					   unsigned size)
+{
+	return __getblk_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
+/**
+ *  __bread() - reads a specified block and returns the bh
+ *  @bdev: the block_device to read from
+ *  @block: number of block
+ *  @size: size (in bytes) to read
+ *
+ *  Reads a specified block, and returns buffer head that contains it.
+ *  The page cache is allocated from movable area so that it can be migrated.
+ *  It returns NULL if the block was unreadable.
+ */
+static inline struct buffer_head *
+__bread(struct block_device *bdev, sector_t block, unsigned size)
+{
+	return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
+}
+
 extern int __set_page_dirty_buffers(struct page *page);
 
 #else /* CONFIG_BLOCK */
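
The point of the new _gfp variants: callers now pick the pagecache allocation type. __GFP_MOVABLE preserves the old behaviour, while gfp == 0 (the *_unmovable helpers) keeps the buffer out of the movable zone, so metadata a filesystem holds for the whole mount can never block page migration. A hedged sketch of the intended split (the function name is invented):

#include <linux/buffer_head.h>
#include <linux/errno.h>

static int example_read_super(struct super_block *sb, sector_t block)
{
	/* long-lived superblock buffer: allocate it unmovable */
	struct buffer_head *bh = sb_bread_unmovable(sb, block);

	if (!bh)
		return -EIO;
	/* ... parse the on-disk superblock from bh->b_data ... */
	brelse(bh);
	return 0;
}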
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 653f0e2b6ca9..abcafaa20b86 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -287,7 +287,7 @@ extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
 extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
-extern struct clocksource * __init __weak clocksource_default_clock(void);
+extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 extern u64
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
new file mode 100644
index 000000000000..0414009e2c30
--- /dev/null
+++ b/include/linux/cpufreq-dt.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2014 Marvell
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CPUFREQ_DT_H__
+#define __CPUFREQ_DT_H__
+
+struct cpufreq_dt_platform_data {
+	/*
+	 * True when each CPU has its own clock to control its
+	 * frequency, false when all CPUs are controlled by a single
+	 * clock.
+	 */
+	bool independent_clocks;
+};
+
+#endif /* __CPUFREQ_DT_H__ */
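
This struct exists so platform code can tell the generic cpufreq-dt driver whether CPUs are clocked independently. A hedged sketch of a board file handing it over (only the "cpufreq-dt" device name comes from the driver; everything else is invented):

#include <linux/err.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/cpufreq-dt.h>

static struct cpufreq_dt_platform_data example_pdata = {
	.independent_clocks = true,	/* one clock per CPU */
};

static int __init example_cpufreq_register(void)
{
	/* platform_device_register_data() copies the pdata for us */
	return PTR_ERR_OR_ZERO(platform_device_register_data(NULL,
			"cpufreq-dt", -1, &example_pdata,
			sizeof(example_pdata)));
}
device_initcall(example_cpufreq_register);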
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 138336b6bb04..503b085b7832 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -219,6 +219,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 struct cpufreq_driver {
 	char			name[CPUFREQ_NAME_LEN];
 	u8			flags;
+	void			*driver_data;
 
 	/* needed by all drivers */
 	int	(*init)		(struct cpufreq_policy *policy);
@@ -312,6 +313,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
 const char *cpufreq_get_current_driver(void);
+void *cpufreq_get_driver_data(void);
 
 static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
 		unsigned int min, unsigned int max)
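
The two additions are a pair: a driver stores a private pointer in .driver_data at registration, and code that only knows "the currently registered driver" gets it back with cpufreq_get_driver_data(). A hedged sketch (every 'example' name is invented):

#include <linux/cpufreq.h>
#include <linux/printk.h>

struct example_priv {
	int transition_latency_us;
};

static struct example_priv example_priv = { .transition_latency_us = 300 };

static struct cpufreq_driver example_driver = {
	.name		= "example",
	.driver_data	= &example_priv,
	/* .init, .verify, .target_index, ... */
};

static void example_report(void)
{
	struct example_priv *p = cpufreq_get_driver_data();

	if (p)	/* NULL when no matching driver is registered */
		pr_info("latency %d us\n", p->transition_latency_us);
}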
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 72ab536ad3de..3849fce7ecfe 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -14,14 +14,13 @@
 extern unsigned long long elfcorehdr_addr;
 extern unsigned long long elfcorehdr_size;
 
-extern int __weak elfcorehdr_alloc(unsigned long long *addr,
-				   unsigned long long *size);
-extern void __weak elfcorehdr_free(unsigned long long addr);
-extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos);
-extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
-extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
-					 unsigned long from, unsigned long pfn,
-					 unsigned long size, pgprot_t prot);
+extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size);
+extern void elfcorehdr_free(unsigned long long addr);
+extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos);
+extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
+				  unsigned long from, unsigned long pfn,
+				  unsigned long size, pgprot_t prot);
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 45cb4ffdea62..0949f9c7e872 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -92,6 +92,7 @@ typedef struct {
 #define EFI_MEMORY_WC		((u64)0x0000000000000002ULL)	/* write-coalescing */
 #define EFI_MEMORY_WT		((u64)0x0000000000000004ULL)	/* write-through */
 #define EFI_MEMORY_WB		((u64)0x0000000000000008ULL)	/* write-back */
+#define EFI_MEMORY_UCE		((u64)0x0000000000000010ULL)	/* uncached, exported */
 #define EFI_MEMORY_WP		((u64)0x0000000000001000ULL)	/* write-protect */
 #define EFI_MEMORY_RP		((u64)0x0000000000002000ULL)	/* read-protect */
 #define EFI_MEMORY_XP		((u64)0x0000000000004000ULL)	/* execute-protect */
@@ -502,6 +503,10 @@ typedef efi_status_t efi_get_next_variable_t (unsigned long *name_size, efi_char
 typedef efi_status_t efi_set_variable_t (efi_char16_t *name, efi_guid_t *vendor,
 					 u32 attr, unsigned long data_size,
 					 void *data);
+typedef efi_status_t
+efi_set_variable_nonblocking_t(efi_char16_t *name, efi_guid_t *vendor,
+			       u32 attr, unsigned long data_size, void *data);
+
 typedef efi_status_t efi_get_next_high_mono_count_t (u32 *count);
 typedef void efi_reset_system_t (int reset_type, efi_status_t status,
 				 unsigned long data_size, efi_char16_t *data);
@@ -821,6 +826,7 @@ extern struct efi {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_set_variable_nonblocking_t *set_variable_nonblocking;
 	efi_query_variable_info_t *query_variable_info;
 	efi_update_capsule_t *update_capsule;
 	efi_query_capsule_caps_t *query_capsule_caps;
@@ -886,6 +892,13 @@ extern bool efi_poweroff_required(void);
 	     (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
 	     (md) = (void *)(md) + (m)->desc_size)
 
+/*
+ * Format an EFI memory descriptor's type and attributes to a user-provided
+ * character buffer, as per snprintf(), and return the buffer.
+ */
+char * __init efi_md_typeattr_format(char *buf, size_t size,
+				     const efi_memory_desc_t *md);
+
 /**
  * efi_range_is_wc - check the WC bit on an address range
  * @start: starting kvirt address
@@ -1034,6 +1047,7 @@ struct efivar_operations {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_set_variable_nonblocking_t *set_variable_nonblocking;
 	efi_query_variable_store_t *query_variable_store;
 };
 
@@ -1227,4 +1241,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 			   unsigned long *load_addr,
 			   unsigned long *load_size);
 
+efi_status_t efi_parse_options(char *cmdline);
+
+bool efi_runtime_disabled(void);
 #endif /* _LINUX_EFI_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a957d4366c24..4e41a4a331bb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -223,6 +223,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define ATTR_TIMES_SET	(1 << 16)
 
 /*
+ * Whiteout is represented by a char device.  The following constants define the
+ * mode and device number to use.
+ */
+#define WHITEOUT_MODE	0
+#define WHITEOUT_DEV	0
+
+/*
  * This is the Inode Attributes structure, used for notify_change().  It
  * uses the above definitions as flags, to know which values have changed.
  * Also, in this manner, a Filesystem can look at only the values it cares
@@ -254,6 +261,12 @@ struct iattr {
  */
 #include <linux/quota.h>
 
+/*
+ * Maximum number of layers of fs stack.  Needs to be limited to
+ * prevent kernel stack overflow
+ */
+#define FILESYSTEM_MAX_STACK_DEPTH 2
+
 /**
  * enum positive_aop_returns - aop return codes with specific semantics
  *
@@ -1266,6 +1279,11 @@ struct super_block {
 	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
 	struct rcu_head		rcu;
+
+	/*
+	 * Indicates how deep in a filesystem stack this SB is
+	 */
+	int s_stack_depth;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
@@ -1398,6 +1416,7 @@ extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct ino
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+extern int vfs_whiteout(struct inode *, struct dentry *);
 
 /*
  * VFS dentry helper functions.
@@ -1528,6 +1547,9 @@ struct inode_operations {
 			umode_t create_mode, int *opened);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
+
+	/* WARNING: probably going away soon, do not use! */
+	int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
 } ____cacheline_aligned;
 
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
@@ -1625,6 +1647,9 @@ struct super_operations {
 #define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
 #define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
 
+#define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
+				 (inode)->i_rdev == WHITEOUT_DEV)
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
@@ -2040,6 +2065,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t);
 extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int);
+extern int vfs_open(const struct path *, struct file *, const struct cred *);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 
@@ -2253,7 +2279,9 @@ extern sector_t bmap(struct inode *, sector_t);
 #endif
 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
+extern int __inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
+extern int __check_sticky(struct inode *dir, struct inode *inode);
 
 static inline bool execute_ok(struct inode *inode)
 {
@@ -2452,6 +2480,9 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
+extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+		loff_t *opos, size_t len, unsigned int flags);
+
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
@@ -2737,6 +2768,14 @@ static inline int is_sxid(umode_t mode)
 	return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
 }
 
+static inline int check_sticky(struct inode *dir, struct inode *inode)
+{
+	if (!(dir->i_mode & S_ISVTX))
+		return 0;
+
+	return __check_sticky(dir, inode);
+}
+
 static inline void inode_has_no_xattr(struct inode *inode)
 {
 	if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC))
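
Note the shape of the new check_sticky()/__check_sticky() pair: the inline half keeps the overwhelmingly common "directory is not sticky" case to one bit test, and only sticky directories pay for the out-of-line ownership/capability check. A hedged sketch of a caller on an unlink-style path (the function name is invented):

#include <linux/fs.h>
#include <linux/errno.h>

static int example_may_delete(struct inode *dir, struct inode *victim)
{
	/* nonzero means: sticky dir, caller neither owner nor privileged */
	if (check_sticky(dir, victim))
		return -EPERM;
	return 0;
}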
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0dae71e9971c..704b9a599b26 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1042,7 +1042,7 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 40728cf1c452..3d770f5564b8 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -403,6 +403,7 @@ int vsscanf(const char *, const char *, va_list);
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
 extern unsigned long long memparse(const char *ptr, char **retptr);
+extern bool parse_option_str(const char *str, const char *option);
 
 extern int core_kernel_text(unsigned long addr);
 extern int core_kernel_data(unsigned long addr);
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6b06d378f3df..e465bb15912d 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -283,7 +283,7 @@ struct kgdb_io {
 
 extern struct kgdb_arch		arch_kgdb_ops;
 
-extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
+extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs);
 
 #ifdef CONFIG_SERIAL_KGDB_NMI
 extern int kgdb_register_nmi_console(void);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 28be31f49250..ea53b04993f2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+void kvm_unregister_device_ops(u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/linux/leds.h b/include/linux/leds.h
index e43686472197..a57611d0c94e 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -13,8 +13,8 @@
 #define __LINUX_LEDS_H_INCLUDED
 
 #include <linux/list.h>
-#include <linux/spinlock.h>
 #include <linux/rwsem.h>
+#include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 
@@ -31,8 +31,8 @@ enum led_brightness {
 
 struct led_classdev {
 	const char		*name;
-	int			 brightness;
-	int			 max_brightness;
+	enum led_brightness	 brightness;
+	enum led_brightness	 max_brightness;
 	int			 flags;
 
 	/* Lower 16 bits reflect status */
@@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev,
  */
 extern void led_set_brightness(struct led_classdev *led_cdev,
 			       enum led_brightness brightness);
+/**
+ * led_update_brightness - update LED brightness
+ * @led_cdev: the LED to query
+ *
+ * Get an LED's current brightness and update led_cdev->brightness
+ * member with the obtained value.
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+extern int led_update_brightness(struct led_classdev *led_cdev);
 
 /*
  * LED Triggers
diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h
new file mode 100644
index 000000000000..307d9cab2026
--- /dev/null
+++ b/include/linux/mailbox_client.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2013-2014 Linaro Ltd.
+ * Author: Jassi Brar <jassisinghbrar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CLIENT_H
+#define __MAILBOX_CLIENT_H
+
+#include <linux/of.h>
+#include <linux/device.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_client - User of a mailbox
+ * @dev:		The client device
+ * @tx_block:		If the mbox_send_message should block until data is
+ *			transmitted.
+ * @tx_tout:		Max block period in ms before TX is assumed failure
+ * @knows_txdone:	If the client could run the TX state machine. Usually
+ *			if the client receives some ACK packet for transmission.
+ *			Unused if the controller already has TX_Done/RTR IRQ.
+ * @rx_callback:	Atomic callback to provide client the data received
+ * @tx_done:		Atomic callback to tell client of data transmission
+ */
+struct mbox_client {
+	struct device *dev;
+	bool tx_block;
+	unsigned long tx_tout;
+	bool knows_txdone;
+
+	void (*rx_callback)(struct mbox_client *cl, void *mssg);
+	void (*tx_done)(struct mbox_client *cl, void *mssg, int r);
+};
+
+struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index);
+int mbox_send_message(struct mbox_chan *chan, void *mssg);
+void mbox_client_txdone(struct mbox_chan *chan, int r); /* atomic */
+bool mbox_client_peek_data(struct mbox_chan *chan); /* atomic */
+void mbox_free_channel(struct mbox_chan *chan); /* may sleep */
+
+#endif /* __MAILBOX_CLIENT_H */
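
A hedged sketch of a client built only from the declarations above (the device, message format and all 'example' names are invented; a real driver would keep the mbox_client in its device structure):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/mailbox_client.h>

static void example_rx(struct mbox_client *cl, void *mssg)
{
	dev_info(cl->dev, "reply: %u\n", *(u32 *)mssg);	/* atomic context */
}

static int example_talk(struct device *dev)
{
	struct mbox_client cl = {
		.dev		= dev,
		.tx_block	= true,	/* mbox_send_message() waits... */
		.tx_tout	= 500,	/* ...at most 500 ms for TX done */
		.rx_callback	= example_rx,
	};
	struct mbox_chan *chan;
	u32 cmd = 1;
	int ret;

	chan = mbox_request_channel(&cl, 0);	/* index 0 in DT 'mboxes' */
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	ret = mbox_send_message(chan, &cmd);
	mbox_free_channel(chan);
	return ret < 0 ? ret : 0;
}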
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
new file mode 100644
index 000000000000..d4cf96f07cfc
--- /dev/null
+++ b/include/linux/mailbox_controller.h
@@ -0,0 +1,133 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MAILBOX_CONTROLLER_H
+#define __MAILBOX_CONTROLLER_H
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/completion.h>
+
+struct mbox_chan;
+
+/**
+ * struct mbox_chan_ops - methods to control mailbox channels
+ * @send_data:	The API asks the MBOX controller driver, in atomic
+ *		context try to transmit a message on the bus. Returns 0 if
+ *		data is accepted for transmission, -EBUSY while rejecting
+ *		if the remote hasn't yet read the last data sent. Actual
+ *		transmission of data is reported by the controller via
+ *		mbox_chan_txdone (if it has some TX ACK irq). It must not
+ *		sleep.
+ * @startup:	Called when a client requests the chan. The controller
+ *		could ask clients for additional parameters of communication
+ *		to be provided via client's chan_data. This call may
+ *		block. After this call the Controller must forward any
+ *		data received on the chan by calling mbox_chan_received_data.
+ *		The controller may do stuff that need to sleep.
+ * @shutdown:	Called when a client relinquishes control of a chan.
+ *		This call may block too. The controller must not forward
+ *		any received data anymore.
+ *		The controller may do stuff that need to sleep.
+ * @last_tx_done: If the controller sets 'txdone_poll', the API calls
+ *		  this to poll status of last TX. The controller must
+ *		  give priority to IRQ method over polling and never
+ *		  set both txdone_poll and txdone_irq. Only in polling
+ *		  mode 'send_data' is expected to return -EBUSY.
+ *		  The controller may do stuff that need to sleep/block.
+ *		  Used only if txdone_poll:=true && txdone_irq:=false
+ * @peek_data: Atomic check for any received data. Return true if controller
+ *		  has some data to push to the client. False otherwise.
+ */
+struct mbox_chan_ops {
+	int (*send_data)(struct mbox_chan *chan, void *data);
+	int (*startup)(struct mbox_chan *chan);
+	void (*shutdown)(struct mbox_chan *chan);
+	bool (*last_tx_done)(struct mbox_chan *chan);
+	bool (*peek_data)(struct mbox_chan *chan);
+};
+
+/**
+ * struct mbox_controller - Controller of a class of communication channels
+ * @dev:		Device backing this controller
+ * @ops:		Operators that work on each communication chan
+ * @chans:		Array of channels
+ * @num_chans:		Number of channels in the 'chans' array.
+ * @txdone_irq:		Indicates if the controller can report to API when
+ *			the last transmitted data was read by the remote.
+ *			Eg, if it has some TX ACK irq.
+ * @txdone_poll:	If the controller can read but not report the TX
+ *			done. Ex, some register shows the TX status but
+ *			no interrupt rises. Ignored if 'txdone_irq' is set.
+ * @txpoll_period:	If 'txdone_poll' is in effect, the API polls for
+ *			last TX's status after these many millisecs
+ * @of_xlate:		Controller driver specific mapping of channel via DT
+ * @poll:		API private. Used to poll for TXDONE on all channels.
+ * @node:		API private. To hook into list of controllers.
+ */
+struct mbox_controller {
+	struct device *dev;
+	struct mbox_chan_ops *ops;
+	struct mbox_chan *chans;
+	int num_chans;
+	bool txdone_irq;
+	bool txdone_poll;
+	unsigned txpoll_period;
+	struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox,
+				      const struct of_phandle_args *sp);
+	/* Internal to API */
+	struct timer_list poll;
+	struct list_head node;
+};
+
+/*
+ * The length of circular buffer for queuing messages from a client.
+ * 'msg_count' tracks the number of buffered messages while 'msg_free'
+ * is the index where the next message would be buffered.
+ * We shouldn't need it too big because every transfer is interrupt
+ * triggered and if we have lots of data to transfer, the interrupt
+ * latencies are going to be the bottleneck, not the buffer length.
+ * Besides, mbox_send_message could be called from atomic context and
+ * the client could also queue another message from the notifier 'tx_done'
+ * of the last transfer done.
+ * REVISIT: If too many platforms see the "Try increasing MBOX_TX_QUEUE_LEN"
+ * print, it needs to be taken from config option or somesuch.
+ */
+#define MBOX_TX_QUEUE_LEN	20
+
+/**
+ * struct mbox_chan - s/w representation of a communication chan
+ * @mbox:		Pointer to the parent/provider of this channel
+ * @txdone_method:	Way to detect TXDone chosen by the API
+ * @cl:			Pointer to the current owner of this channel
+ * @tx_complete:	Transmission completion
+ * @active_req:		Currently active request hook
+ * @msg_count:		No. of mssg currently queued
+ * @msg_free:		Index of next available mssg slot
+ * @msg_data:		Hook for data packet
+ * @lock:		Serialise access to the channel
+ * @con_priv:		Hook for controller driver to attach private data
+ */
+struct mbox_chan {
+	struct mbox_controller *mbox;
+	unsigned txdone_method;
+	struct mbox_client *cl;
+	struct completion tx_complete;
+	void *active_req;
+	unsigned msg_count, msg_free;
+	void *msg_data[MBOX_TX_QUEUE_LEN];
+	spinlock_t lock; /* Serialise access to the channel */
+	void *con_priv;
+};
+
+int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */
+void mbox_controller_unregister(struct mbox_controller *mbox); /* can sleep */
+void mbox_chan_received_data(struct mbox_chan *chan, void *data); /* atomic */
+void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */
+
+#endif /* __MAILBOX_CONTROLLER_H */
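
And a hedged sketch of the controller side: a single-channel, polled (no TX ACK irq) mailbox in which the two example_hw_*() helpers stand in for real MMIO accessors:

#include <linux/kernel.h>
#include <linux/mailbox_controller.h>

static bool example_hw_remote_read_last(void)
{
	return true;	/* stand-in: poll a status register here */
}

static void example_hw_write_msg(void *regs, void *data)
{
	/* stand-in: write the message to the TX register(s) here */
}

static int example_send_data(struct mbox_chan *chan, void *data)
{
	if (!example_hw_remote_read_last())
		return -EBUSY;	/* remote hasn't read the last message */
	example_hw_write_msg(chan->con_priv, data);
	return 0;	/* accepted; completion reported via polling */
}

static bool example_last_tx_done(struct mbox_chan *chan)
{
	return example_hw_remote_read_last();
}

static struct mbox_chan_ops example_ops = {
	.send_data	= example_send_data,
	.last_tx_done	= example_last_tx_done,
};

static struct mbox_chan example_chans[1];

static struct mbox_controller example_mbox = {
	/* .dev must point at the probing device before registration */
	.ops		= &example_ops,
	.chans		= example_chans,
	.num_chans	= ARRAY_SIZE(example_chans),
	.txdone_poll	= true,	/* API polls last_tx_done()... */
	.txpoll_period	= 10,	/* ...every 10 ms */
};
/* probe() would end with: return mbox_controller_register(&example_mbox); */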
diff --git a/include/linux/memory.h b/include/linux/memory.h
index bb7384e3c3d8..8b8d8d12348e 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -35,7 +35,7 @@ struct memory_block {
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
-unsigned long __weak memory_block_size_bytes(void);
+unsigned long memory_block_size_bytes(void);
 
 /* These states are exposed to userspace as text strings in sysfs */
 #define	MEM_ONLINE		(1<<0) /* exposed to userspace */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 02d11ee7f19d..27eb1bfbe704 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1176,6 +1176,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 9262e4bf0cc3..c2c561dc0114 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -81,6 +81,9 @@ extern struct vfsmount *mntget(struct vfsmount *mnt);
 extern struct vfsmount *mnt_clone_internal(struct path *path);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
+struct path;
+extern struct vfsmount *clone_private_mount(struct path *path);
+
 struct file_system_type;
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 647395a1a550..e8d6e1058723 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
 		unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
 extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			     unsigned int points, unsigned long totalpages,
 			     struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/include/linux/mailbox.h b/include/linux/pl320-ipc.h
index 5161f63ec1c8..5161f63ec1c8 100644
--- a/include/linux/mailbox.h
+++ b/include/linux/pl320-ipc.h
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 9ab4bf7c4646..636e82834506 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -15,6 +15,7 @@ enum {
 	PM_QOS_CPU_DMA_LATENCY,
 	PM_QOS_NETWORK_LATENCY,
 	PM_QOS_NETWORK_THROUGHPUT,
+	PM_QOS_MEMORY_BANDWIDTH,
 
 	/* insert new class ID */
 	PM_QOS_NUM_CLASSES,
@@ -32,6 +33,7 @@ enum pm_qos_flags_status {
 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
 #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
 #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE	0
+#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE	0
 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE	0
 #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE	0
 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT	(-1)
@@ -69,7 +71,8 @@ struct dev_pm_qos_request {
 enum pm_qos_type {
 	PM_QOS_UNITIALIZED,
 	PM_QOS_MAX,		/* return the largest value */
-	PM_QOS_MIN		/* return the smallest value */
+	PM_QOS_MIN,		/* return the smallest value */
+	PM_QOS_SUM		/* return the sum */
 };
 
 /*
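
PM_QOS_MAX and PM_QOS_MIN collapse all requests to one extreme; the new PM_QOS_SUM adds them up, which is the natural rule for the bandwidth class added above (two devices asking for 100 and 250 units need 350, not 250). A standalone illustration of just the aggregation rule, not the kernel's plist-based implementation:

#include <stdio.h>

static int pm_qos_sum(const int *req, int n)
{
	int i, total = 0;

	for (i = 0; i < n; i++)
		total += req[i];
	return total;
}

int main(void)
{
	int reqs[] = { 100, 250, 50 };	/* e.g. MB/s wanted by three devices */

	printf("aggregate target: %d\n", pm_qos_sum(reqs, 3));	/* 400 */
	return 0;
}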
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
index fe25876c1a5d..17d7d0d20eca 100644
--- a/include/linux/pnfs_osd_xdr.h
+++ b/include/linux/pnfs_osd_xdr.h
@@ -5,7 +5,7 @@
  * All rights reserved.
  *
  * Benny Halevy <bhalevy@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2
diff --git a/include/linux/string.h b/include/linux/string.h
index e6edfe51575a..2e22a2e58f3a 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -132,7 +132,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
 #endif
 
 extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
-			const void *from, size_t available);
+				       const void *from, size_t available);
 
 /**
  * strstarts - does @str start with @prefix?
@@ -144,7 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix)
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
 
-extern size_t memweight(const void *ptr, size_t bytes);
+size_t memweight(const void *ptr, size_t bytes);
+void memzero_explicit(void *s, size_t count);
 
 /**
  * kbasename - return the last part of a pathname.
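
memzero_explicit() exists because a memset() of a buffer that dies right afterwards is a dead store the optimizer may delete; the explicit variant guarantees the wipe survives optimization. A hedged usage sketch (names invented):

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>

static void example_use_key(const u8 *key, size_t len)
{
	u8 tmp[64];
	size_t n = min_t(size_t, len, sizeof(tmp));

	memcpy(tmp, key, n);
	/* ... derive something secret from tmp ... */
	memzero_explicit(tmp, sizeof(tmp));	/* a plain memset() could be elided here */
}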
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 0305cde21a74..ef90838b36a0 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -44,6 +44,10 @@
 #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\
 				((long)t-2732+5)/10 : ((long)t-2732-5)/10)
 #define CELSIUS_TO_KELVIN(t)	((t)*10+2732)
+#define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100)
+#define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732)
+#define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off))
+#define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732)
 
 /* Adding event notification support elements */
 #define THERMAL_GENL_FAMILY_NAME		"thermal_event"
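
The new macros are plain integer arithmetic around the 2732 deci-kelvin (0 degrees C) offset. A standalone check of the numbers:

#include <stdio.h>

#define DECI_KELVIN_TO_MILLICELSIUS(t)	(((t) - 2732) * 100)
#define MILLICELSIUS_TO_DECI_KELVIN(t)	(((t) / 100) + 2732)

int main(void)
{
	/* 2982 dK = (2982 - 2732) * 100 = 25000 millicelsius, i.e. 25 C */
	printf("%d\n", DECI_KELVIN_TO_MILLICELSIUS(2982));
	/* and back: 25000 / 100 + 2732 = 2982 dK */
	printf("%d\n", MILLICELSIUS_TO_DECI_KELVIN(25000));
	return 0;
}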
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 1ad4724458de..baa81718d985 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -63,7 +63,17 @@ struct uio_port {
 
 #define MAX_UIO_PORT_REGIONS	5
 
-struct uio_device;
+struct uio_device {
+	struct module		*owner;
+	struct device		*dev;
+	int			minor;
+	atomic_t		event;
+	struct fasync_struct	*async_queue;
+	wait_queue_head_t	wait;
+	struct uio_info		*info;
+	struct kobject		*map_dir;
+	struct kobject		*portio_dir;
+};
 
 /**
  * struct uio_info - UIO device capabilities
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f844c6b03ee..60beb5dc7977 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -98,11 +98,11 @@ struct uprobes_state {
 	struct xol_area		*xol_area;
 };
 
-extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
-extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
-extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern bool is_swbp_insn(uprobe_opcode_t *insn);
+extern bool is_trap_insn(uprobe_opcode_t *insn);
+extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
@@ -128,8 +128,8 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
 extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
-extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
-extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 					 void *src, unsigned long len);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 2a3038ee17a3..395b70e0eccf 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -97,13 +97,8 @@ struct watchdog_device {
 #define WDOG_UNREGISTERED	4	/* Has the device been unregistered */
 };
 
-#ifdef CONFIG_WATCHDOG_NOWAYOUT
-#define WATCHDOG_NOWAYOUT		1
-#define WATCHDOG_NOWAYOUT_INIT_STATUS	(1 << WDOG_NO_WAY_OUT)
-#else
-#define WATCHDOG_NOWAYOUT		0
-#define WATCHDOG_NOWAYOUT_INIT_STATUS	0
-#endif
+#define WATCHDOG_NOWAYOUT		IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT)
+#define WATCHDOG_NOWAYOUT_INIT_STATUS	(WATCHDOG_NOWAYOUT << WDOG_NO_WAY_OUT)
 
 /* Use the following function to check whether or not the watchdog is active */
 static inline bool watchdog_active(struct watchdog_device *wdd)
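
The replacement works because IS_BUILTIN(CONFIG_FOO) is an ordinary expression that evaluates to 1 when the option is built in and 0 otherwise, so the whole #ifdef block collapses to two unconditional defines. A userspace mock of the kconfig.h machinery behind it (these macro names follow later kernels; v3.18 spelled the same trick config_enabled()):

#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_BUILTIN(option) __is_defined(option)

#define CONFIG_WATCHDOG_NOWAYOUT 1	/* comment this out and the result is 0 */

int main(void)
{
	printf("WATCHDOG_NOWAYOUT = %d\n", IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT));
	return 0;
}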
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index b2e85fdd2ae0..a09cca829082 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h
index 6ca3265a4dca..7a8d2cd30328 100644
--- a/include/scsi/osd_ore.h
+++ b/include/scsi/osd_ore.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Public Declarations of the ORE API
  *
diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index a2594afe05c7..e0ca835e7bf7 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -496,7 +496,7 @@ struct osd_timestamp {
  */
 
 struct osd_key_identifier {
-	u8 id[7]; /* if you know why 7 please email bharrosh@panasas.com */
+	u8 id[7]; /* if you know why 7 please email ooo@electrozaur.com */
 } __packed;
 
 /* for osd_capability.format */
diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h
index f96151c9c9e8..7abeb0f0db30 100644
--- a/include/scsi/osd_sec.h
+++ b/include/scsi/osd_sec.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_sense.h b/include/scsi/osd_sense.h
index 91db543a5502..d52aa93a0b2d 100644
--- a/include/scsi/osd_sense.h
+++ b/include/scsi/osd_sense.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/scsi/osd_types.h b/include/scsi/osd_types.h
index bd0be7ed4bcf..48e8a165e136 100644
--- a/include/scsi/osd_types.h
+++ b/include/scsi/osd_types.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Panasas Inc.  All rights reserved.
  *
  * Authors:
- *   Boaz Harrosh <bharrosh@panasas.com>
+ *   Boaz Harrosh <ooo@electrozaur.com>
  *   Benny Halevy <bhalevy@panasas.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 9ec9864ecf38..23c518a0340c 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -108,6 +108,8 @@
 #define DA_EMULATE_ALUA				0
 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
 #define DA_ENFORCE_PR_ISIDS			1
+/* Force SPC-3 PR Activate Persistence across Target Power Loss */
+#define DA_FORCE_PR_APTPL			0
 #define DA_STATUS_MAX_SECTORS_MIN		16
 #define DA_STATUS_MAX_SECTORS_MAX		8192
 /* By default don't report non-rotating (solid state) medium */
@@ -680,6 +682,7 @@ struct se_dev_attrib {
 	enum target_prot_type pi_prot_type;
 	enum target_prot_type hw_pi_prot_type;
 	int		enforce_pr_isids;
+	int		force_pr_aptpl;
 	int		is_nonrot;
 	int		emulate_rest_reord;
 	u32		hw_block_size;
@@ -903,4 +906,18 @@ struct se_wwn {
 	struct config_group	fabric_stat_group;
 };
 
+static inline void atomic_inc_mb(atomic_t *v)
+{
+	smp_mb__before_atomic();
+	atomic_inc(v);
+	smp_mb__after_atomic();
+}
+
+static inline void atomic_dec_mb(atomic_t *v)
+{
+	smp_mb__before_atomic();
+	atomic_dec(v);
+	smp_mb__after_atomic();
+}
+
 #endif /* TARGET_CORE_BASE_H */
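
The _mb wrappers above bracket the counter update with full barriers, so stores made before atomic_inc_mb() are visible to any CPU that observes the new count (provided the reader orders its own accesses too). A hedged usage sketch:

#include <target/target_core_base.h>

static atomic_t example_users = ATOMIC_INIT(0);
static bool example_ready;

static void example_publish(void)
{
	example_ready = true;		/* ordered before the increment... */
	atomic_inc_mb(&example_users);	/* ...by smp_mb__before_atomic() */
}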
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d4f70a7fe876..ff4bd1b35246 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2369,7 +2369,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
 		  show_extent_status(__entry->found ? __entry->status : 0))
 );
 
-TRACE_EVENT(ext4_es_shrink_enter,
+DECLARE_EVENT_CLASS(ext4__es_shrink_enter,
 	TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
 
 	TP_ARGS(sb, nr_to_scan, cache_cnt),
@@ -2391,26 +2391,38 @@ TRACE_EVENT(ext4_es_shrink_enter,
 		  __entry->nr_to_scan, __entry->cache_cnt)
 );
 
-TRACE_EVENT(ext4_es_shrink_exit,
-	TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt),
+DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count,
+	TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
 
-	TP_ARGS(sb, shrunk_nr, cache_cnt),
+	TP_ARGS(sb, nr_to_scan, cache_cnt)
+);
+
+DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter,
+	TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
+
+	TP_ARGS(sb, nr_to_scan, cache_cnt)
+);
+
+TRACE_EVENT(ext4_es_shrink_scan_exit,
+	TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt),
+
+	TP_ARGS(sb, nr_shrunk, cache_cnt),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
-		__field(	int,	shrunk_nr		)
+		__field(	int,	nr_shrunk		)
 		__field(	int,	cache_cnt		)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= sb->s_dev;
-		__entry->shrunk_nr	= shrunk_nr;
+		__entry->nr_shrunk	= nr_shrunk;
 		__entry->cache_cnt	= cache_cnt;
 	),
 
-	TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d",
+	TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->shrunk_nr, __entry->cache_cnt)
+		  __entry->nr_shrunk, __entry->cache_cnt)
 );
 
 TRACE_EVENT(ext4_collapse_range,
@@ -2438,6 +2450,37 @@ TRACE_EVENT(ext4_collapse_range,
 		  __entry->offset, __entry->len)
 );
 
+TRACE_EVENT(ext4_es_shrink,
+	TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time,
+		 int skip_precached, int nr_skipped, int retried),
+
+	TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	int,		nr_shrunk	)
+		__field(	unsigned long long, scan_time	)
+		__field(	int,		skip_precached	)
+		__field(	int,		nr_skipped	)
+		__field(	int,		retried		)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= sb->s_dev;
+		__entry->nr_shrunk	= nr_shrunk;
+		__entry->scan_time	= div_u64(scan_time, 1000);
+		__entry->skip_precached = skip_precached;
+		__entry->nr_skipped	= nr_skipped;
+		__entry->retried	= retried;
+	),
+
+	TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d "
+		  "nr_skipped %d retried %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk,
+		  __entry->scan_time, __entry->skip_precached,
+		  __entry->nr_skipped, __entry->retried)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h
new file mode 100644
index 000000000000..0f4f95d63c03
--- /dev/null
+++ b/include/trace/events/thermal.h
@@ -0,0 +1,83 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM thermal
+
+#if !defined(_TRACE_THERMAL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_THERMAL_H
+
+#include <linux/thermal.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(thermal_temperature,
+
+	TP_PROTO(struct thermal_zone_device *tz),
+
+	TP_ARGS(tz),
+
+	TP_STRUCT__entry(
+		__string(thermal_zone, tz->type)
+		__field(int, id)
+		__field(int, temp_prev)
+		__field(int, temp)
+	),
+
+	TP_fast_assign(
+		__assign_str(thermal_zone, tz->type);
+		__entry->id = tz->id;
+		__entry->temp_prev = tz->last_temperature;
+		__entry->temp = tz->temperature;
+	),
+
+	TP_printk("thermal_zone=%s id=%d temp_prev=%d temp=%d",
+		__get_str(thermal_zone), __entry->id, __entry->temp_prev,
+		__entry->temp)
+);
+
+TRACE_EVENT(cdev_update,
+
+	TP_PROTO(struct thermal_cooling_device *cdev, unsigned long target),
+
+	TP_ARGS(cdev, target),
+
+	TP_STRUCT__entry(
+		__string(type, cdev->type)
+		__field(unsigned long, target)
+	),
+
+	TP_fast_assign(
+		__assign_str(type, cdev->type);
+		__entry->target = target;
+	),
+
+	TP_printk("type=%s target=%lu", __get_str(type), __entry->target)
+);
+
+TRACE_EVENT(thermal_zone_trip,
+
+	TP_PROTO(struct thermal_zone_device *tz, int trip,
+		enum thermal_trip_type trip_type),
+
+	TP_ARGS(tz, trip, trip_type),
+
+	TP_STRUCT__entry(
+		__string(thermal_zone, tz->type)
+		__field(int, id)
+		__field(int, trip)
+		__field(enum thermal_trip_type, trip_type)
+	),
+
+	TP_fast_assign(
+		__assign_str(thermal_zone, tz->type);
+		__entry->id = tz->id;
+		__entry->trip = trip;
+		__entry->trip_type = trip_type;
+	),
+
+	TP_printk("thermal_zone=%s id=%d trip=%d trip_type=%d",
+		__get_str(thermal_zone), __entry->id, __entry->trip,
+		__entry->trip_type)
+);
+
+#endif /* _TRACE_THERMAL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
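
The three tracepoints above are only definitions; nothing is emitted until one .c file instantiates them and call sites fire them. A minimal sketch of the usual wiring, assuming the standard CREATE_TRACE_POINTS pattern (the example_* function names are illustrative, not from this patch):

	/* in exactly one compilation unit, e.g. the thermal core: */
	#define CREATE_TRACE_POINTS
	#include <trace/events/thermal.h>

	/* fired after tz->temperature/tz->last_temperature are refreshed */
	static void example_update_temperature(struct thermal_zone_device *tz)
	{
		trace_thermal_temperature(tz);
	}

	/* fired after a cooling device has been set to a new state */
	static void example_cdev_set(struct thermal_cooling_device *cdev,
				     unsigned long target)
	{
		trace_cdev_update(cdev, target);
	}

Every other user includes the header without CREATE_TRACE_POINTS and gets the trace_*() helpers declared for free.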
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 6cad97485bad..b70237e8bc37 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -374,6 +374,7 @@ header-y += swab.h
 header-y += synclink.h
 header-y += sysctl.h
 header-y += sysinfo.h
+header-y += target_core_user.h
 header-y += taskstats.h
 header-y += tcp.h
 header-y += tcp_metrics.h
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index ca1a11bb4443..3735fa0a6784 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -37,6 +37,7 @@
 
 #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
 #define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
+#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
 
 struct fstrim_range {
 	__u64 start;
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
new file mode 100644
index 000000000000..7dcfbe6771b1
--- /dev/null
+++ b/include/uapi/linux/target_core_user.h
@@ -0,0 +1,142 @@
+#ifndef __TARGET_CORE_USER_H
+#define __TARGET_CORE_USER_H
+
+/* This header will be used by applications too */
+
+#include <linux/types.h>
+#include <linux/uio.h>
+
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
+#define TCMU_VERSION "1.0"
+
+/*
+ * Ring Design
+ * -----------
+ *
+ * The mmaped area is divided into three parts:
+ * 1) The mailbox (struct tcmu_mailbox, below)
+ * 2) The command ring
+ * 3) Everything beyond the command ring (data)
+ *
+ * The mailbox tells userspace the offset of the command ring from the
+ * start of the shared memory region, and how big the command ring is.
+ *
+ * The kernel passes SCSI commands to userspace by putting a struct
+ * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
+ * userspace via uio's interrupt mechanism.
+ *
+ * tcmu_cmd_entry contains a header. If the header type is PAD,
+ * userspace should skip hdr->length bytes (mod cmdr_size) to find the
+ * next cmd_entry.
+ *
+ * Otherwise, the entry will contain offsets into the mmaped area that
+ * contain the cdb and data buffers -- the latter accessible via the
+ * iov array. iov addresses are also offsets into the shared area.
+ *
+ * When userspace has completed handling the command, it should set
+ * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate,
+ * and also set mailbox->cmd_tail equal to the old cmd_tail plus
+ * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it
+ * should process the next packet the same way, and so on.
+ */
+
+#define TCMU_MAILBOX_VERSION 1
+#define ALIGN_SIZE 64 /* Should be enough for most CPUs */
+
+struct tcmu_mailbox {
+	__u16 version;
+	__u16 flags;
+	__u32 cmdr_off;
+	__u32 cmdr_size;
+
+	__u32 cmd_head;
+
+	/* Updated by user. On its own cacheline */
+	__u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));
+
+} __packed;
+
+enum tcmu_opcode {
+	TCMU_OP_PAD = 0,
+	TCMU_OP_CMD,
+};
+
+/*
+ * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
+ */
+struct tcmu_cmd_entry_hdr {
+	__u32 len_op;
+} __packed;
+
+#define TCMU_OP_MASK 0x7
+
+static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+{
+	return hdr->len_op & TCMU_OP_MASK;
+}
+
+static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+{
+	hdr->len_op &= ~TCMU_OP_MASK;
+	hdr->len_op |= (op & TCMU_OP_MASK);
+}
+
+static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+{
+	return hdr->len_op & ~TCMU_OP_MASK;
+}
+
+static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+{
+	hdr->len_op &= TCMU_OP_MASK;
+	hdr->len_op |= len;
+}
+
+/* Currently the same as SCSI_SENSE_BUFFERSIZE */
+#define TCMU_SENSE_BUFFERSIZE 96
+
+struct tcmu_cmd_entry {
+	struct tcmu_cmd_entry_hdr hdr;
+
+	uint16_t cmd_id;
+	uint16_t __pad1;
+
+	union {
+		struct {
+			uint64_t cdb_off;
+			uint64_t iov_cnt;
+			struct iovec iov[0];
+		} req;
+		struct {
+			uint8_t scsi_status;
+			uint8_t __pad1;
+			uint16_t __pad2;
+			uint32_t __pad3;
+			char sense_buffer[TCMU_SENSE_BUFFERSIZE];
+		} rsp;
+	};
+
+} __packed;
+
+#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t)
+
+enum tcmu_genl_cmd {
+	TCMU_CMD_UNSPEC,
+	TCMU_CMD_ADDED_DEVICE,
+	TCMU_CMD_REMOVED_DEVICE,
+	__TCMU_CMD_MAX,
+};
+#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
+
+enum tcmu_genl_attr {
+	TCMU_ATTR_UNSPEC,
+	TCMU_ATTR_DEVICE,
+	TCMU_ATTR_MINOR,
+	__TCMU_ATTR_MAX,
+};
+#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
+
+#endif
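
To make the ring protocol described in the header comment concrete, here is a rough userspace consumer sketch, assuming mb already points at the mmap()ed uio region; handle_cmd() is a hypothetical application callback, and the memory barriers a real implementation needs around head/tail accesses are omitted:

	#include <stdint.h>
	#include <linux/target_core_user.h>

	/* hypothetical per-command handler supplied by the application */
	extern void handle_cmd(struct tcmu_cmd_entry *ent, void *map_base);

	static void process_ring(struct tcmu_mailbox *mb)
	{
		char *cmdr = (char *)mb + mb->cmdr_off;

		while (mb->cmd_tail != mb->cmd_head) {
			struct tcmu_cmd_entry *ent =
				(struct tcmu_cmd_entry *)(cmdr + mb->cmd_tail);

			/* PAD entries only exist to fill the ring tail */
			if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD)
				handle_cmd(ent, mb);	/* fills ent->rsp */

			mb->cmd_tail = (mb->cmd_tail +
					tcmu_hdr_get_len(&ent->hdr)) %
				       mb->cmdr_size;
		}
	}

After draining the ring, userspace signals completion back to the kernel through the usual uio notification path (a write on the uio file descriptor).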
diff --git a/kernel/freezer.c b/kernel/freezer.c
index aa6a8aadb911..a8900a3bc27a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -42,6 +42,9 @@ bool freezing_slow_path(struct task_struct *p)
 	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
 		return false;
 
+	if (test_thread_flag(TIF_MEMDIE))
+		return false;
+
 	if (pm_nosig_freezing || cgroup_freezing(p))
 		return true;
 
@@ -147,12 +150,6 @@ void __thaw_task(struct task_struct *p)
 {
 	unsigned long flags;
 
-	/*
-	 * Clear freezing and kick @p if FROZEN. Clearing is guaranteed to
-	 * be visible to @p as waking up implies wmb. Waking up inside
-	 * freezer_lock also prevents wakeups from leaking outside
-	 * refrigerator.
-	 */
 	spin_lock_irqsave(&freezer_lock, flags);
 	if (frozen(p))
 		wake_up_process(p);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7b323221b9ee..5a6ec8678b9a 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -46,13 +46,13 @@ static int try_to_freeze_tasks(bool user_only)
 	while (true) {
 		todo = 0;
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
+		for_each_process_thread(g, p) {
 			if (p == current || !freeze_task(p))
 				continue;
 
 			if (!freezer_should_skip(p))
 				todo++;
-		} while_each_thread(g, p);
+		}
 		read_unlock(&tasklist_lock);
 
 		if (!user_only) {
@@ -93,11 +93,11 @@ static int try_to_freeze_tasks(bool user_only)
 
 		if (!wakeup) {
 			read_lock(&tasklist_lock);
-			do_each_thread(g, p) {
+			for_each_process_thread(g, p) {
 				if (p != current && !freezer_should_skip(p)
 				    && freezing(p) && !frozen(p))
 					sched_show_task(p);
-			} while_each_thread(g, p);
+			}
 			read_unlock(&tasklist_lock);
 		}
 	} else {
@@ -108,6 +108,30 @@ static int try_to_freeze_tasks(bool user_only)
 	return todo ? -EBUSY : 0;
 }
 
+static bool __check_frozen_processes(void)
+{
+	struct task_struct *g, *p;
+
+	for_each_process_thread(g, p)
+		if (p != current && !freezer_should_skip(p) && !frozen(p))
+			return false;
+
+	return true;
+}
+
+/*
+ * Returns true if all freezable tasks (except for current) are frozen already
+ */
+static bool check_frozen_processes(void)
+{
+	bool ret;
+
+	read_lock(&tasklist_lock);
+	ret = __check_frozen_processes();
+	read_unlock(&tasklist_lock);
+	return ret;
+}
+
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
  * The current thread will not be frozen. The same process that calls
@@ -118,6 +142,7 @@ static int try_to_freeze_tasks(bool user_only)
 int freeze_processes(void)
 {
 	int error;
+	int oom_kills_saved;
 
 	error = __usermodehelper_disable(UMH_FREEZING);
 	if (error)
@@ -132,11 +157,25 @@ int freeze_processes(void)
 	pm_wakeup_clear();
 	printk("Freezing user space processes ... ");
 	pm_freezing = true;
+	oom_kills_saved = oom_kills_count();
 	error = try_to_freeze_tasks(true);
 	if (!error) {
-		printk("done.");
 		__usermodehelper_set_disable_depth(UMH_DISABLED);
 		oom_killer_disable();
+
+		/*
+		 * There might have been an OOM kill while we were
+		 * freezing tasks and the killed task might be still
+		 * on the way out so we have to double check for race.
+		 */
+		if (oom_kills_count() != oom_kills_saved &&
+		    !check_frozen_processes()) {
+			__usermodehelper_set_disable_depth(UMH_ENABLED);
+			printk("OOM in progress.");
+			error = -EBUSY;
+		} else {
+			printk("done.");
+		}
 	}
 	printk("\n");
 	BUG_ON(in_atomic());
@@ -191,11 +230,11 @@ void thaw_processes(void)
 	thaw_workqueues();
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	for_each_process_thread(g, p) {
 		/* No other threads should have PF_SUSPEND_TASK set */
 		WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
 		__thaw_task(p);
-	} while_each_thread(g, p);
+	}
 	read_unlock(&tasklist_lock);
 
 	WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
@@ -218,10 +257,10 @@ void thaw_kernel_threads(void)
 	thaw_workqueues();
 
 	read_lock(&tasklist_lock);
-	do_each_thread(g, p) {
+	for_each_process_thread(g, p) {
 		if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
 			__thaw_task(p);
-	} while_each_thread(g, p);
+	}
 	read_unlock(&tasklist_lock);
 
 	schedule();
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 884b77058864..5f4c006c4b1e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -105,11 +105,27 @@ static struct pm_qos_object network_throughput_pm_qos = {
 };
 
 
+static BLOCKING_NOTIFIER_HEAD(memory_bandwidth_notifier);
+static struct pm_qos_constraints memory_bw_constraints = {
+	.list = PLIST_HEAD_INIT(memory_bw_constraints.list),
+	.target_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+	.default_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+	.no_constraint_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+	.type = PM_QOS_SUM,
+	.notifiers = &memory_bandwidth_notifier,
+};
+static struct pm_qos_object memory_bandwidth_pm_qos = {
+	.constraints = &memory_bw_constraints,
+	.name = "memory_bandwidth",
+};
+
+
 static struct pm_qos_object *pm_qos_array[] = {
 	&null_pm_qos,
 	&cpu_dma_pm_qos,
 	&network_lat_pm_qos,
-	&network_throughput_pm_qos
+	&network_throughput_pm_qos,
+	&memory_bandwidth_pm_qos,
 };
 
 static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
@@ -130,6 +146,9 @@ static const struct file_operations pm_qos_power_fops = {
 /* unlocked internal variant */
 static inline int pm_qos_get_value(struct pm_qos_constraints *c)
 {
+	struct plist_node *node;
+	int total_value = 0;
+
 	if (plist_head_empty(&c->list))
 		return c->no_constraint_value;
 
@@ -140,6 +159,12 @@ static inline int pm_qos_get_value(struct pm_qos_constraints *c)
 	case PM_QOS_MAX:
 		return plist_last(&c->list)->prio;
 
+	case PM_QOS_SUM:
+		plist_for_each(node, &c->list)
+			total_value += node->prio;
+
+		return total_value;
+
 	default:
 		/* runtime check for not using enum */
 		BUG();
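
PM_QOS_SUM makes the new memory_bandwidth class additive: every request contributes to the target instead of competing under the min/max aggregation of the other classes. A sketch of a driver using it through the existing pm_qos request API (the example_* names and the value 100 are illustrative):

	#include <linux/pm_qos.h>

	static struct pm_qos_request example_bw_req;

	static void example_start_streaming(void)
	{
		/* two drivers each adding 100 yield a summed target of 200 */
		pm_qos_add_request(&example_bw_req,
				   PM_QOS_MEMORY_BANDWIDTH, 100);
	}

	static void example_stop_streaming(void)
	{
		pm_qos_remove_request(&example_bw_req);
	}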
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 76a712e6e20e..8f13cf73c2ec 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -160,3 +160,32 @@ unsigned long long memparse(const char *ptr, char **retptr)
 	return ret;
 }
 EXPORT_SYMBOL(memparse);
+
+/**
+ * parse_option_str - Parse a string and check whether an option is set
+ * @str: String to be parsed
+ * @option: option name
+ *
+ * This function parses a string containing a comma-separated list of
+ * strings like a=b,c.
+ *
+ * Returns true if the option is present in the string, false otherwise.
+ */
+bool parse_option_str(const char *str, const char *option)
+{
+	while (*str) {
+		if (!strncmp(str, option, strlen(option))) {
+			str += strlen(option);
+			if (!*str || *str == ',')
+				return true;
+		}
+
+		while (*str && *str != ',')
+			str++;
+
+		if (*str == ',')
+			str++;
+	}
+
+	return false;
+}
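
The helper matches whole comma-separated items, not bare prefixes; a short illustration (the option strings are made up):

	static void example(void)
	{
		/* exact item present */
		bool a = parse_option_str("nocrs,strict", "nocrs"); /* true */
		/* a prefix alone does not match: "no" != "nocrs" */
		bool b = parse_option_str("nocrs,strict", "no");    /* false */
	}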
diff --git a/lib/string.c b/lib/string.c
index 2fc20aa06f84..10063300b830 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -598,6 +598,22 @@ void *memset(void *s, int c, size_t count)
 EXPORT_SYMBOL(memset);
 #endif
 
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ *		      keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes memset() internally.
+ */
+void memzero_explicit(void *s, size_t count)
+{
+	memset(s, 0, count);
+	OPTIMIZER_HIDE_VAR(s);
+}
+EXPORT_SYMBOL(memzero_explicit);
+
 #ifndef __HAVE_ARCH_MEMCPY
 /**
  * memcpy - Copy one area of memory to another
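
The OPTIMIZER_HIDE_VAR() is the whole point: a final memset() of a buffer that is never read again is a dead store the compiler may legally remove, while memzero_explicit() survives. A sketch of the intended use (derive_key() and use_key() are hypothetical helpers):

	static void example(const u8 *secret, size_t len)
	{
		u8 key[32];

		derive_key(key, secret, len);	/* hypothetical */
		use_key(key);			/* hypothetical */

		/* memset(key, 0, sizeof(key)) could be elided here;
		 * memzero_explicit() is guaranteed to scrub the stack */
		memzero_explicit(key, sizeof(key));
	}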
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bbf405a3a18f..5340f6b91312 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -404,6 +404,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	dump_tasks(memcg, nodemask);
 }
 
+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+	return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+	atomic_inc(&oom_kills);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1b6381..9cd36b822444 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2252,6 +2252,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
+	 * PM-freezer should be notified that there might be an OOM killer on
+	 * its way to kill and wake somebody up. This is too early and we might
+	 * end up not killing anything but false positives are acceptable.
+	 * See freeze_processes.
+	 */
+	note_oom_kill();
+
+	/*
 	 * Go through the zonelist yet one more time, keep very high watermark
 	 * here, this is only to catch a parallel oom killing, we must fail if
 	 * we're still under heavy pressure.
diff --git a/mm/shmem.c b/mm/shmem.c
index cd6fc7590e54..185836ba53ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2345,6 +2345,32 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
 	return 0;
 }
 
+static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
+{
+	struct dentry *whiteout;
+	int error;
+
+	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
+	if (!whiteout)
+		return -ENOMEM;
+
+	error = shmem_mknod(old_dir, whiteout,
+			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+	dput(whiteout);
+	if (error)
+		return error;
+
+	/*
+	 * Cheat and hash the whiteout while the old dentry is still in
+	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
+	 *
+	 * d_lookup() will consistently find one of them at this point,
+	 * not sure which one, but that isn't even important.
+	 */
+	d_rehash(whiteout);
+	return 0;
+}
+
 /*
  * The VFS layer already does all the dentry stuff for rename,
  * we just have to decrement the usage count for the target if
@@ -2356,7 +2382,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
 	struct inode *inode = old_dentry->d_inode;
 	int they_are_dirs = S_ISDIR(inode->i_mode);
 
-	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
 	if (flags & RENAME_EXCHANGE)
@@ -2365,6 +2391,14 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
 
+	if (flags & RENAME_WHITEOUT) {
+		int error;
+
+		error = shmem_whiteout(old_dir, old_dentry);
+		if (error)
+			return error;
+	}
+
 	if (new_dentry->d_inode) {
 		(void) shmem_unlink(new_dir, new_dentry);
 		if (they_are_dirs) {
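
RENAME_WHITEOUT is consumed by overlayfs internally, but it travels through the generic renameat2() syscall, so on a supporting filesystem such as tmpfs it can be exercised from userspace. A sketch via the raw syscall, assuming kernel headers new enough to define SYS_renameat2, since glibc of this era ships no wrapper (the paths are illustrative, and extra privileges may be needed to create the whiteout device node):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef RENAME_WHITEOUT
	#define RENAME_WHITEOUT (1 << 2)	/* matches the uapi value above */
	#endif

	int main(void)
	{
		/* move "old" to "new", leaving a whiteout entry at "old" */
		if (syscall(SYS_renameat2, AT_FDCWD, "old",
			    AT_FDCWD, "new", RENAME_WHITEOUT) != 0)
			perror("renameat2");
		return 0;
	}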
diff --git a/mm/truncate.c b/mm/truncate.c
index 96d167372d89..261eaf6e5a19 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
 				   do_invalidatepage */
 #include <linux/cleancache.h>
+#include <linux/rmap.h>
 #include "internal.h"
 
 static void clear_exceptional_entry(struct address_space *mapping,
@@ -719,12 +720,68 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
+	loff_t oldsize = inode->i_size;
+
 	i_size_write(inode, newsize);
+	if (newsize > oldsize)
+		pagecache_isize_extended(inode, oldsize, newsize);
 	truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
 /**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode: inode for which i_size was extended
+ * @from: original inode size
+ * @to: new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page. This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+	int bsize = 1 << inode->i_blkbits;
+	loff_t rounded_from;
+	struct page *page;
+	pgoff_t index;
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	WARN_ON(to > inode->i_size);
+
+	if (from >= to || bsize == PAGE_CACHE_SIZE)
+		return;
+	/* Page straddling @from will not have any hole block created? */
+	rounded_from = round_up(from, bsize);
+	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+		return;
+
+	index = from >> PAGE_CACHE_SHIFT;
+	page = find_lock_page(inode->i_mapping, index);
+	/* Page not cached? Nothing to do */
+	if (!page)
+		return;
+	/*
+	 * See clear_page_dirty_for_io() for details why set_page_dirty()
+	 * is needed.
+	 */
+	if (page_mkclean(page))
+		set_page_dirty(page);
+	unlock_page(page);
+	page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
+/**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
  * @lstart: offset of beginning of hole
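
truncate_setsize() now calls the helper automatically, but a filesystem that grows i_size outside of truncate (say, an extending write past EOF) must call it itself, under i_mutex, after publishing the new size. A sketch of such a path (everything except pagecache_isize_extended() and i_size_write() is illustrative):

	static void example_write_end(struct inode *inode, loff_t pos,
				      unsigned int copied)
	{
		loff_t old_size = inode->i_size;

		if (pos + copied > old_size) {
			i_size_write(inode, pos + copied);
			/* make the page straddling the old EOF read-only so
			 * the next mmap store faults into page_mkwrite() */
			pagecache_isize_extended(inode, old_size,
						 inode->i_size);
		}
	}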
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index bfe1cf6b492f..166d59cdc86b 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -781,16 +781,15 @@ static int snd_pcm_action_group(struct action_ops *ops,
 {
 	struct snd_pcm_substream *s = NULL;
 	struct snd_pcm_substream *s1;
-	int res = 0;
+	int res = 0, depth = 1;
 
 	snd_pcm_group_for_each_entry(s, substream) {
 		if (do_lock && s != substream) {
 			if (s->pcm->nonatomic)
-				mutex_lock_nested(&s->self_group.mutex,
-						  SINGLE_DEPTH_NESTING);
+				mutex_lock_nested(&s->self_group.mutex, depth);
 			else
-				spin_lock_nested(&s->self_group.lock,
-						 SINGLE_DEPTH_NESTING);
+				spin_lock_nested(&s->self_group.lock, depth);
+			depth++;
 		}
 		res = ops->pre_action(s, state);
 		if (res < 0)
@@ -906,8 +905,7 @@ static int snd_pcm_action_lock_mutex(struct action_ops *ops,
 	down_read(&snd_pcm_link_rwsem);
 	if (snd_pcm_stream_linked(substream)) {
 		mutex_lock(&substream->group->mutex);
-		mutex_lock_nested(&substream->self_group.mutex,
-				  SINGLE_DEPTH_NESTING);
+		mutex_lock(&substream->self_group.mutex);
 		res = snd_pcm_action_group(ops, substream, state, 1);
 		mutex_unlock(&substream->self_group.mutex);
 		mutex_unlock(&substream->group->mutex);
@@ -3311,7 +3309,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = {
 
 #ifndef ARCH_HAS_DMA_MMAP_COHERENT
 /* This should be defined / handled globally! */
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 #define ARCH_HAS_DMA_MMAP_COHERENT
 #endif
 #endif
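
The switch from a fixed SINGLE_DEPTH_NESTING to an incrementing depth gives lockdep a distinct subclass for each substream lock taken while walking the group, which is what lets several same-class locks be held at once without a false deadlock report. A minimal illustration of the annotation pattern (the array and count are illustrative; lockdep supports only a handful of subclasses, so real code keeps the depth small):

	static void example_lock_chain(struct mutex *locks, int n)
	{
		int depth = 1, i;

		/* each lock in the chain gets its own lockdep subclass */
		for (i = 0; i < n; i++)
			mutex_lock_nested(&locks[i], depth++);

		while (n-- > 0)
			mutex_unlock(&locks[n]);
	}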
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 7eb44e78e141..62658f2f8c9f 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -419,7 +419,7 @@ struct snd_hda_pin_quirk {
 		.subvendor = _subvendor,\
 		.name = _name,\
 		.value = _value,\
-		.pins = (const struct hda_pintbl[]) { _pins } \
+		.pins = (const struct hda_pintbl[]) { _pins, {0, 0}} \
 	}
@@ -427,7 +427,7 @@ struct snd_hda_pin_quirk {
 	{ .codec = _codec,\
 	  .subvendor = _subvendor,\
 	  .value = _value,\
-	  .pins = (const struct hda_pintbl[]) { _pins } \
+	  .pins = (const struct hda_pintbl[]) { _pins, {0, 0}} \
 	}
 
 #endif
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 39862e98551c..9dc9cf8c90e9 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1583,19 +1583,22 @@ static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll)
 		}
 	}
 
-	if (pin_eld->eld_valid && !eld->eld_valid) {
-		update_eld = true;
+	if (pin_eld->eld_valid != eld->eld_valid)
 		eld_changed = true;
-	}
+
+	if (pin_eld->eld_valid && !eld->eld_valid)
+		update_eld = true;
+
 	if (update_eld) {
 		bool old_eld_valid = pin_eld->eld_valid;
 		pin_eld->eld_valid = eld->eld_valid;
-		eld_changed = pin_eld->eld_size != eld->eld_size ||
+		if (pin_eld->eld_size != eld->eld_size ||
 			      memcmp(pin_eld->eld_buffer, eld->eld_buffer,
-				     eld->eld_size) != 0;
-		if (eld_changed)
+				     eld->eld_size) != 0) {
 			memcpy(pin_eld->eld_buffer, eld->eld_buffer,
 			       eld->eld_size);
+			eld_changed = true;
+		}
 		pin_eld->eld_size = eld->eld_size;
 		pin_eld->info = eld->info;
 
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index bc86c36b4bfa..34b7bdb510c7 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2884,6 +2884,9 @@ static void alc283_shutup(struct hda_codec *codec)
 
 	alc_write_coef_idx(codec, 0x43, 0x9004);
 
+	/* depop hp during suspend */
+	alc_write_coef_idx(codec, 0x06, 0x2100);
+
 	snd_hda_codec_write(codec, hp_pin, 0,
 			    AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
 
@@ -5610,9 +5613,9 @@ static void alc662_led_gpio1_mute_hook(void *private_data, int enabled)
 	unsigned int oldval = spec->gpio_led;
 
 	if (enabled)
-		spec->gpio_led &= ~0x01;
-	else
 		spec->gpio_led |= 0x01;
+	else
+		spec->gpio_led &= ~0x01;
 	if (spec->gpio_led != oldval)
 		snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
 				    spec->gpio_led);
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 223c47b33ba3..c657752a420c 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -385,6 +385,36 @@ YAMAHA_DEVICE(0x105d, NULL),
 	}
 },
 {
+	USB_DEVICE(0x0499, 0x1509),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		/* .vendor_name = "Yamaha", */
+		/* .product_name = "Steinberg UR22", */
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 1,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 3,
+				.type = QUIRK_MIDI_YAMAHA
+			},
+			{
+				.ifnum = 4,
+				.type = QUIRK_IGNORE_INTERFACE
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
+{
 	USB_DEVICE(0x0499, 0x150a),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
 		/* .vendor_name = "Yamaha", */
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 60b58cd18410..7ccb073f8316 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -122,6 +122,14 @@ static void os_enter_line_edit_mode(void)
 {
 	struct termios local_term_attributes;
 
+	term_attributes_were_set = 0;
+
+	/* STDIN must be a terminal */
+
+	if (!isatty(STDIN_FILENO)) {
+		return;
+	}
+
 	/* Get and keep the original attributes */
 
 	if (tcgetattr(STDIN_FILENO, &original_term_attributes)) {
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 53cee781e24e..24d32968802d 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -146,7 +146,7 @@ u32 ap_get_table_length(struct acpi_table_header *table)
 
 	if (ACPI_VALIDATE_RSDP_SIG(table->signature)) {
 		rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, table);
-		return (rsdp->length);
+		return (acpi_tb_get_rsdp_length(rsdp));
 	}
 
 	/* Normal ACPI table */
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e51d9f9b995f..c1e6ae989a43 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 			       gfn_t base_gfn, unsigned long npages);
 
 static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
-			   unsigned long size)
+			   unsigned long npages)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
 	pfn = gfn_to_pfn_memslot(slot, gfn);
-	end_gfn = gfn + (size >> PAGE_SHIFT);
+	end_gfn = gfn + npages;
 	gfn += 1;
 
 	if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 * Pin all pages we are about to map in memory. This is
 		 * important because we unmap and unpin in 4kb steps later.
 		 */
-		pfn = kvm_pin_pages(slot, gfn, page_size);
+		pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
 		if (is_error_noslot_pfn(pfn)) {
 			gfn += 1;
 			continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
-			kvm_unpin_pages(kvm, pfn, page_size);
+			kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
 			goto unmap_pages;
 		}
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 384eaa7b02fa..25ffac9e947d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
 	return 0;
 }
 
+void kvm_unregister_device_ops(u32 type)
+{
+	if (kvm_device_ops_table[type] != NULL)
+		kvm_device_ops_table[type] = NULL;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
 				   struct kvm_create_device *cd)
 {
@@ -3328,5 +3334,6 @@ void kvm_exit(void)
 	kvm_arch_exit();
 	kvm_irqfd_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 281e7cf2b8e5..620e37f741b8 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
+
+void kvm_vfio_ops_exit(void)
+{
+	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
+}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
index 92eac75d6b62..ab88c7dc0514 100644
--- a/virt/kvm/vfio.h
+++ b/virt/kvm/vfio.h
@@ -3,11 +3,15 @@
 
 #ifdef CONFIG_KVM_VFIO
 int kvm_vfio_ops_init(void);
+void kvm_vfio_ops_exit(void);
 #else
 static inline int kvm_vfio_ops_init(void)
 {
 	return 0;
 }
+static inline void kvm_vfio_ops_exit(void)
+{
+}
 #endif
 
 #endif