-rw-r--r-- Documentation/DocBook/debugobjects.tmpl | 50
-rw-r--r-- Documentation/RCU/checklist.txt | 6
-rw-r--r-- Documentation/RCU/rcu.txt | 10
-rw-r--r-- Documentation/RCU/stallwarn.txt | 16
-rw-r--r-- Documentation/RCU/torture.txt | 13
-rw-r--r-- Documentation/RCU/trace.txt | 4
-rw-r--r-- Documentation/RCU/whatisRCU.txt | 19
-rw-r--r-- Documentation/atomic_ops.txt | 87
-rw-r--r-- Documentation/kernel-parameters.txt | 5
-rw-r--r-- Documentation/lockdep-design.txt | 63
-rw-r--r-- Documentation/trace/events.txt | 2
-rw-r--r-- Documentation/virtual/kvm/api.txt | 16
-rw-r--r-- MAINTAINERS | 28
-rw-r--r-- Makefile | 2
-rw-r--r-- arch/Kconfig | 4
-rw-r--r-- arch/arm/Kconfig | 4
-rw-r--r-- arch/arm/common/pl330.c | 116
-rw-r--r-- arch/arm/configs/imx_v4_v5_defconfig | 12
-rw-r--r-- arch/arm/kernel/process.c | 6
-rw-r--r-- arch/arm/kernel/setup.c | 1
-rw-r--r-- arch/arm/kernel/unwind.c | 4
-rw-r--r-- arch/arm/mach-exynos/cpu.c | 5
-rw-r--r-- arch/arm/mach-exynos/mct.c | 13
-rw-r--r-- arch/arm/mach-imx/Kconfig | 8
-rw-r--r-- arch/arm/mach-imx/Makefile | 4
-rw-r--r-- arch/arm/mach-imx/clock-imx35.c | 20
-rw-r--r-- arch/arm/mach-imx/mach-cpuimx35.c | 8
-rw-r--r-- arch/arm/mach-mx5/board-mx51_babbage.c | 2
-rw-r--r-- arch/arm/mach-mx5/board-mx53_evk.c | 2
-rw-r--r-- arch/arm/mach-mx5/board-mx53_loco.c | 2
-rw-r--r-- arch/arm/mach-mx5/board-mx53_smd.c | 2
-rw-r--r-- arch/arm/mach-omap2/board-rx51-peripherals.c | 2
-rw-r--r-- arch/arm/mach-omap2/mcbsp.c | 6
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 4
-rw-r--r-- arch/arm/mach-s5pv210/mach-smdkv210.c | 1
-rw-r--r-- arch/arm/mach-shmobile/board-ag5evm.c | 1
-rw-r--r-- arch/arm/mach-shmobile/board-kota2.c | 139
-rw-r--r-- arch/arm/mach-shmobile/clock-sh73a0.c | 18
-rw-r--r-- arch/arm/mm/init.c | 4
-rw-r--r-- arch/arm/mm/proc-v7.S | 6
-rw-r--r-- arch/arm/oprofile/common.c | 2
-rw-r--r-- arch/arm/plat-mxc/cpufreq.c | 3
-rw-r--r-- arch/arm/plat-mxc/include/mach/uncompress.h | 1
-rw-r--r-- arch/arm/plat-mxc/pwm.c | 16
-rw-r--r-- arch/arm/plat-orion/gpio.c | 6
-rw-r--r-- arch/arm/plat-samsung/dev-backlight.c | 1
-rw-r--r-- arch/arm/plat-samsung/include/plat/cpu-freq-core.h | 25
-rw-r--r-- arch/avr32/kernel/process.c | 6
-rw-r--r-- arch/blackfin/kernel/process.c | 6
-rw-r--r-- arch/ia64/Kconfig | 6
-rw-r--r-- arch/ia64/include/asm/cputime.h | 2
-rw-r--r-- arch/ia64/mm/contig.c | 3
-rw-r--r-- arch/ia64/mm/init.c | 4
-rw-r--r-- arch/microblaze/include/asm/memblock.h | 14
-rw-r--r-- arch/microblaze/kernel/process.c | 6
-rw-r--r-- arch/microblaze/kernel/prom.c | 3
-rw-r--r-- arch/mips/Kconfig | 6
-rw-r--r-- arch/mips/kernel/process.c | 6
-rw-r--r-- arch/mips/kernel/setup.c | 3
-rw-r--r-- arch/mips/sgi-ip27/ip27-memory.c | 5
-rw-r--r-- arch/openrisc/include/asm/memblock.h | 24
-rw-r--r-- arch/openrisc/kernel/idle.c | 6
-rw-r--r-- arch/openrisc/kernel/prom.c | 3
-rw-r--r-- arch/powerpc/Kconfig | 4
-rw-r--r-- arch/powerpc/include/asm/cputime.h | 2
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s.h | 33
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s_64.h | 33
-rw-r--r-- arch/powerpc/include/asm/memblock.h | 8
-rw-r--r-- arch/powerpc/kernel/idle.c | 15
-rw-r--r-- arch/powerpc/kernel/machine_kexec.c | 3
-rw-r--r-- arch/powerpc/kernel/prom.c | 20
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c | 2
-rw-r--r-- arch/powerpc/kvm/book3s_pr.c | 2
-rw-r--r-- arch/powerpc/kvm/e500.c | 1
-rw-r--r-- arch/powerpc/mm/init_32.c | 4
-rw-r--r-- arch/powerpc/mm/mem.c | 2
-rw-r--r-- arch/powerpc/mm/numa.c | 60
-rw-r--r-- arch/powerpc/mm/tlb_nohash.c | 1
-rw-r--r-- arch/powerpc/platforms/embedded6xx/wii.c | 23
-rw-r--r-- arch/powerpc/platforms/iseries/setup.c | 12
-rw-r--r-- arch/powerpc/platforms/ps3/mm.c | 1
-rw-r--r-- arch/powerpc/platforms/pseries/lpar.c | 4
-rw-r--r-- arch/s390/Kconfig | 6
-rw-r--r-- arch/s390/include/asm/cputime.h | 2
-rw-r--r-- arch/s390/kernel/process.c | 6
-rw-r--r-- arch/s390/kernel/setup.c | 4
-rw-r--r-- arch/s390/oprofile/hwsampler.c | 7
-rw-r--r-- arch/s390/oprofile/init.c | 375
-rw-r--r-- arch/s390/oprofile/op_counter.h | 23
-rw-r--r-- arch/score/Kconfig | 6
-rw-r--r-- arch/score/kernel/setup.c | 4
-rw-r--r-- arch/sh/Kconfig | 1
-rw-r--r-- arch/sh/boards/board-sh7757lcr.c | 16
-rw-r--r-- arch/sh/include/asm/memblock.h | 4
-rw-r--r-- arch/sh/kernel/idle.c | 6
-rw-r--r-- arch/sh/kernel/machine_kexec.c | 3
-rw-r--r-- arch/sh/kernel/setup.c | 3
-rw-r--r-- arch/sh/mm/Kconfig | 3
-rw-r--r-- arch/sh/mm/init.c | 3
-rw-r--r-- arch/sh/oprofile/common.c | 4
-rw-r--r-- arch/sparc/Kconfig | 4
-rw-r--r-- arch/sparc/include/asm/memblock.h | 8
-rw-r--r-- arch/sparc/kernel/ds.c | 6
-rw-r--r-- arch/sparc/kernel/pci_sun4v.c | 4
-rw-r--r-- arch/sparc/kernel/process_64.c | 6
-rw-r--r-- arch/sparc/kernel/prom_common.c | 4
-rw-r--r-- arch/sparc/kernel/setup_32.c | 2
-rw-r--r-- arch/sparc/mm/btfixup.c | 3
-rw-r--r-- arch/sparc/mm/init_64.c | 32
-rw-r--r-- arch/tile/kernel/process.c | 6
-rw-r--r-- arch/tile/mm/fault.c | 4
-rw-r--r-- arch/um/kernel/process.c | 6
-rw-r--r-- arch/unicore32/kernel/process.c | 6
-rw-r--r-- arch/unicore32/kernel/setup.c | 1
-rw-r--r-- arch/unicore32/mm/init.c | 4
-rw-r--r-- arch/unicore32/mm/mmu.c | 1
-rw-r--r-- arch/x86/Kconfig | 5
-rw-r--r-- arch/x86/include/asm/e820.h | 2
-rw-r--r-- arch/x86/include/asm/insn.h | 7
-rw-r--r-- arch/x86/include/asm/memblock.h | 23
-rw-r--r-- arch/x86/include/asm/perf_event.h | 44
-rw-r--r-- arch/x86/kernel/aperture_64.c | 4
-rw-r--r-- arch/x86/kernel/apic/apic.c | 6
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r-- arch/x86/kernel/check.c | 34
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2
-rw-r--r-- arch/x86/kernel/cpu/mcheck/threshold.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 262
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h | 51
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 90
-rw-r--r-- arch/x86/kernel/dumpstack_32.c | 8
-rw-r--r-- arch/x86/kernel/dumpstack_64.c | 8
-rw-r--r-- arch/x86/kernel/e820.c | 58
-rw-r--r-- arch/x86/kernel/head.c | 2
-rw-r--r-- arch/x86/kernel/head32.c | 7
-rw-r--r-- arch/x86/kernel/head64.c | 7
-rw-r--r-- arch/x86/kernel/irq.c | 6
-rw-r--r-- arch/x86/kernel/jump_label.c | 2
-rw-r--r-- arch/x86/kernel/mpparse.c | 12
-rw-r--r-- arch/x86/kernel/process_32.c | 6
-rw-r--r-- arch/x86/kernel/process_64.c | 10
-rw-r--r-- arch/x86/kernel/setup.c | 21
-rw-r--r-- arch/x86/kernel/trampoline.c | 4
-rw-r--r-- arch/x86/kvm/i8254.c | 10
-rw-r--r-- arch/x86/kvm/x86.c | 19
-rw-r--r-- arch/x86/lib/inat.c | 9
-rw-r--r-- arch/x86/lib/insn.c | 4
-rw-r--r-- arch/x86/lib/x86-opcode-map.txt | 606
-rw-r--r-- arch/x86/mm/Makefile | 2
-rw-r--r-- arch/x86/mm/init.c | 8
-rw-r--r-- arch/x86/mm/init_32.c | 36
-rw-r--r-- arch/x86/mm/init_64.c | 2
-rw-r--r-- arch/x86/mm/memblock.c | 348
-rw-r--r-- arch/x86/mm/memtest.c | 33
-rw-r--r-- arch/x86/mm/numa.c | 37
-rw-r--r-- arch/x86/mm/numa_32.c | 10
-rw-r--r-- arch/x86/mm/numa_64.c | 2
-rw-r--r-- arch/x86/mm/numa_emulation.c | 36
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 4
-rw-r--r-- arch/x86/oprofile/Makefile | 3
-rw-r--r-- arch/x86/oprofile/init.c | 30
-rw-r--r-- arch/x86/oprofile/nmi_int.c | 27
-rw-r--r-- arch/x86/oprofile/nmi_timer_int.c | 50
-rw-r--r-- arch/x86/platform/efi/efi.c | 9
-rw-r--r-- arch/x86/tools/Makefile | 11
-rw-r--r-- arch/x86/tools/gen-insn-attr-x86.awk | 21
-rw-r--r-- arch/x86/tools/insn_sanity.c | 275
-rw-r--r-- arch/x86/xen/enlighten.c | 2
-rw-r--r-- arch/x86/xen/mmu.c | 12
-rw-r--r-- arch/x86/xen/setup.c | 25
-rw-r--r-- block/blk-core.c | 23
-rw-r--r-- block/blk-map.c | 2
-rw-r--r-- block/blk-tag.c | 13
-rw-r--r-- block/cfq-iosched.c | 28
-rw-r--r-- block/ioctl.c | 26
-rw-r--r-- drivers/ata/Kconfig | 2
-rw-r--r-- drivers/base/cpu.c | 7
-rw-r--r-- drivers/block/cciss.c | 6
-rw-r--r-- drivers/block/loop.c | 4
-rw-r--r-- drivers/block/swim3.c | 362
-rw-r--r-- drivers/bluetooth/Kconfig | 6
-rw-r--r-- drivers/bluetooth/btmrvl_sdio.c | 15
-rw-r--r-- drivers/bluetooth/btusb.c | 3
-rw-r--r-- drivers/char/ipmi/ipmi_watchdog.c | 41
-rw-r--r-- drivers/dma/Kconfig | 4
-rw-r--r-- drivers/firmware/iscsi_ibft.c | 42
-rw-r--r-- drivers/firmware/iscsi_ibft_find.c | 26
-rw-r--r-- drivers/gpio/gpio-da9052.c | 21
-rw-r--r-- drivers/gpio/gpio-ml-ioh.c | 32
-rw-r--r-- drivers/gpio/gpio-mpc8xxx.c | 18
-rw-r--r-- drivers/gpio/gpio-pl061.c | 4
-rw-r--r-- drivers/gpu/drm/i915/i915_debugfs.c | 1
-rw-r--r-- drivers/gpu/drm/i915/i915_dma.c | 10
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.c | 43
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 18
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 7
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19
-rw-r--r-- drivers/gpu/drm/i915/i915_reg.h | 30
-rw-r--r-- drivers/gpu/drm/i915/intel_display.c | 87
-rw-r--r-- drivers/gpu/drm/i915/intel_dp.c | 173
-rw-r--r-- drivers/gpu/drm/i915/intel_drv.h | 1
-rw-r--r-- drivers/gpu/drm/i915/intel_lvds.c | 8
-rw-r--r-- drivers/gpu/drm/i915/intel_panel.c | 16
-rw-r--r-- drivers/gpu/drm/i915/intel_sdvo.c | 36
-rw-r--r-- drivers/gpu/drm/radeon/evergreen.c | 12
-rw-r--r-- drivers/gpu/drm/radeon/radeon_atombios.c | 6
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 8
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 16
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 400
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 5
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 22
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 23
-rw-r--r-- drivers/i2c/busses/i2c-eg20t.c | 22
-rw-r--r-- drivers/i2c/busses/i2c-omap.c | 11
-rw-r--r-- drivers/i2c/busses/i2c-s3c2410.c | 3
-rw-r--r-- drivers/infiniband/core/cma.c | 6
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 6
-rw-r--r-- drivers/infiniband/hw/qib/qib_file_ops.c | 4
-rw-r--r-- drivers/input/misc/cma3000_d0x.c | 4
-rw-r--r-- drivers/input/mouse/sentelic.c | 8
-rw-r--r-- drivers/input/mouse/sentelic.h | 3
-rw-r--r-- drivers/input/mouse/synaptics.c | 11
-rw-r--r-- drivers/input/tablet/wacom_wac.c | 4
-rw-r--r-- drivers/iommu/intel-iommu.c | 30
-rw-r--r-- drivers/iommu/iommu.c | 2
-rw-r--r-- drivers/md/bitmap.c | 5
-rw-r--r-- drivers/md/linear.c | 1
-rw-r--r-- drivers/md/md.c | 3
-rw-r--r-- drivers/md/raid5.c | 14
-rw-r--r-- drivers/media/common/tuners/mxl5007t.c | 3
-rw-r--r-- drivers/media/common/tuners/tda18218.c | 2
-rw-r--r-- drivers/media/rc/ati_remote.c | 111
-rw-r--r-- drivers/media/rc/keymaps/rc-ati-x10.c | 96
-rw-r--r-- drivers/media/rc/keymaps/rc-medion-x10.c | 128
-rw-r--r-- drivers/media/rc/keymaps/rc-snapstream-firefly.c | 114
-rw-r--r-- drivers/media/video/au0828/au0828-cards.c | 7
-rw-r--r-- drivers/media/video/gspca/gspca.c | 6
-rw-r--r-- drivers/media/video/m5mols/m5mols.h | 2
-rw-r--r-- drivers/media/video/m5mols/m5mols_core.c | 22
-rw-r--r-- drivers/media/video/mt9m111.c | 1
-rw-r--r-- drivers/media/video/mt9t112.c | 4
-rw-r--r-- drivers/media/video/omap/omap_vout.c | 9
-rw-r--r-- drivers/media/video/omap1_camera.c | 1
-rw-r--r-- drivers/media/video/omap24xxcam-dma.c | 2
-rw-r--r-- drivers/media/video/omap3isp/ispccdc.c | 2
-rw-r--r-- drivers/media/video/omap3isp/ispstat.c | 2
-rw-r--r-- drivers/media/video/omap3isp/ispvideo.c | 1
-rw-r--r-- drivers/media/video/ov6650.c | 2
-rw-r--r-- drivers/media/video/s5p-fimc/fimc-capture.c | 14
-rw-r--r-- drivers/media/video/s5p-fimc/fimc-core.c | 24
-rw-r--r-- drivers/media/video/s5p-fimc/fimc-core.h | 2
-rw-r--r-- drivers/media/video/s5p-fimc/fimc-mdevice.c | 43
-rw-r--r-- drivers/media/video/s5p-fimc/fimc-reg.c | 15
-rw-r--r-- drivers/media/video/s5p-mfc/s5p_mfc_enc.c | 2
-rw-r--r-- drivers/media/video/s5p-tv/mixer_video.c | 1
-rw-r--r-- drivers/media/video/sh_mobile_ceu_camera.c | 34
-rw-r--r-- drivers/media/video/sh_mobile_csi2.c | 4
-rw-r--r-- drivers/media/video/soc_camera.c | 3
-rw-r--r-- drivers/mfd/ab5500-debugfs.c | 2
-rw-r--r-- drivers/mfd/ab8500-core.c | 2
-rw-r--r-- drivers/mfd/adp5520.c | 2
-rw-r--r-- drivers/mfd/da903x.c | 3
-rw-r--r-- drivers/mfd/jz4740-adc.c | 1
-rw-r--r-- drivers/mfd/tps6586x.c | 2
-rw-r--r-- drivers/mfd/tps65910.c | 2
-rw-r--r-- drivers/mfd/twl-core.c | 16
-rw-r--r-- drivers/mfd/twl4030-irq.c | 18
-rw-r--r-- drivers/mfd/wm8994-core.c | 1
-rw-r--r-- drivers/mmc/core/host.c | 11
-rw-r--r-- drivers/mmc/host/mmci.c | 14
-rw-r--r-- drivers/mmc/host/sdhci-cns3xxx.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-dove.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-esdhc-imx.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-of-esdhc.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-of-hlwd.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-pci.c | 26
-rw-r--r-- drivers/mmc/host/sdhci-pltfm.c | 18
-rw-r--r-- drivers/mmc/host/sdhci-pltfm.h | 6
-rw-r--r-- drivers/mmc/host/sdhci-pxav2.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-pxav3.c | 5
-rw-r--r-- drivers/mmc/host/sdhci-s3c.c | 21
-rw-r--r-- drivers/mmc/host/sdhci-tegra.c | 5
-rw-r--r-- drivers/mmc/host/sdhci.c | 2
-rw-r--r-- drivers/mmc/host/sdhci.h | 2
-rw-r--r-- drivers/mmc/host/vub300.c | 2
-rw-r--r-- drivers/mtd/maps/plat-ram.c | 12
-rw-r--r-- drivers/mtd/maps/pxa2xx-flash.c | 2
-rw-r--r-- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 2
-rw-r--r-- drivers/mtd/nand/ndfc.c | 2
-rw-r--r-- drivers/net/ethernet/freescale/Kconfig | 4
-rw-r--r-- drivers/net/ethernet/freescale/fec.c | 11
-rw-r--r-- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 53
-rw-r--r-- drivers/net/ethernet/marvell/skge.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 1
-rw-r--r-- drivers/net/ethernet/realtek/r8169.c | 14
-rw-r--r-- drivers/net/ethernet/ti/davinci_cpdma.c | 2
-rw-r--r-- drivers/net/ppp/pptp.c | 4
-rw-r--r-- drivers/net/usb/asix.c | 4
-rw-r--r-- drivers/net/wireless/ath/ath9k/main.c | 5
-rw-r--r-- drivers/net/wireless/ath/ath9k/rc.c | 4
-rw-r--r-- drivers/net/wireless/b43/pio.c | 16
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-agn-rxon.c | 4
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-agn-tx.c | 5
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-agn.c | 6
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-trans-pcie.c | 4
-rw-r--r-- drivers/net/wireless/mwifiex/cmdevt.c | 9
-rw-r--r-- drivers/net/wireless/mwifiex/sta_ioctl.c | 7
-rw-r--r-- drivers/net/wireless/rtlwifi/rtl8192ce/phy.c | 2
-rw-r--r-- drivers/net/wireless/rtlwifi/rtl8192cu/phy.c | 2
-rw-r--r-- drivers/net/wireless/rtlwifi/rtl8192de/phy.c | 2
-rw-r--r-- drivers/net/wireless/rtlwifi/rtl8192se/phy.c | 2
-rw-r--r-- drivers/of/platform.c | 2
-rw-r--r-- drivers/oprofile/nmi_timer_int.c | 173
-rw-r--r-- drivers/oprofile/oprof.c | 30
-rw-r--r-- drivers/oprofile/oprof.h | 10
-rw-r--r-- drivers/oprofile/oprofile_files.c | 7
-rw-r--r-- drivers/oprofile/oprofilefs.c | 11
-rw-r--r-- drivers/oprofile/timer_int.c | 30
-rw-r--r-- drivers/pci/ats.c | 1
-rw-r--r-- drivers/pci/hotplug/acpiphp_glue.c | 30
-rw-r--r-- drivers/pci/iov.c | 7
-rw-r--r-- drivers/pci/pci.c | 9
-rw-r--r-- drivers/rtc/interface.c | 44
-rw-r--r-- drivers/rtc/rtc-m41t80.c | 9
-rw-r--r-- drivers/s390/scsi/zfcp_scsi.c | 4
-rw-r--r-- drivers/sbus/char/bbc_i2c.c | 27
-rw-r--r-- drivers/sbus/char/display7seg.c | 13
-rw-r--r-- drivers/sbus/char/envctrl.c | 12
-rw-r--r-- drivers/sbus/char/flash.c | 12
-rw-r--r-- drivers/sbus/char/uctrl.c | 12
-rw-r--r-- drivers/scsi/bnx2i/bnx2i_hwi.c | 5
-rw-r--r-- drivers/scsi/fcoe/fcoe.c | 116
-rw-r--r-- drivers/scsi/fcoe/fcoe_ctlr.c | 4
-rw-r--r-- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 2
-rw-r--r-- drivers/scsi/qla2xxx/qla_attr.c | 27
-rw-r--r-- drivers/scsi/qla2xxx/qla_dbg.c | 8
-rw-r--r-- drivers/scsi/qla2xxx/qla_gbl.h | 1
-rw-r--r-- drivers/scsi/qla2xxx/qla_init.c | 3
-rw-r--r-- drivers/scsi/qla2xxx/qla_iocb.c | 14
-rw-r--r-- drivers/scsi/qla2xxx/qla_isr.c | 2
-rw-r--r-- drivers/scsi/qla2xxx/qla_mbx.c | 25
-rw-r--r-- drivers/scsi/qla2xxx/qla_nx.c | 42
-rw-r--r-- drivers/scsi/qla2xxx/qla_nx.h | 4
-rw-r--r-- drivers/scsi/qla2xxx/qla_os.c | 86
-rw-r--r-- drivers/scsi/qla2xxx/qla_version.h | 2
-rw-r--r-- drivers/scsi/qla4xxx/ql4_def.h | 55
-rw-r--r-- drivers/scsi/qla4xxx/ql4_fw.h | 8
-rw-r--r-- drivers/scsi/qla4xxx/ql4_glbl.h | 16
-rw-r--r-- drivers/scsi/qla4xxx/ql4_init.c | 243
-rw-r--r-- drivers/scsi/qla4xxx/ql4_mbx.c | 11
-rw-r--r-- drivers/scsi/qla4xxx/ql4_os.c | 1084
-rw-r--r-- drivers/scsi/qla4xxx/ql4_version.h | 2
-rw-r--r-- drivers/ssb/driver_pcicore.c | 8
-rw-r--r-- drivers/staging/rtl8712/usb_intf.c | 1
-rw-r--r-- drivers/staging/tidspbridge/core/dsp-clock.c | 15
-rw-r--r-- drivers/staging/tidspbridge/rmgr/drv_interface.c | 4
-rw-r--r-- drivers/usb/class/cdc-acm.c | 10
-rw-r--r-- drivers/usb/dwc3/core.c | 2
-rw-r--r-- drivers/usb/gadget/epautoconf.c | 3
-rw-r--r-- drivers/usb/gadget/f_mass_storage.c | 1
-rw-r--r-- drivers/usb/host/isp1760-if.c | 8
-rw-r--r-- drivers/usb/musb/musb_host.c | 4
-rw-r--r-- drivers/usb/renesas_usbhs/mod.c | 2
-rw-r--r-- drivers/usb/renesas_usbhs/mod_host.c | 1
-rw-r--r-- drivers/usb/serial/option.c | 7
-rw-r--r-- drivers/watchdog/coh901327_wdt.c | 6
-rw-r--r-- drivers/watchdog/hpwdt.c | 5
-rw-r--r-- drivers/watchdog/iTCO_wdt.c | 6
-rw-r--r-- drivers/watchdog/sp805_wdt.c | 2
-rw-r--r-- drivers/xen/swiotlb-xen.c | 4
-rw-r--r-- drivers/xen/xenbus/xenbus_xs.c | 13
-rw-r--r-- firmware/README.AddingFirmware | 3
-rw-r--r-- fs/btrfs/async-thread.c | 120
-rw-r--r-- fs/btrfs/async-thread.h | 4
-rw-r--r-- fs/btrfs/ctree.h | 3
-rw-r--r-- fs/btrfs/delayed-inode.c | 4
-rw-r--r-- fs/btrfs/disk-io.c | 34
-rw-r--r-- fs/btrfs/extent-tree.c | 45
-rw-r--r-- fs/btrfs/file.c | 8
-rw-r--r-- fs/btrfs/inode.c | 189
-rw-r--r-- fs/btrfs/ioctl.c | 6
-rw-r--r-- fs/btrfs/relocation.c | 2
-rw-r--r-- fs/btrfs/scrub.c | 8
-rw-r--r-- fs/btrfs/super.c | 32
-rw-r--r-- fs/btrfs/volumes.c | 8
-rw-r--r-- fs/ceph/dir.c | 29
-rw-r--r-- fs/cifs/connect.c | 4
-rw-r--r-- fs/compat_ioctl.c | 38
-rw-r--r-- fs/fs-writeback.c | 11
-rw-r--r-- fs/ioctl.c | 2
-rw-r--r-- fs/locks.c | 11
-rw-r--r-- fs/minix/inode.c | 34
-rw-r--r-- fs/nfs/file.c | 2
-rw-r--r-- fs/nfs/nfs4proc.c | 24
-rw-r--r-- fs/nfs/nfs4state.c | 33
-rw-r--r-- fs/nilfs2/ioctl.c | 16
-rw-r--r-- fs/proc/stat.c | 4
-rw-r--r-- fs/xfs/xfs_super.c | 30
-rw-r--r-- fs/xfs/xfs_sync.c | 36
-rw-r--r-- fs/xfs/xfs_sync.h | 2
-rw-r--r-- include/asm-generic/cputime.h | 8
-rw-r--r-- include/linux/bitops.h | 10
-rw-r--r-- include/linux/blkdev.h | 3
-rw-r--r-- include/linux/bootmem.h | 2
-rw-r--r-- include/linux/clocksource.h | 11
-rw-r--r-- include/linux/cpu.h | 1
-rw-r--r-- include/linux/debugobjects.h | 6
-rw-r--r-- include/linux/dma_remapping.h | 2
-rw-r--r-- include/linux/hardirq.h | 21
-rw-r--r-- include/linux/jump_label.h | 27
-rw-r--r-- include/linux/kvm.h | 1
-rw-r--r-- include/linux/lglock.h | 36
-rw-r--r-- include/linux/lockdep.h | 4
-rw-r--r-- include/linux/memblock.h | 170
-rw-r--r-- include/linux/mm.h | 34
-rw-r--r-- include/linux/mmzone.h | 8
-rw-r--r-- include/linux/perf_event.h | 8
-rw-r--r-- include/linux/poison.h | 6
-rw-r--r-- include/linux/rcupdate.h | 115
-rw-r--r-- include/linux/sched.h | 8
-rw-r--r-- include/linux/security.h | 2
-rw-r--r-- include/linux/srcu.h | 87
-rw-r--r-- include/linux/tick.h | 11
-rw-r--r-- include/linux/wait.h | 4
-rw-r--r-- include/media/soc_camera.h | 7
-rw-r--r-- include/net/dst.h | 1
-rw-r--r-- include/net/flow.h | 1
-rw-r--r-- include/net/ip_vs.h | 2
-rw-r--r-- include/net/sctp/structs.h | 4
-rw-r--r-- include/net/sock.h | 4
-rw-r--r-- include/scsi/libfcoe.h | 3
-rw-r--r-- include/trace/events/rcu.h | 122
-rw-r--r-- include/trace/events/writeback.h | 15
-rw-r--r-- include/xen/interface/io/xs_wire.h | 3
-rw-r--r-- init/Kconfig | 10
-rw-r--r-- init/main.c | 3
-rw-r--r-- kernel/cgroup.c | 5
-rw-r--r-- kernel/cpu.c | 1
-rw-r--r-- kernel/cpuset.c | 29
-rw-r--r-- kernel/debug/kdb/kdb_support.c | 2
-rw-r--r-- kernel/events/Makefile | 2
-rw-r--r-- kernel/events/callchain.c | 191
-rw-r--r-- kernel/events/core.c | 306
-rw-r--r-- kernel/events/internal.h | 39
-rw-r--r-- kernel/exit.c | 9
-rw-r--r-- kernel/futex.c | 28
-rw-r--r-- kernel/hung_task.c | 14
-rw-r--r-- kernel/jump_label.c | 49
-rw-r--r-- kernel/lockdep.c | 83
-rw-r--r-- kernel/panic.c | 17
-rw-r--r-- kernel/printk.c | 11
-rw-r--r-- kernel/ptrace.c | 13
-rw-r--r-- kernel/rcu.h | 7
-rw-r--r-- kernel/rcupdate.c | 12
-rw-r--r-- kernel/rcutiny.c | 149
-rw-r--r-- kernel/rcutiny_plugin.h | 29
-rw-r--r-- kernel/rcutorture.c | 225
-rw-r--r-- kernel/rcutree.c | 290
-rw-r--r-- kernel/rcutree.h | 26
-rw-r--r-- kernel/rcutree_plugin.h | 289
-rw-r--r-- kernel/rcutree_trace.c | 12
-rw-r--r-- kernel/rtmutex-debug.c | 1
-rw-r--r-- kernel/rtmutex.c | 8
-rw-r--r-- kernel/signal.c | 2
-rw-r--r-- kernel/softirq.c | 4
-rw-r--r-- kernel/sysctl_binary.c | 2
-rw-r--r-- kernel/time/clockevents.c | 1
-rw-r--r-- kernel/time/clocksource.c | 12
-rw-r--r-- kernel/time/tick-sched.c | 114
-rw-r--r-- kernel/timer.c | 62
-rw-r--r-- kernel/trace/trace.c | 106
-rw-r--r-- kernel/trace/trace.h | 2
-rw-r--r-- kernel/trace/trace_events_filter.c | 26
-rw-r--r-- kernel/trace/trace_irqsoff.c | 13
-rw-r--r-- kernel/trace/trace_output.c | 16
-rw-r--r-- kernel/trace/trace_sched_wakeup.c | 13
-rw-r--r-- kernel/wait.c | 4
-rw-r--r-- lib/debugobjects.c | 54
-rw-r--r-- mm/Kconfig | 6
-rw-r--r-- mm/filemap.c | 7
-rw-r--r-- mm/hugetlb.c | 2
-rw-r--r-- mm/memblock.c | 961
-rw-r--r-- mm/memcontrol.c | 3
-rw-r--r-- mm/mempolicy.c | 11
-rw-r--r-- mm/nobootmem.c | 45
-rw-r--r-- mm/oom_kill.c | 2
-rw-r--r-- mm/page_alloc.c | 508
-rw-r--r-- mm/percpu.c | 6
-rw-r--r-- mm/vmalloc.c | 2
-rw-r--r-- net/batman-adv/translation-table.c | 27
-rw-r--r-- net/bluetooth/bnep/core.c | 8
-rw-r--r-- net/bluetooth/cmtp/core.c | 5
-rw-r--r-- net/bluetooth/hci_conn.c | 2
-rw-r--r-- net/bluetooth/hci_core.c | 2
-rw-r--r-- net/bluetooth/hci_event.c | 2
-rw-r--r-- net/bluetooth/l2cap_core.c | 12
-rw-r--r-- net/bluetooth/rfcomm/core.c | 1
-rw-r--r-- net/bridge/br_netfilter.c | 8
-rw-r--r-- net/core/flow.c | 12
-rw-r--r-- net/core/net-sysfs.c | 7
-rw-r--r-- net/core/sock.c | 6
-rw-r--r-- net/ipv4/ipconfig.c | 4
-rw-r--r-- net/ipv4/ipip.c | 7
-rw-r--r-- net/ipv4/route.c | 112
-rw-r--r-- net/ipv6/addrconf.c | 3
-rw-r--r-- net/ipv6/ip6_output.c | 2
-rw-r--r-- net/ipv6/route.c | 2
-rw-r--r-- net/ipv6/sit.c | 7
-rw-r--r-- net/llc/af_llc.c | 14
-rw-r--r-- net/mac80211/agg-tx.c | 86
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 22
-rw-r--r-- net/netfilter/xt_connbytes.c | 6
-rw-r--r-- net/nfc/nci/core.c | 2
-rw-r--r-- net/packet/af_packet.c | 12
-rw-r--r-- net/sched/sch_gred.c | 2
-rw-r--r-- net/sched/sch_mqprio.c | 2
-rw-r--r-- net/sched/sch_netem.c | 7
-rw-r--r-- net/sched/sch_qfq.c | 4
-rw-r--r-- net/sctp/associola.c | 2
-rw-r--r-- net/sctp/output.c | 8
-rw-r--r-- net/sctp/outqueue.c | 6
-rw-r--r-- net/sctp/protocol.c | 3
-rw-r--r-- net/sctp/socket.c | 2
-rw-r--r-- net/sctp/sysctl.c | 13
-rw-r--r-- net/socket.c | 16
-rw-r--r-- net/sunrpc/sched.c | 30
-rw-r--r-- net/sunrpc/xprt.c | 10
-rw-r--r-- net/xfrm/xfrm_policy.c | 18
-rw-r--r-- scripts/kconfig/Makefile | 5
-rw-r--r-- security/integrity/evm/evm_crypto.c | 19
-rw-r--r-- security/security.c | 2
-rw-r--r-- security/selinux/netport.c | 4
-rw-r--r-- sound/atmel/ac97c.c | 4
-rw-r--r-- sound/pci/hda/hda_intel.c | 4
-rw-r--r-- sound/pci/hda/patch_sigmatel.c | 6
-rw-r--r-- sound/soc/codecs/Kconfig | 2
-rw-r--r-- sound/soc/codecs/jz4740.c | 1
-rw-r--r-- sound/soc/codecs/wm8776.c | 1
-rw-r--r-- sound/soc/codecs/wm8958-dsp2.c | 2
-rw-r--r-- sound/soc/codecs/wm8996.c | 1
-rw-r--r-- sound/soc/mxs/mxs-pcm.c | 3
-rw-r--r-- sound/soc/mxs/mxs-sgtl5000.c | 1
-rw-r--r-- sound/soc/pxa/hx4700.c | 5
-rw-r--r-- sound/soc/samsung/jive_wm8750.c | 3
-rw-r--r-- sound/soc/samsung/smdk2443_wm9710.c | 1
-rw-r--r-- tools/perf/Documentation/perf-annotate.txt | 4
-rw-r--r-- tools/perf/Documentation/perf-buildid-list.txt | 2
-rw-r--r-- tools/perf/Documentation/perf-evlist.txt | 2
-rw-r--r-- tools/perf/Documentation/perf-kmem.txt | 2
-rw-r--r-- tools/perf/Documentation/perf-lock.txt | 2
-rw-r--r-- tools/perf/Documentation/perf-record.txt | 2
-rw-r--r-- tools/perf/Documentation/perf-report.txt | 11
-rw-r--r-- tools/perf/Documentation/perf-sched.txt | 2
-rw-r--r-- tools/perf/Documentation/perf-script.txt | 9
-rw-r--r-- tools/perf/Documentation/perf-test.txt | 8
-rw-r--r-- tools/perf/Documentation/perf-timechart.txt | 2
-rw-r--r-- tools/perf/Makefile | 1
-rw-r--r-- tools/perf/builtin-annotate.c | 132
-rw-r--r-- tools/perf/builtin-buildid-list.c | 53
-rw-r--r-- tools/perf/builtin-diff.c | 21
-rw-r--r-- tools/perf/builtin-evlist.c | 2
-rw-r--r-- tools/perf/builtin-inject.c | 118
-rw-r--r-- tools/perf/builtin-kmem.c | 16
-rw-r--r-- tools/perf/builtin-kvm.c | 2
-rw-r--r-- tools/perf/builtin-lock.c | 12
-rw-r--r-- tools/perf/builtin-probe.c | 1
-rw-r--r-- tools/perf/builtin-record.c | 603
-rw-r--r-- tools/perf/builtin-report.c | 236
-rw-r--r-- tools/perf/builtin-sched.c | 200
-rw-r--r-- tools/perf/builtin-script.c | 130
-rw-r--r-- tools/perf/builtin-stat.c | 134
-rw-r--r-- tools/perf/builtin-test.c | 545
-rw-r--r-- tools/perf/builtin-timechart.c | 38
-rw-r--r-- tools/perf/builtin-top.c | 558
-rw-r--r-- tools/perf/perf.c | 33
-rw-r--r-- tools/perf/perf.h | 24
-rw-r--r-- tools/perf/util/annotate.c | 8
-rw-r--r-- tools/perf/util/annotate.h | 5
-rw-r--r-- tools/perf/util/build-id.c | 26
-rw-r--r-- tools/perf/util/build-id.h | 2
-rw-r--r-- tools/perf/util/callchain.h | 3
-rw-r--r-- tools/perf/util/cgroup.c | 15
-rw-r--r-- tools/perf/util/config.c | 5
-rw-r--r-- tools/perf/util/debugfs.c | 35
-rw-r--r-- tools/perf/util/debugfs.h | 31
-rw-r--r-- tools/perf/util/event.c | 360
-rw-r--r-- tools/perf/util/event.h | 68
-rw-r--r-- tools/perf/util/evlist.c | 299
-rw-r--r-- tools/perf/util/evlist.h | 43
-rw-r--r-- tools/perf/util/evsel.c | 154
-rw-r--r-- tools/perf/util/evsel.h | 8
-rw-r--r-- tools/perf/util/header.c | 741
-rw-r--r-- tools/perf/util/header.h | 51
-rw-r--r-- tools/perf/util/hist.h | 3
-rw-r--r-- tools/perf/util/include/linux/bitops.h | 118
-rw-r--r-- tools/perf/util/map.c | 4
-rw-r--r-- tools/perf/util/map.h | 19
-rw-r--r-- tools/perf/util/parse-events.c | 30
-rw-r--r-- tools/perf/util/parse-events.h | 1
-rw-r--r-- tools/perf/util/probe-finder.h | 1
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-perl.c | 75
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-python.c | 4
-rw-r--r-- tools/perf/util/session.c | 342
-rw-r--r-- tools/perf/util/session.h | 72
-rw-r--r-- tools/perf/util/setup.py | 3
-rw-r--r-- tools/perf/util/symbol.c | 11
-rw-r--r-- tools/perf/util/symbol.h | 1
-rw-r--r-- tools/perf/util/thread.c | 6
-rw-r--r-- tools/perf/util/thread.h | 14
-rw-r--r-- tools/perf/util/tool.h | 50
-rw-r--r-- tools/perf/util/top.h | 20
-rw-r--r-- tools/perf/util/trace-event-info.c | 28
-rw-r--r-- tools/perf/util/trace-event-scripting.c | 2
-rw-r--r-- tools/perf/util/trace-event.h | 8
-rw-r--r-- tools/perf/util/ui/browsers/annotate.c | 16
-rw-r--r-- tools/perf/util/ui/browsers/hists.c | 2
-rw-r--r-- tools/perf/util/ui/progress.c | 3
-rw-r--r-- tools/perf/util/usage.c | 5
-rw-r--r-- tools/perf/util/util.h | 11
-rw-r--r-- tools/perf/util/values.c | 1
-rw-r--r-- virt/kvm/assigned-dev.c | 93
626 files changed, 13309 insertions, 7394 deletions
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
index 08ff908aa7a2..24979f691e3e 100644
--- a/Documentation/DocBook/debugobjects.tmpl
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -96,6 +96,7 @@
 <listitem><para>debug_object_deactivate</para></listitem>
 <listitem><para>debug_object_destroy</para></listitem>
 <listitem><para>debug_object_free</para></listitem>
+<listitem><para>debug_object_assert_init</para></listitem>
 </itemizedlist>
 Each of these functions takes the address of the real object and
 a pointer to the object type specific debug description
@@ -273,6 +274,26 @@
 debug checks.
 </para>
 </sect1>
+
+<sect1 id="debug_object_assert_init">
+<title>debug_object_assert_init</title>
+<para>
+This function is called to assert that an object has been
+initialized.
+</para>
+<para>
+When the real object is not tracked by debugobjects, it calls
+fixup_assert_init of the object type description structure
+provided by the caller, with the hardcoded object state
+ODEBUG_STATE_NOTAVAILABLE. The fixup function can correct the
+problem by calling debug_object_init and other specific
+initializing functions.
+</para>
+<para>
+When the real object is already tracked by debugobjects it is
+ignored.
+</para>
+</sect1>
 </chapter>
 <chapter id="fixupfunctions">
 <title>Fixup functions</title>
@@ -381,6 +402,35 @@
 statistics.
 </para>
 </sect1>
+<sect1 id="fixup_assert_init">
+<title>fixup_assert_init</title>
+<para>
+This function is called from the debug code whenever a problem
+in debug_object_assert_init is detected.
+</para>
+<para>
+Called from debug_object_assert_init() with a hardcoded state
+ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
+debug bucket.
+</para>
+<para>
+The function returns 1 when the fixup was successful,
+otherwise 0. The return value is used to update the
+statistics.
+</para>
+<para>
+Note, this function should make sure debug_object_init() is
+called before returning.
+</para>
+<para>
+The handling of statically initialized objects is a special
+case. The fixup function should check whether this is a legitimate
+case of a statically initialized object. If it is, only
+debug_object_init() should be called to make the object known to
+the tracker. The function should then return 0 because this is not
+a real fixup.
+</para>
+</sect1>
 </chapter>
 <chapter id="bugs">
 <title>Known Bugs And Assumptions</title>
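To make the statically-initialized special case concrete, here is a minimal sketch of a fixup_assert_init callback. It is illustrative only and not part of this patch: struct foo, FOO_STATIC_MAGIC, and foo_is_static_object() are assumed placeholder names, while the callback signature, the ODEBUG_STATE_NOTAVAILABLE state, and debug_object_init() follow the debugobjects API this patch documents.

	#include <linux/types.h>
	#include <linux/debugobjects.h>

	#define FOO_STATIC_MAGIC	0x57a71c05	/* assumed marker */

	struct foo {
		unsigned int magic;
		/* payload ... */
	};

	static struct debug_obj_descr foo_debug_descr;

	/* Assumed helper: recognize a legitimately statically
	 * initialized instance of struct foo. */
	static bool foo_is_static_object(struct foo *obj)
	{
		return obj->magic == FOO_STATIC_MAGIC;
	}

	/* Called with ODEBUG_STATE_NOTAVAILABLE when the object is
	 * not yet tracked by debugobjects. */
	static int foo_fixup_assert_init(void *addr, enum debug_obj_state state)
	{
		struct foo *obj = addr;

		if (state != ODEBUG_STATE_NOTAVAILABLE)
			return 0;

		if (foo_is_static_object(obj)) {
			/* Legitimate static initializer: make the object
			 * known to the tracker, but return 0 because no
			 * real fixup was performed. */
			debug_object_init(obj, &foo_debug_descr);
			return 0;
		}

		/* Genuinely missing initialization: fix it up and
		 * report a successful fixup to the statistics. */
		debug_object_init(obj, &foo_debug_descr);
		return 1;
	}

	static struct debug_obj_descr foo_debug_descr = {
		.name			= "foo",
		.fixup_assert_init	= foo_fixup_assert_init,
	};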
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 0c134f8afc6f..bff2d8be1e18 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -328,6 +328,12 @@ over a rather long period of time, but improvements are always welcome!
 	RCU rather than SRCU, because RCU is almost always faster and
 	easier to use than is SRCU.
 
+	If you need to enter your read-side critical section in a
+	hardirq or exception handler, and then exit that same read-side
+	critical section in the task that was interrupted, then you need
+	to use srcu_read_lock_raw() and srcu_read_unlock_raw(), which
+	avoid the lockdep checking that would otherwise make this
+	practice illegal.
+
 	Also unlike other forms of RCU, explicit initialization
 	and cleanup is required via init_srcu_struct() and
 	cleanup_srcu_struct().  These are passed a "struct srcu_struct"
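As a concrete illustration of that hand-off, here is a sketch (not part of this patch; the my_* names are assumptions, and init_srcu_struct(&my_srcu) is presumed to have run at setup time). The index returned at hardirq entry is simply carried to the interrupted task for the matching release:

	#include <linux/interrupt.h>
	#include <linux/srcu.h>

	static struct srcu_struct my_srcu;
	static int my_srcu_idx;		/* handed from irq entry to the task */

	/* Hardirq handler: enter the SRCU read-side critical section. */
	static irqreturn_t my_irq_handler(int irq, void *dev_id)
	{
		my_srcu_idx = srcu_read_lock_raw(&my_srcu);
		/* ... publish protected data for the interrupted task ... */
		return IRQ_HANDLED;
	}

	/* Runs later in the task that was interrupted. */
	static void my_task_side(void)
	{
		/* ... consume the protected data ... */
		srcu_read_unlock_raw(&my_srcu, my_srcu_idx);
	}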
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 31852705b586..bf778332a28f 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -38,11 +38,11 @@ o How can the updater tell when a grace period has completed
 
 	Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the
 	same effect, but require that the readers manipulate CPU-local
-	counters.  These counters allow limited types of blocking
-	within RCU read-side critical sections.  SRCU also uses
-	CPU-local counters, and permits general blocking within
-	RCU read-side critical sections.  These two variants of
-	RCU detect grace periods by sampling these counters.
+	counters.  These counters allow limited types of blocking within
+	RCU read-side critical sections.  SRCU also uses CPU-local
+	counters, and permits general blocking within RCU read-side
+	critical sections.  These variants of RCU detect grace periods
+	by sampling these counters.
 
 o	If I am running on a uniprocessor kernel, which can only do one
 	thing at a time, why should I wait for a grace period?
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 4e959208f736..083d88cbc089 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -101,6 +101,11 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
 	CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning
 	messages.
 
+o	A hardware or software issue shuts off the scheduler-clock
+	interrupt on a CPU that is not in dyntick-idle mode.  This
+	problem really has happened, and seems to be most likely to
+	result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels.
+
 o	A bug in the RCU implementation.
 
 o	A hardware failure.  This is quite unlikely, but has occurred
@@ -109,12 +114,11 @@ o	A hardware failure.  This is quite unlikely, but has occurred
 	This resulted in a series of RCU CPU stall warnings, eventually
 	leading to the realization that the CPU had failed.
 
-The RCU, RCU-sched, and RCU-bh implementations have CPU stall
-warnings.  SRCU does not have its own CPU stall warnings, but its
-calls to synchronize_sched() will result in RCU-sched detecting
-RCU-sched-related CPU stalls.  Please note that RCU only detects
-CPU stalls when there is a grace period in progress.  No grace period,
-no CPU stall warnings.
+The RCU, RCU-sched, and RCU-bh implementations have CPU stall warnings.
+SRCU does not have its own CPU stall warnings, but its calls to
+synchronize_sched() will result in RCU-sched detecting RCU-sched-related
+CPU stalls.  Please note that RCU only detects CPU stalls when there is
+a grace period in progress.  No grace period, no CPU stall warnings.
 
 To diagnose the cause of the stall, inspect the stack traces.
 The offending function will usually be near the top of the stack.
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 783d6c134d3f..d67068d0d2b9 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -61,11 +61,24 @@ nreaders This is the number of RCU reading threads supported.
 	To properly exercise RCU implementations with preemptible
 	read-side critical sections.
 
+onoff_interval
+		The number of seconds between each attempt to execute a
+		randomly selected CPU-hotplug operation.  Defaults to
+		zero, which disables CPU hotplugging.  In HOTPLUG_CPU=n
+		kernels, rcutorture will silently refuse to do any
+		CPU-hotplug operations regardless of what value is
+		specified for onoff_interval.
+
 shuffle_interval
 		The number of seconds to keep the test threads affinitied
 		to a particular subset of the CPUs, defaults to 3 seconds.
 		Used in conjunction with test_no_idle_hz.
 
+shutdown_secs	The number of seconds to run the test before terminating
+		the test and powering off the system.  The default is
+		zero, which disables test termination and system shutdown.
+		This capability is useful for automated testing.
+
 stat_interval	The number of seconds between output of torture
 		statistics (via printk()).  Regardless of the interval,
 		statistics are printed when the module is unloaded.
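As a usage illustration (not itself part of the patch), an automated run that attempts a randomly selected CPU-hotplug operation every 30 seconds and powers the machine off after an hour would set both of the new module parameters at load time:

	modprobe rcutorture onoff_interval=30 shutdown_secs=3600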
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index aaf65f6c6cd7..49587abfc2f7 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -105,14 +105,10 @@ o "dt" is the current value of the dyntick counter that is incremented
 	or one greater than the interrupt-nesting depth otherwise.
 	The number after the second "/" is the NMI nesting depth.
 
-	This field is displayed only for CONFIG_NO_HZ kernels.
-
 o	"df" is the number of times that some other CPU has forced a
 	quiescent state on behalf of this CPU due to this CPU being in
 	dynticks-idle state.
 
-	This field is displayed only for CONFIG_NO_HZ kernels.
-
 o	"of" is the number of times that some other CPU has forced a
 	quiescent state on behalf of this CPU due to this CPU being
 	offline.  In a perfect world, this might never happen, but it
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 6ef692667e2f..6bbe8dcdc3da 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -4,6 +4,7 @@ to start learning about RCU:
 1.	What is RCU, Fundamentally?  http://lwn.net/Articles/262464/
 2.	What is RCU? Part 2: Usage   http://lwn.net/Articles/263130/
 3.	RCU part 3: the RCU API      http://lwn.net/Articles/264090/
+4.	The RCU API, 2010 Edition    http://lwn.net/Articles/418853/
 
 
 What is RCU?
@@ -834,6 +835,8 @@ SRCU:	Critical sections	Grace period		Barrier
 
 	srcu_read_lock		synchronize_srcu	N/A
 	srcu_read_unlock	synchronize_srcu_expedited
+	srcu_read_lock_raw
+	srcu_read_unlock_raw
 	srcu_dereference
 
 SRCU:	Initialization/cleanup
@@ -855,27 +858,33 @@ list can be helpful:
 
 a.	Will readers need to block?  If so, you need SRCU.
 
-b.	What about the -rt patchset?  If readers would need to block
+b.	Is it necessary to start a read-side critical section in a
+	hardirq handler or exception handler, and then to complete
+	this read-side critical section in the task that was
+	interrupted?  If so, you need SRCU's srcu_read_lock_raw() and
+	srcu_read_unlock_raw() primitives.
+
+c.	What about the -rt patchset?  If readers would need to block
 	in a non-rt kernel, you need SRCU.  If readers would block
 	in a -rt kernel, but not in a non-rt kernel, SRCU is not
 	necessary.
 
-c.	Do you need to treat NMI handlers, hardirq handlers,
+d.	Do you need to treat NMI handlers, hardirq handlers,
 	and code segments with preemption disabled (whether
 	via preempt_disable(), local_irq_save(), local_bh_disable(),
 	or some other mechanism) as if they were explicit RCU readers?
 	If so, you need RCU-sched.
 
-d.	Do you need RCU grace periods to complete even in the face
+e.	Do you need RCU grace periods to complete even in the face
 	of softirq monopolization of one or more of the CPUs?  For
 	example, is your code subject to network-based denial-of-service
 	attacks?  If so, you need RCU-bh.
 
-e.	Is your workload too update-intensive for normal use of
+f.	Is your workload too update-intensive for normal use of
 	RCU, but inappropriate for other synchronization mechanisms?
 	If so, consider SLAB_DESTROY_BY_RCU.  But please be careful!
 
-f.	Otherwise, use RCU.
+g.	Otherwise, use RCU.
 
 Of course, this all assumes that you have determined that RCU is in fact
 the right tool for your job.
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 3bd585b44927..27f2b21a9d5c 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -84,6 +84,93 @@ compiler optimizes the section accessing atomic_t variables.
 
 *** YOU HAVE BEEN WARNED! ***
 
+Properly aligned pointers, longs, ints, and chars (and unsigned
+equivalents) may be atomically loaded from and stored to in the same
+sense as described for atomic_read() and atomic_set().  The ACCESS_ONCE()
+macro should be used to prevent the compiler from using optimizations
+that might otherwise optimize accesses out of existence on the one hand,
+or that might create unsolicited accesses on the other.
+
+For example consider the following code:
+
+	while (a > 0)
+		do_something();
+
+If the compiler can prove that do_something() does not store to the
+variable a, then the compiler is within its rights transforming this to
+the following:
+
+	tmp = a;
+	if (tmp > 0)
+		for (;;)
+			do_something();
+
+If you don't want the compiler to do this (and you probably don't), then
+you should use something like the following:
+
+	while (ACCESS_ONCE(a) > 0)
+		do_something();
+
+Alternatively, you could place a barrier() call in the loop.
+
+For another example, consider the following code:
+
+	tmp_a = a;
+	do_something_with(tmp_a);
+	do_something_else_with(tmp_a);
+
+If the compiler can prove that do_something_with() does not store to the
+variable a, then the compiler is within its rights to manufacture an
+additional load as follows:
+
+	tmp_a = a;
+	do_something_with(tmp_a);
+	tmp_a = a;
+	do_something_else_with(tmp_a);
+
+This could fatally confuse your code if it expected the same value
+to be passed to do_something_with() and do_something_else_with().
+
+The compiler would be likely to manufacture this additional load if
+do_something_with() was an inline function that made very heavy use
+of registers: reloading from variable a could save a flush to the
+stack and later reload.  To prevent the compiler from attacking your
+code in this manner, write the following:
+
+	tmp_a = ACCESS_ONCE(a);
+	do_something_with(tmp_a);
+	do_something_else_with(tmp_a);
+
+For a final example, consider the following code, assuming that the
+variable a is set at boot time before the second CPU is brought online
+and never changed later, so that memory barriers are not needed:
+
+	if (a)
+		b = 9;
+	else
+		b = 42;
+
+The compiler is within its rights to manufacture an additional store
+by transforming the above code into the following:
+
+	b = 42;
+	if (a)
+		b = 9;
+
+This could come as a fatal surprise to other code running concurrently
+that expected b to never have the value 42 if a was zero.  To prevent
+the compiler from doing this, write something like:
+
+	if (a)
+		ACCESS_ONCE(b) = 9;
+	else
+		ACCESS_ONCE(b) = 42;
+
+Don't even -think- about doing this without proper use of memory barriers,
+locks, or atomic operations if variable a can change at runtime!
+
+*** WARNING: ACCESS_ONCE() DOES NOT IMPLY A BARRIER! ***
+
 Now, we move onto the atomic operation interfaces typically implemented with
 the help of assembly code.
 
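For reference, in kernels of this era the ACCESS_ONCE() macro used throughout the examples above is defined in include/linux/compiler.h as a simple volatile cast:

	#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

Because the volatile qualifier applies only to the single marked access, the compiler must perform exactly that load or store while the surrounding code remains fully optimizable; this is what rules out the fused, duplicated, and invented accesses shown above.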
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 81c287fad79d..0293fc8daca3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1885,6 +1885,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			arch_perfmon: [X86] Force use of architectural
 				perfmon on Intel CPUs instead of the
 				CPU specific event set.
+			timer: [X86] Force use of architectural NMI
+				timer mode (see also oprofile.timer
+				for generic hr timer mode)
+				[s390] Force legacy basic mode sampling
+				(report cpu_type "timer")
 
 	oops=panic	Always panic on oopses. Default is to just kill the
 			process, but there is a small probability of
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index abf768c681e2..5dbc99c04f6e 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -221,3 +221,66 @@ when the chain is validated for the first time, is then put into a hash
 table, which hash-table can be checked in a lockfree manner.  If the
 locking chain occurs again later on, the hash table tells us that we
 don't have to validate the chain again.
+
+Troubleshooting:
+----------------
+
+The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
+Exceeding this number will trigger the following lockdep warning:
+
+	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
+desktop systems have fewer than 1,000 lock classes, so this warning
+normally results from lock-class leakage or failure to properly
+initialize locks.  These two problems are illustrated below:
+
+1.	Repeated module loading and unloading while running the validator
+	will result in lock-class leakage.  The issue here is that each
+	load of the module will create a new set of lock classes for
+	that module's locks, but module unloading does not remove old
+	classes (see below discussion of reuse of lock classes for why).
+	Therefore, if that module is loaded and unloaded repeatedly,
+	the number of lock classes will eventually reach the maximum.
+
+2.	Using structures such as arrays that have large numbers of
+	locks that are not explicitly initialized.  For example,
+	a hash table with 8192 buckets where each bucket has its own
+	spinlock_t will consume 8192 lock classes -unless- each spinlock
+	is explicitly initialized at runtime, for example, using the
+	run-time spin_lock_init() as opposed to compile-time initializers
+	such as __SPIN_LOCK_UNLOCKED().  Failure to properly initialize
+	the per-bucket spinlocks would guarantee lock-class overflow.
+	In contrast, a loop that called spin_lock_init() on each lock
+	would place all 8192 locks into a single lock class.
+
+	The moral of this story is that you should always explicitly
+	initialize your locks.
+
+One might argue that the validator should be modified to allow
+lock classes to be reused.  However, if you are tempted to make this
+argument, first review the code and think through the changes that would
+be required, keeping in mind that the lock classes to be removed are
+likely to be linked into the lock-dependency graph.  This turns out to
+be harder to do than to say.
+
+Of course, if you do run out of lock classes, the next thing to do is
+to find the offending lock classes.  First, the following command gives
+you the number of lock classes currently in use along with the maximum:
+
+	grep "lock-classes" /proc/lockdep_stats
+
+This command produces the following output on a modest system:
+
+	lock-classes:	748 [max: 8191]
+
+If the number allocated (748 above) increases continually over time,
+then there is likely a leak.  The following command can be used to
+identify the leaking lock classes:
+
+	grep "BD" /proc/lockdep
+
+Run the command and save the output, then compare against the output from
+a later run of this command to identify the leakers.  This same output
+can also help you find situations where runtime lock initialization has
+been omitted.
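Here is a minimal sketch of the hash-table case from item 2 above (illustrative only; my_bucket, my_hash, and MY_HASH_BUCKETS are assumed names). Because all of the spin_lock_init() calls share one call site, lockdep assigns all 8192 per-bucket locks a single lock class:

	#include <linux/list.h>
	#include <linux/spinlock.h>

	#define MY_HASH_BUCKETS	8192

	static struct my_bucket {
		spinlock_t		lock;
		struct hlist_head	chain;
	} my_hash[MY_HASH_BUCKETS];

	static void my_hash_init(void)
	{
		int i;

		/* One spin_lock_init() call site means one lock class
		 * for all 8192 locks, staying well under
		 * MAX_LOCKDEP_KEYS. */
		for (i = 0; i < MY_HASH_BUCKETS; i++) {
			spin_lock_init(&my_hash[i].lock);
			INIT_HLIST_HEAD(&my_hash[i].chain);
		}
	}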
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index b510564aac7e..bb24c2a0e870 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -191,8 +191,6 @@ And for string fields they are:
 
 Currently, only exact string matches are supported.
 
-Currently, the maximum number of predicates in a filter is 16.
-
 5.2 Setting filters
 -------------------
 
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7945b0bd35e2..e2a4b5287361 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1100,6 +1100,15 @@ emulate them efficiently. The fields in each entry are defined as follows:
        eax, ebx, ecx, edx: the values returned by the cpuid instruction for
              this function/index combination
 
+The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
+as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
+support.  Instead it is reported via
+
+  ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
+
+if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
+feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
+
 4.47 KVM_PPC_GET_PVINFO
 
 Capability: KVM_CAP_PPC_GET_PVINFO
@@ -1151,6 +1160,13 @@ following flags are specified:
 /* Depends on KVM_CAP_IOMMU */
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
+The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
+isolation of the device.  Usages not specifying this flag are deprecated.
+
+Only PCI header type 0 devices with PCI BAR resources are supported by
+device assignment.  The user requesting this ioctl must have read/write
+access to the PCI sysfs resource files associated with the device.
+
 4.49 KVM_DEASSIGN_PCI_DEVICE
 
 Capability: KVM_CAP_DEVICE_DEASSIGNMENT
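Below is a short userspace sketch of the capability probe described above (illustrative, not from the patch; it assumes a kernel whose headers carry the KVM_CAP_TSC_DEADLINE_TIMER definition that this release adds to include/linux/kvm.h):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);

		if (kvm < 0)
			return 1;

		/* A positive return means the TSC deadline timer can be
		 * exposed via KVM_SET_CPUID2 once KVM_CREATE_IRQCHIP
		 * provides the in-kernel local APIC. */
		if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER) > 0)
			printf("TSC deadline timer available\n");

		return 0;
	}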
diff --git a/MAINTAINERS b/MAINTAINERS
index b9db108f01c8..62f1cd357ddf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1698,11 +1698,9 @@ F: arch/x86/include/asm/tce.h
 
 CAN NETWORK LAYER
 M:	Oliver Hartkopp <socketcan@hartkopp.net>
-M:	Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
-M:	Urs Thuermann <urs.thuermann@volkswagen.de>
 L:	linux-can@vger.kernel.org
-L:	netdev@vger.kernel.org
-W:	http://developer.berlios.de/projects/socketcan/
+W:	http://gitorious.org/linux-can
+T:	git git://gitorious.org/linux-can/linux-can-next.git
 S:	Maintained
 F:	net/can/
 F:	include/linux/can.h
@@ -1713,9 +1711,10 @@ F:	include/linux/can/gw.h
 
 CAN NETWORK DRIVERS
 M:	Wolfgang Grandegger <wg@grandegger.com>
+M:	Marc Kleine-Budde <mkl@pengutronix.de>
 L:	linux-can@vger.kernel.org
-L:	netdev@vger.kernel.org
-W:	http://developer.berlios.de/projects/socketcan/
+W:	http://gitorious.org/linux-can
+T:	git git://gitorious.org/linux-can/linux-can-next.git
 S:	Maintained
 F:	drivers/net/can/
 F:	include/linux/can/dev.h
@@ -2700,7 +2699,7 @@ FIREWIRE SUBSYSTEM
 M:	Stefan Richter <stefanr@s5r6.in-berlin.de>
 L:	linux1394-devel@lists.sourceforge.net
 W:	http://ieee1394.wiki.kernel.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394-2.6.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git
 S:	Maintained
 F:	drivers/firewire/
 F:	include/linux/firewire*.h
@@ -3101,6 +3100,7 @@ F:	include/linux/hid*
 
 HIGH-RESOLUTION TIMERS, CLOCKEVENTS, DYNTICKS
 M:	Thomas Gleixner <tglx@linutronix.de>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Maintained
 F:	Documentation/timers/
 F:	kernel/hrtimer.c
@@ -3610,7 +3610,7 @@ F:	net/irda/
 IRQ SUBSYSTEM
 M:	Thomas Gleixner <tglx@linutronix.de>
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git irq/core
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
 F:	kernel/irq/
 
 ISAPNP
@@ -4098,7 +4098,7 @@ F:	drivers/hwmon/lm90.c
 LOCKDEP AND LOCKSTAT
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking
 S:	Maintained
 F:	Documentation/lockdep*.txt
 F:	Documentation/lockstat.txt
@@ -4280,7 +4280,9 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
 S:	Maintained
 F:	Documentation/dvb/
 F:	Documentation/video4linux/
+F:	Documentation/DocBook/media/
 F:	drivers/media/
+F:	drivers/staging/media/
 F:	include/media/
 F:	include/linux/dvb/
 F:	include/linux/videodev*.h
@@ -5086,6 +5088,7 @@ M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
 M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:	Supported
 F:	kernel/events/*
 F:	include/linux/perf_event.h
@@ -5165,6 +5168,7 @@ F:	drivers/scsi/pm8001/
 
 POSIX CLOCKS and TIMERS
 M:	Thomas Gleixner <tglx@linutronix.de>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Supported
 F:	fs/timerfd.c
 F:	include/linux/timer*
@@ -5680,6 +5684,7 @@ F:	drivers/dma/dw_dmac.c
 TIMEKEEPING, NTP
 M:	John Stultz <johnstul@us.ibm.com>
 M:	Thomas Gleixner <tglx@linutronix.de>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Supported
 F:	include/linux/clocksource.h
 F:	include/linux/time.h
@@ -5704,6 +5709,7 @@ F:	drivers/watchdog/sc1200wdt.c
 SCHEDULER
 M:	Ingo Molnar <mingo@elte.hu>
 M:	Peter Zijlstra <peterz@infradead.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
 S:	Maintained
 F:	kernel/sched*
 F:	include/linux/sched.h
@@ -6631,7 +6637,7 @@ TRACING
 M:	Steven Rostedt <rostedt@goodmis.org>
 M:	Frederic Weisbecker <fweisbec@gmail.com>
 M:	Ingo Molnar <mingo@redhat.com>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:	Maintained
 F:	Documentation/trace/ftrace.txt
 F:	arch/*/*/*/ftrace.h
@@ -7381,7 +7387,7 @@ M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	"H. Peter Anvin" <hpa@zytor.com>
 M:	x86@kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
 S:	Maintained
 F:	Documentation/x86/
 F:	arch/x86/
diff --git a/Makefile b/Makefile
index d1ea73f74c2f..adddd11c3b3b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 2
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
 NAME = Saber-toothed Squirrel
 
 # *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 4b0669cbb3b0..2505740b81d2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX
 config HAVE_OPROFILE
 	bool
 
+config OPROFILE_NMI_TIMER
+	def_bool y
+	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+
 config KPROBES
 	bool "Kprobes"
 	depends on MODULES
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 776d76b8cb69..b259c7c644e3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1246,7 +1246,7 @@ config PL310_ERRATA_588369
 
 config ARM_ERRATA_720789
 	bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID"
-	depends on CPU_V7 && SMP
+	depends on CPU_V7
 	help
 	  This option enables the workaround for the 720789 Cortex-A9 (prior to
 	  r2p0) erratum. A faulty ASID can be sent to the other CPUs for the
@@ -1282,7 +1282,7 @@ config ARM_ERRATA_743622
 
 config ARM_ERRATA_751472
 	bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation"
-	depends on CPU_V7 && SMP
+	depends on CPU_V7
 	help
 	  This option enables the workaround for the 751472 Cortex-A9 (prior
 	  to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the
diff --git a/arch/arm/common/pl330.c b/arch/arm/common/pl330.c
index f407a6b35d3d..8d8df744f7a5 100644
--- a/arch/arm/common/pl330.c
+++ b/arch/arm/common/pl330.c
@@ -221,17 +221,6 @@
  */
 #define MCODE_BUFF_PER_REQ	256
 
-/*
- * Mark a _pl330_req as free.
- * We do it by writing DMAEND as the first instruction
- * because no valid request is going to have DMAEND as
- * its first instruction to execute.
- */
-#define MARK_FREE(req)	do { \
-			_emit_END(0, (req)->mc_cpu); \
-			(req)->mc_len = 0; \
-		} while (0)
-
 /* If the _pl330_req is available to the client */
 #define IS_FREE(req)	(*((u8 *)((req)->mc_cpu)) == CMD_DMAEND)
 
@@ -301,8 +290,10 @@ struct pl330_thread {
 	struct pl330_dmac *dmac;
 	/* Only two at a time */
 	struct _pl330_req req[2];
-	/* Index of the last submitted request */
+	/* Index of the last enqueued request */
 	unsigned lstenq;
+	/* Index of the last submitted request or -1 if the DMA is stopped */
+	int req_running;
 };
 
 enum pl330_dmac_state {
@@ -778,6 +769,22 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd,
 	writel(0, regs + DBGCMD);
 }
 
+/*
+ * Mark a _pl330_req as free.
+ * We do it by writing DMAEND as the first instruction
+ * because no valid request is going to have DMAEND as
+ * its first instruction to execute.
+ */
+static void mark_free(struct pl330_thread *thrd, int idx)
+{
+	struct _pl330_req *req = &thrd->req[idx];
+
+	_emit_END(0, req->mc_cpu);
+	req->mc_len = 0;
+
+	thrd->req_running = -1;
+}
+
 static inline u32 _state(struct pl330_thread *thrd)
 {
 	void __iomem *regs = thrd->dmac->pinfo->base;
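The mark_free()/IS_FREE() pair encodes "this request slot is free" in the microcode buffer itself: writing the END opcode as the first instruction doubles as the free flag, so no separate bookkeeping bit is needed. A minimal standalone sketch of the same sentinel technique (illustrative names and opcode value, not the pl330 API):

    /* Sentinel-marking sketch: a slot is free exactly when its first
     * opcode is the terminator, mirroring mark_free()/IS_FREE(). */
    #include <stdio.h>

    #define OP_END 0x00 /* assumed terminator opcode */

    struct slot {
            unsigned char buf[16];  /* microcode buffer */
            int len;
    };

    static void slot_mark_free(struct slot *s)
    {
            s->buf[0] = OP_END;     /* END first => nothing valid to run */
            s->len = 0;
    }

    static int slot_is_free(const struct slot *s)
    {
            return s->buf[0] == OP_END;
    }

    int main(void)
    {
            struct slot s = { .buf = { 0x01 }, .len = 4 };
            slot_mark_free(&s);
            printf("free=%d\n", slot_is_free(&s));  /* prints free=1 */
            return 0;
    }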
@@ -836,31 +843,6 @@ static inline u32 _state(struct pl330_thread *thrd)
 	}
 }
 
-/* If the request 'req' of thread 'thrd' is currently active */
-static inline bool _req_active(struct pl330_thread *thrd,
-		struct _pl330_req *req)
-{
-	void __iomem *regs = thrd->dmac->pinfo->base;
-	u32 buf = req->mc_bus, pc = readl(regs + CPC(thrd->id));
-
-	if (IS_FREE(req))
-		return false;
-
-	return (pc >= buf && pc <= buf + req->mc_len) ? true : false;
-}
-
-/* Returns 0 if the thread is inactive, ID of active req + 1 otherwise */
-static inline unsigned _thrd_active(struct pl330_thread *thrd)
-{
-	if (_req_active(thrd, &thrd->req[0]))
-		return 1; /* First req active */
-
-	if (_req_active(thrd, &thrd->req[1]))
-		return 2; /* Second req active */
-
-	return 0;
-}
-
 static void _stop(struct pl330_thread *thrd)
 {
 	void __iomem *regs = thrd->dmac->pinfo->base;
@@ -892,17 +874,22 @@ static bool _trigger(struct pl330_thread *thrd)
 	struct _arg_GO go;
 	unsigned ns;
 	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+	int idx;
 
 	/* Return if already ACTIVE */
 	if (_state(thrd) != PL330_STATE_STOPPED)
 		return true;
 
-	if (!IS_FREE(&thrd->req[1 - thrd->lstenq]))
-		req = &thrd->req[1 - thrd->lstenq];
-	else if (!IS_FREE(&thrd->req[thrd->lstenq]))
-		req = &thrd->req[thrd->lstenq];
-	else
-		req = NULL;
+	idx = 1 - thrd->lstenq;
+	if (!IS_FREE(&thrd->req[idx]))
+		req = &thrd->req[idx];
+	else {
+		idx = thrd->lstenq;
+		if (!IS_FREE(&thrd->req[idx]))
+			req = &thrd->req[idx];
+		else
+			req = NULL;
+	}
 
 	/* Return if no request */
 	if (!req || !req->r)
@@ -933,6 +920,8 @@ static bool _trigger(struct pl330_thread *thrd)
 	/* Only manager can execute GO */
 	_execute_DBGINSN(thrd, insn, true);
 
+	thrd->req_running = idx;
+
 	return true;
 }
 
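The rewritten _trigger() records which of the two request slots it started (thrd->req_running = idx) instead of re-deriving it from the DMA program counter the way the deleted _req_active()/_thrd_active() helpers did. A sketch of the same two-slot pick order, with illustrative types rather than the driver's:

    /* Two-slot ping-pong selection: try the slot enqueued earlier
     * (1 - lstenq) first, fall back to the newer one, and remember
     * which slot was started for later status/abort handling. */
    #include <stdio.h>

    struct two_slot {
            int busy[2];    /* stand-in for !IS_FREE() */
            int lstenq;     /* slot of the most recent enqueue */
            int running;    /* slot now executing, or -1 */
    };

    static int pick_and_run(struct two_slot *t)
    {
            int idx = 1 - t->lstenq;        /* older request first */

            if (!t->busy[idx]) {
                    idx = t->lstenq;        /* fall back to newer one */
                    if (!t->busy[idx])
                            return -1;      /* nothing to run */
            }
            t->running = idx;               /* what update/status read back */
            return idx;
    }

    int main(void)
    {
            struct two_slot t = { .busy = { 1, 1 }, .lstenq = 1, .running = -1 };
            printf("started slot %d\n", pick_and_run(&t)); /* slot 0: older */
            return 0;
    }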
@@ -1382,8 +1371,8 @@ static void pl330_dotask(unsigned long data)
 
 	thrd->req[0].r = NULL;
 	thrd->req[1].r = NULL;
-	MARK_FREE(&thrd->req[0]);
-	MARK_FREE(&thrd->req[1]);
+	mark_free(thrd, 0);
+	mark_free(thrd, 1);
 
 	/* Clear the reset flag */
 	pl330->dmac_tbd.reset_chan &= ~(1 << i);
@@ -1461,14 +1450,12 @@ int pl330_update(const struct pl330_info *pi)
 
 		thrd = &pl330->channels[id];
 
-		active = _thrd_active(thrd);
-		if (!active) /* Aborted */
+		active = thrd->req_running;
+		if (active == -1) /* Aborted */
 			continue;
 
-		active -= 1;
-
 		rqdone = &thrd->req[active];
-		MARK_FREE(rqdone);
+		mark_free(thrd, active);
 
 		/* Get going again ASAP */
 		_start(thrd);
@@ -1509,7 +1496,7 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
 	struct pl330_thread *thrd = ch_id;
 	struct pl330_dmac *pl330;
 	unsigned long flags;
-	int ret = 0, active;
+	int ret = 0, active = thrd->req_running;
 
 	if (!thrd || thrd->free || thrd->dmac->state == DYING)
 		return -EINVAL;
@@ -1525,28 +1512,24 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
 
 		thrd->req[0].r = NULL;
 		thrd->req[1].r = NULL;
-		MARK_FREE(&thrd->req[0]);
-		MARK_FREE(&thrd->req[1]);
+		mark_free(thrd, 0);
+		mark_free(thrd, 1);
 		break;
 
 	case PL330_OP_ABORT:
-		active = _thrd_active(thrd);
-
 		/* Make sure the channel is stopped */
 		_stop(thrd);
 
 		/* ABORT is only for the active req */
-		if (!active)
+		if (active == -1)
 			break;
 
-		active--;
-
 		thrd->req[active].r = NULL;
-		MARK_FREE(&thrd->req[active]);
+		mark_free(thrd, active);
 
 		/* Start the next */
 	case PL330_OP_START:
-		if (!_thrd_active(thrd) && !_start(thrd))
+		if ((active == -1) && !_start(thrd))
 			ret = -EIO;
 		break;
 
@@ -1587,14 +1570,13 @@ int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus)
 	else
 		pstatus->faulting = false;
 
-	active = _thrd_active(thrd);
+	active = thrd->req_running;
 
-	if (!active) {
+	if (active == -1) {
 		/* Indicate that the thread is not running */
 		pstatus->top_req = NULL;
 		pstatus->wait_req = NULL;
 	} else {
-		active--;
 		pstatus->top_req = thrd->req[active].r;
 		pstatus->wait_req = !IS_FREE(&thrd->req[1 - active])
 				? thrd->req[1 - active].r : NULL;
@@ -1659,9 +1641,9 @@ void *pl330_request_channel(const struct pl330_info *pi)
 			thrd->free = false;
 			thrd->lstenq = 1;
 			thrd->req[0].r = NULL;
-			MARK_FREE(&thrd->req[0]);
+			mark_free(thrd, 0);
 			thrd->req[1].r = NULL;
-			MARK_FREE(&thrd->req[1]);
+			mark_free(thrd, 1);
 			break;
 		}
 	}
@@ -1767,14 +1749,14 @@ static inline void _reset_thread(struct pl330_thread *thrd)
 	thrd->req[0].mc_bus = pl330->mcode_bus
 				+ (thrd->id * pi->mcbufsz);
 	thrd->req[0].r = NULL;
-	MARK_FREE(&thrd->req[0]);
+	mark_free(thrd, 0);
 
 	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
 				+ pi->mcbufsz / 2;
 	thrd->req[1].mc_bus = thrd->req[0].mc_bus
 				+ pi->mcbufsz / 2;
 	thrd->req[1].r = NULL;
-	MARK_FREE(&thrd->req[1]);
+	mark_free(thrd, 1);
 }
 
 static int dmac_alloc_threads(struct pl330_dmac *pl330)
diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index 11a4192197c8..cf497ce41dfe 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -18,9 +18,10 @@ CONFIG_ARCH_MXC=y
 CONFIG_ARCH_IMX_V4_V5=y
 CONFIG_ARCH_MX1ADS=y
 CONFIG_MACH_SCB9328=y
+CONFIG_MACH_APF9328=y
 CONFIG_MACH_MX21ADS=y
 CONFIG_MACH_MX25_3DS=y
-CONFIG_MACH_EUKREA_CPUIMX25=y
+CONFIG_MACH_EUKREA_CPUIMX25SD=y
 CONFIG_MACH_MX27ADS=y
 CONFIG_MACH_PCM038=y
 CONFIG_MACH_CPUIMX27=y
@@ -72,17 +73,16 @@ CONFIG_MTD_CFI_GEOMETRY=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_MXC=y
 CONFIG_MTD_UBI=y
 CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_AT24=y
 CONFIG_EEPROM_AT25=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_SMC91X=y
 CONFIG_DM9000=y
+CONFIG_SMC91X=y
 CONFIG_SMC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_SMSC_PHY=y
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
@@ -100,6 +100,7 @@ CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_IMX=y
 CONFIG_SPI=y
 CONFIG_SPI_IMX=y
+CONFIG_SPI_SPIDEV=y
 CONFIG_W1=y
 CONFIG_W1_MASTER_MXC=y
 CONFIG_W1_SLAVE_THERM=y
@@ -139,6 +140,7 @@ CONFIG_MMC=y
 CONFIG_MMC_MXC=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
 CONFIG_LEDS_MC13783=y
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=y
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 3d0c6fb74ae4..e8e8fe505df1 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -183,7 +183,8 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		leds_event(led_idle_start);
 		while (!need_resched()) {
 #ifdef CONFIG_HOTPLUG_CPU
@@ -213,7 +214,8 @@ void cpu_idle(void)
 			}
 		}
 		leds_event(led_idle_end);
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
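The same idle-loop conversion recurs for avr32, blackfin, and the other architectures further down: the combined tick_nohz_stop_sched_tick(1) is split into tick_nohz_idle_enter() plus an explicit rcu_idle_enter(), and the exit path mirrors it in reverse order. A self-contained sketch of the resulting shape, with stubs standing in for the real kernel APIs and arch_idle() as a placeholder for the per-arch sleep instruction:

    #include <stdio.h>

    /* Stubs so the sketch compiles standalone; in the kernel these are
     * the real tick/RCU idle hooks introduced by this series. */
    static void tick_nohz_idle_enter(void) { puts("tick off"); }
    static void rcu_idle_enter(void)       { puts("rcu idle"); }
    static void rcu_idle_exit(void)        { puts("rcu back"); }
    static void tick_nohz_idle_exit(void)  { puts("tick on");  }
    static int need_resched(void)          { return 1; }
    static void arch_idle(void)            { /* wfi/hlt would go here */ }

    static void idle_once(void)
    {
            tick_nohz_idle_enter();   /* stop the periodic tick first */
            rcu_idle_enter();         /* then tell RCU this CPU is idle */
            while (!need_resched())
                    arch_idle();
            rcu_idle_exit();          /* leave RCU-idle before ... */
            tick_nohz_idle_exit();    /* ... restarting the tick */
    }

    int main(void)
    {
            idle_once();              /* one trip around the new loop shape */
            return 0;
    }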
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8fc2c8fcbdc6..c0b59bff6be6 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -52,6 +52,7 @@
 #include <asm/mach/time.h>
 #include <asm/traps.h>
 #include <asm/unwind.h>
+#include <asm/memblock.h>
 
 #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
 #include "compat.h"
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 3f03fe0c3269..00df012c4678 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -160,12 +160,12 @@ static const struct unwind_idx *unwind_find_origin(
 	const struct unwind_idx *start, const struct unwind_idx *stop)
 {
 	pr_debug("%s(%p, %p)\n", __func__, start, stop);
-	while (start < stop - 1) {
+	while (start < stop) {
 		const struct unwind_idx *mid = start + ((stop - start) >> 1);
 
 		if (mid->addr_offset >= 0x40000000)
 			/* negative offset */
-			start = mid;
+			start = mid + 1;
 		else
 			/* positive offset */
 			stop = mid;
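The fix turns the loop into a conventional lower-bound search over the half-open interval [start, stop): taking start = mid + 1 on the "go right" branch guarantees the interval shrinks every iteration, which the old start = mid under while (start < stop - 1) did not. A sketch of the same invariant on a plain sorted array (illustrative, not the unwinder's code):

    #include <stdio.h>

    /* Lower-bound binary search with the fixed loop's invariant:
     * half-open [start, stop); the "go right" branch must step past
     * mid or no progress is made when stop - start == 1. */
    static const int *lower_bound(const int *start, const int *stop, int key)
    {
            while (start < stop) {
                    const int *mid = start + ((stop - start) >> 1);

                    if (*mid < key)
                            start = mid + 1;  /* exclude mid: interval shrinks */
                    else
                            stop = mid;       /* mid may still be the answer */
            }
            return start; /* first element >= key */
    }

    int main(void)
    {
            int v[] = { 1, 3, 3, 7, 9 };
            printf("%td\n", lower_bound(v, v + 5, 3) - v); /* prints 1 */
            return 0;
    }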
diff --git a/arch/arm/mach-exynos/cpu.c b/arch/arm/mach-exynos/cpu.c
index 90ec247f3b37..cc8d4bd6d0f7 100644
--- a/arch/arm/mach-exynos/cpu.c
+++ b/arch/arm/mach-exynos/cpu.c
@@ -111,11 +111,6 @@ static struct map_desc exynos4_iodesc[] __initdata = {
 		.length		= SZ_4K,
 		.type		= MT_DEVICE,
 	}, {
-		.virtual	= (unsigned long)S5P_VA_SROMC,
-		.pfn		= __phys_to_pfn(EXYNOS4_PA_SROMC),
-		.length		= SZ_4K,
-		.type		= MT_DEVICE,
-	}, {
 		.virtual	= (unsigned long)S3C_VA_USB_HSPHY,
 		.pfn		= __phys_to_pfn(EXYNOS4_PA_HSPHY),
 		.length		= SZ_4K,
diff --git a/arch/arm/mach-exynos/mct.c b/arch/arm/mach-exynos/mct.c
index 97343df8f132..85b5527d0918 100644
--- a/arch/arm/mach-exynos/mct.c
+++ b/arch/arm/mach-exynos/mct.c
@@ -44,8 +44,6 @@ struct mct_clock_event_device {
 	char name[10];
 };
 
-static DEFINE_PER_CPU(struct mct_clock_event_device, percpu_mct_tick);
-
 static void exynos4_mct_write(unsigned int value, void *addr)
 {
 	void __iomem *stat_addr;
@@ -264,6 +262,9 @@ static void exynos4_clockevent_init(void)
 }
 
 #ifdef CONFIG_LOCAL_TIMERS
+
+static DEFINE_PER_CPU(struct mct_clock_event_device, percpu_mct_tick);
+
 /* Clock event handling */
 static void exynos4_mct_tick_stop(struct mct_clock_event_device *mevt)
 {
@@ -428,9 +429,13 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 
 void local_timer_stop(struct clock_event_device *evt)
 {
+	unsigned int cpu = smp_processor_id();
 	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
 	if (mct_int_type == MCT_INT_SPI)
-		disable_irq(evt->irq);
+		if (cpu == 0)
+			remove_irq(evt->irq, &mct_tick0_event_irq);
+		else
+			remove_irq(evt->irq, &mct_tick1_event_irq);
 	else
 		disable_percpu_irq(IRQ_MCT_LOCALTIMER);
 }
@@ -443,6 +448,7 @@ static void __init exynos4_timer_resources(void)
 
 	clk_rate = clk_get_rate(mct_clk);
 
+#ifdef CONFIG_LOCAL_TIMERS
 	if (mct_int_type == MCT_INT_PPI) {
 		int err;
 
@@ -452,6 +458,7 @@ static void __init exynos4_timer_resources(void)
 		WARN(err, "MCT: can't request IRQ %d (%d)\n",
 		     IRQ_MCT_LOCALTIMER, err);
 	}
+#endif /* CONFIG_LOCAL_TIMERS */
 }
 
 static void __init exynos4_timer_init(void)
diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig
index c44aa974e79c..0e6f1af260b6 100644
--- a/arch/arm/mach-imx/Kconfig
+++ b/arch/arm/mach-imx/Kconfig
@@ -132,7 +132,7 @@ config MACH_MX25_3DS
 	select IMX_HAVE_PLATFORM_MXC_NAND
 	select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX
 
-config MACH_EUKREA_CPUIMX25
+config MACH_EUKREA_CPUIMX25SD
 	bool "Support Eukrea CPUIMX25 Platform"
 	select SOC_IMX25
 	select IMX_HAVE_PLATFORM_FLEXCAN
@@ -148,7 +148,7 @@ config MACH_EUKREA_CPUIMX25
 
 choice
 	prompt "Baseboard"
-	depends on MACH_EUKREA_CPUIMX25
+	depends on MACH_EUKREA_CPUIMX25SD
 	default MACH_EUKREA_MBIMXSD25_BASEBOARD
 
 config MACH_EUKREA_MBIMXSD25_BASEBOARD
@@ -542,7 +542,7 @@ config MACH_MX35_3DS
 	  Include support for MX35PDK platform. This includes specific
 	  configurations for the board and its peripherals.
 
-config MACH_EUKREA_CPUIMX35
+config MACH_EUKREA_CPUIMX35SD
 	bool "Support Eukrea CPUIMX35 Platform"
 	select SOC_IMX35
 	select IMX_HAVE_PLATFORM_FLEXCAN
@@ -560,7 +560,7 @@ config MACH_EUKREA_CPUIMX35
 
 choice
 	prompt "Baseboard"
-	depends on MACH_EUKREA_CPUIMX35
+	depends on MACH_EUKREA_CPUIMX35SD
 	default MACH_EUKREA_MBIMXSD35_BASEBOARD
 
 config MACH_EUKREA_MBIMXSD35_BASEBOARD
diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
index aba73214c2a8..d97f409ce98b 100644
--- a/arch/arm/mach-imx/Makefile
+++ b/arch/arm/mach-imx/Makefile
@@ -24,7 +24,7 @@ obj-$(CONFIG_MACH_MX21ADS) += mach-mx21ads.o
 
 # i.MX25 based machines
 obj-$(CONFIG_MACH_MX25_3DS) += mach-mx25_3ds.o
-obj-$(CONFIG_MACH_EUKREA_CPUIMX25) += mach-eukrea_cpuimx25.o
+obj-$(CONFIG_MACH_EUKREA_CPUIMX25SD) += mach-eukrea_cpuimx25.o
 obj-$(CONFIG_MACH_EUKREA_MBIMXSD25_BASEBOARD) += eukrea_mbimxsd25-baseboard.o
 
 # i.MX27 based machines
@@ -57,7 +57,7 @@ obj-$(CONFIG_MACH_BUG) += mach-bug.o
 # i.MX35 based machines
 obj-$(CONFIG_MACH_PCM043) += mach-pcm043.o
 obj-$(CONFIG_MACH_MX35_3DS) += mach-mx35_3ds.o
-obj-$(CONFIG_MACH_EUKREA_CPUIMX35) += mach-cpuimx35.o
+obj-$(CONFIG_MACH_EUKREA_CPUIMX35SD) += mach-cpuimx35.o
 obj-$(CONFIG_MACH_EUKREA_MBIMXSD35_BASEBOARD) += eukrea_mbimxsd35-baseboard.o
 obj-$(CONFIG_MACH_VPR200) += mach-vpr200.o
 
diff --git a/arch/arm/mach-imx/clock-imx35.c b/arch/arm/mach-imx/clock-imx35.c
index 8116f119517d..ac8238caecb9 100644
--- a/arch/arm/mach-imx/clock-imx35.c
+++ b/arch/arm/mach-imx/clock-imx35.c
@@ -507,7 +507,7 @@ static struct clk_lookup lookups[] = {
 
 int __init mx35_clocks_init()
 {
-	unsigned int cgr2 = 3 << 26, cgr3 = 0;
+	unsigned int cgr2 = 3 << 26;
 
 #if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_ICEDCC)
 	cgr2 |= 3 << 16;
@@ -521,6 +521,12 @@ int __init mx35_clocks_init()
 	__raw_writel((3 << 18), CCM_BASE + CCM_CGR0);
 	__raw_writel((3 << 2) | (3 << 4) | (3 << 6) | (3 << 8) | (3 << 16),
 			CCM_BASE + CCM_CGR1);
+	__raw_writel(cgr2, CCM_BASE + CCM_CGR2);
+	__raw_writel(0, CCM_BASE + CCM_CGR3);
+
+	clk_enable(&iim_clk);
+	imx_print_silicon_rev("i.MX35", mx35_revision());
+	clk_disable(&iim_clk);
 
 	/*
 	 * Check if we came up in internal boot mode. If yes, we need some
@@ -529,17 +535,11 @@ int __init mx35_clocks_init()
 	 */
 	if (!(__raw_readl(CCM_BASE + CCM_RCSR) & (3 << 10))) {
 		/* Additionally turn on UART1, SCC, and IIM clocks */
-		cgr2 |= 3 << 16 | 3 << 4;
-		cgr3 |= 3 << 2;
+		clk_enable(&iim_clk);
+		clk_enable(&uart1_clk);
+		clk_enable(&scc_clk);
 	}
 
-	__raw_writel(cgr2, CCM_BASE + CCM_CGR2);
-	__raw_writel(cgr3, CCM_BASE + CCM_CGR3);
-
-	clk_enable(&iim_clk);
-	imx_print_silicon_rev("i.MX35", mx35_revision());
-	clk_disable(&iim_clk);
-
 #ifdef CONFIG_MXC_USE_EPIT
 	epit_timer_init(&epit1_clk,
 		MX35_IO_ADDRESS(MX35_EPIT1_BASE_ADDR), MX35_INT_EPIT1);
diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c
index 66af2e8f7e57..362aae780601 100644
--- a/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/arch/arm/mach-imx/mach-cpuimx35.c
@@ -53,12 +53,18 @@ static const struct imxi2c_platform_data
 	.bitrate = 100000,
 };
 
+#define TSC2007_IRQGPIO IMX_GPIO_NR(3, 2)
+static int tsc2007_get_pendown_state(void)
+{
+	return !gpio_get_value(TSC2007_IRQGPIO);
+}
+
 static struct tsc2007_platform_data tsc2007_info = {
 	.model			= 2007,
 	.x_plate_ohms		= 180,
+	.get_pendown_state	= tsc2007_get_pendown_state,
 };
 
-#define TSC2007_IRQGPIO IMX_GPIO_NR(3, 2)
 static struct i2c_board_info eukrea_cpuimx35_i2c_devices[] = {
 	{
 		I2C_BOARD_INFO("pcf8563", 0x51),
diff --git a/arch/arm/mach-mx5/board-mx51_babbage.c b/arch/arm/mach-mx5/board-mx51_babbage.c
index 5c837603ff0f..24994bb52147 100644
--- a/arch/arm/mach-mx5/board-mx51_babbage.c
+++ b/arch/arm/mach-mx5/board-mx51_babbage.c
@@ -362,7 +362,7 @@ static void __init mx51_babbage_init(void)
 {
 	iomux_v3_cfg_t usbh1stp = MX51_PAD_USBH1_STP__USBH1_STP;
 	iomux_v3_cfg_t power_key = NEW_PAD_CTRL(MX51_PAD_EIM_A27__GPIO2_21,
-		PAD_CTL_SRE_FAST | PAD_CTL_DSE_HIGH | PAD_CTL_PUS_100K_UP);
+		PAD_CTL_SRE_FAST | PAD_CTL_DSE_HIGH);
 
 	imx51_soc_init();
 
diff --git a/arch/arm/mach-mx5/board-mx53_evk.c b/arch/arm/mach-mx5/board-mx53_evk.c
index 6bea31ab8f85..64bbfcea6f35 100644
--- a/arch/arm/mach-mx5/board-mx53_evk.c
+++ b/arch/arm/mach-mx5/board-mx53_evk.c
@@ -106,7 +106,7 @@ static inline void mx53_evk_fec_reset(void)
 	gpio_set_value(MX53_EVK_FEC_PHY_RST, 1);
 }
 
-static struct fec_platform_data mx53_evk_fec_pdata = {
+static const struct fec_platform_data mx53_evk_fec_pdata __initconst = {
 	.phy = PHY_INTERFACE_MODE_RMII,
 };
 
diff --git a/arch/arm/mach-mx5/board-mx53_loco.c b/arch/arm/mach-mx5/board-mx53_loco.c
index 7678f7734db6..237bdecd9331 100644
--- a/arch/arm/mach-mx5/board-mx53_loco.c
+++ b/arch/arm/mach-mx5/board-mx53_loco.c
@@ -242,7 +242,7 @@ static inline void mx53_loco_fec_reset(void)
 	gpio_set_value(LOCO_FEC_PHY_RST, 1);
 }
 
-static struct fec_platform_data mx53_loco_fec_data = {
+static const struct fec_platform_data mx53_loco_fec_data __initconst = {
 	.phy = PHY_INTERFACE_MODE_RMII,
 };
 
diff --git a/arch/arm/mach-mx5/board-mx53_smd.c b/arch/arm/mach-mx5/board-mx53_smd.c
index 59c0845eb4a6..d42132a80e8f 100644
--- a/arch/arm/mach-mx5/board-mx53_smd.c
+++ b/arch/arm/mach-mx5/board-mx53_smd.c
@@ -104,7 +104,7 @@ static inline void mx53_smd_fec_reset(void)
 	gpio_set_value(SMD_FEC_PHY_RST, 1);
 }
 
-static struct fec_platform_data mx53_smd_fec_data = {
+static const struct fec_platform_data mx53_smd_fec_data __initconst = {
 	.phy = PHY_INTERFACE_MODE_RMII,
 };
 
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index ba1aa07bdb29..c15c5c9c9085 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -193,7 +193,7 @@ static struct platform_device rx51_charger_device = {
 static void __init rx51_charger_init(void)
 {
 	WARN_ON(gpio_request_one(RX51_USB_TRANSCEIVER_RST_GPIO,
-		GPIOF_OUT_INIT_LOW, "isp1704_reset"));
+		GPIOF_OUT_INIT_HIGH, "isp1704_reset"));
 
 	platform_device_register(&rx51_charger_device);
 }
diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c
index 292eee3be15f..28fcb27005d2 100644
--- a/arch/arm/mach-omap2/mcbsp.c
+++ b/arch/arm/mach-omap2/mcbsp.c
@@ -145,6 +145,9 @@ static int omap_init_mcbsp(struct omap_hwmod *oh, void *unused)
 		pdata->reg_size = 4;
 		pdata->has_ccr = true;
 	}
+	pdata->set_clk_src = omap2_mcbsp_set_clk_src;
+	if (id == 1)
+		pdata->mux_signal = omap2_mcbsp1_mux_rx_clk;
 
 	if (oh->class->rev == MCBSP_CONFIG_TYPE3) {
 		if (id == 2)
@@ -174,9 +177,6 @@ static int omap_init_mcbsp(struct omap_hwmod *oh, void *unused)
 			name, oh->name);
 		return PTR_ERR(pdev);
 	}
-	pdata->set_clk_src = omap2_mcbsp_set_clk_src;
-	if (id == 1)
-		pdata->mux_signal = omap2_mcbsp1_mux_rx_clk;
 	omap_mcbsp_count++;
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 7f8915ad5099..eef43e2e163e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -3247,18 +3247,14 @@ static __initdata struct omap_hwmod *omap3xxx_hwmods[] = {
 
 /* 3430ES1-only hwmods */
 static __initdata struct omap_hwmod *omap3430es1_hwmods[] = {
-	&omap3xxx_iva_hwmod,
 	&omap3430es1_dss_core_hwmod,
-	&omap3xxx_mailbox_hwmod,
 	NULL
 };
 
 /* 3430ES2+-only hwmods */
 static __initdata struct omap_hwmod *omap3430es2plus_hwmods[] = {
-	&omap3xxx_iva_hwmod,
 	&omap3xxx_dss_core_hwmod,
 	&omap3xxx_usbhsotg_hwmod,
-	&omap3xxx_mailbox_hwmod,
 	NULL
 };
 
diff --git a/arch/arm/mach-s5pv210/mach-smdkv210.c b/arch/arm/mach-s5pv210/mach-smdkv210.c
index a9106c392398..8662ef6e5681 100644
--- a/arch/arm/mach-s5pv210/mach-smdkv210.c
+++ b/arch/arm/mach-s5pv210/mach-smdkv210.c
@@ -273,6 +273,7 @@ static struct samsung_bl_gpio_info smdkv210_bl_gpio_info = {
 
 static struct platform_pwm_backlight_data smdkv210_bl_data = {
 	.pwm_id = 3,
+	.pwm_period_ns = 1000,
 };
 
 static void __init smdkv210_map_io(void)
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index b862e9f81e3e..7119b87cbfa0 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -607,6 +607,7 @@ struct sys_timer ag5evm_timer = {
 
 MACHINE_START(AG5EVM, "ag5evm")
 	.map_io		= ag5evm_map_io,
+	.nr_irqs	= NR_IRQS_LEGACY,
 	.init_irq	= sh73a0_init_irq,
 	.handle_irq	= shmobile_handle_irq_gic,
 	.init_machine	= ag5evm_init,
diff --git a/arch/arm/mach-shmobile/board-kota2.c b/arch/arm/mach-shmobile/board-kota2.c
index bd9a78424d6b..f44150b5ae46 100644
--- a/arch/arm/mach-shmobile/board-kota2.c
+++ b/arch/arm/mach-shmobile/board-kota2.c
@@ -33,6 +33,7 @@
 #include <linux/input/sh_keysc.h>
 #include <linux/gpio_keys.h>
 #include <linux/leds.h>
+#include <linux/platform_data/leds-renesas-tpu.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
 #include <linux/mfd/tmio.h>
@@ -56,7 +57,7 @@ static struct resource smsc9220_resources[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= gic_spi(33), /* PINTA2 @ PORT144 */
+		.start	= SH73A0_PINT0_IRQ(2), /* PINTA2 */
 		.flags	= IORESOURCE_IRQ,
 	},
 };
@@ -157,10 +158,6 @@ static struct platform_device gpio_keys_device = {
 #define GPIO_LED(n, g) { .name = n, .gpio = g }
 
 static struct gpio_led gpio_leds[] = {
-	GPIO_LED("V2513", GPIO_PORT153), /* PORT153 [TPU1T02] -> V2513 */
-	GPIO_LED("V2514", GPIO_PORT199), /* PORT199 [TPU4TO1] -> V2514 */
-	GPIO_LED("V2515", GPIO_PORT197), /* PORT197 [TPU2TO1] -> V2515 */
-	GPIO_LED("KEYLED", GPIO_PORT163), /* PORT163 [TPU3TO0] -> KEYLED */
 	GPIO_LED("G", GPIO_PORT20), /* PORT20 [GPO0] -> LED7 -> "G" */
 	GPIO_LED("H", GPIO_PORT21), /* PORT21 [GPO1] -> LED8 -> "H" */
 	GPIO_LED("J", GPIO_PORT22), /* PORT22 [GPO2] -> LED9 -> "J" */
@@ -179,6 +176,119 @@ static struct platform_device gpio_leds_device = {
 	},
 };
 
+/* TPU LED */
+static struct led_renesas_tpu_config led_renesas_tpu12_pdata = {
+	.name		= "V2513",
+	.pin_gpio_fn	= GPIO_FN_TPU1TO2,
+	.pin_gpio	= GPIO_PORT153,
+	.channel_offset = 0x90,
+	.timer_bit = 2,
+	.max_brightness = 1000,
+};
+
+static struct resource tpu12_resources[] = {
+	[0] = {
+		.name	= "TPU12",
+		.start	= 0xe6610090,
+		.end	= 0xe66100b5,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device leds_tpu12_device = {
+	.name = "leds-renesas-tpu",
+	.id = 12,
+	.dev = {
+		.platform_data	= &led_renesas_tpu12_pdata,
+	},
+	.num_resources	= ARRAY_SIZE(tpu12_resources),
+	.resource	= tpu12_resources,
+};
+
+static struct led_renesas_tpu_config led_renesas_tpu41_pdata = {
+	.name		= "V2514",
+	.pin_gpio_fn	= GPIO_FN_TPU4TO1,
+	.pin_gpio	= GPIO_PORT199,
+	.channel_offset = 0x50,
+	.timer_bit = 1,
+	.max_brightness = 1000,
+};
+
+static struct resource tpu41_resources[] = {
+	[0] = {
+		.name	= "TPU41",
+		.start	= 0xe6640050,
+		.end	= 0xe6640075,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device leds_tpu41_device = {
+	.name = "leds-renesas-tpu",
+	.id = 41,
+	.dev = {
+		.platform_data	= &led_renesas_tpu41_pdata,
+	},
+	.num_resources	= ARRAY_SIZE(tpu41_resources),
+	.resource	= tpu41_resources,
+};
+
+static struct led_renesas_tpu_config led_renesas_tpu21_pdata = {
+	.name		= "V2515",
+	.pin_gpio_fn	= GPIO_FN_TPU2TO1,
+	.pin_gpio	= GPIO_PORT197,
+	.channel_offset = 0x50,
+	.timer_bit = 1,
+	.max_brightness = 1000,
+};
+
+static struct resource tpu21_resources[] = {
+	[0] = {
+		.name	= "TPU21",
+		.start	= 0xe6620050,
+		.end	= 0xe6620075,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device leds_tpu21_device = {
+	.name = "leds-renesas-tpu",
+	.id = 21,
+	.dev = {
+		.platform_data	= &led_renesas_tpu21_pdata,
+	},
+	.num_resources	= ARRAY_SIZE(tpu21_resources),
+	.resource	= tpu21_resources,
+};
+
+static struct led_renesas_tpu_config led_renesas_tpu30_pdata = {
+	.name		= "KEYLED",
+	.pin_gpio_fn	= GPIO_FN_TPU3TO0,
+	.pin_gpio	= GPIO_PORT163,
+	.channel_offset = 0x10,
+	.timer_bit = 0,
+	.max_brightness = 1000,
+};
+
+static struct resource tpu30_resources[] = {
+	[0] = {
+		.name	= "TPU30",
+		.start	= 0xe6630010,
+		.end	= 0xe6630035,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device leds_tpu30_device = {
+	.name = "leds-renesas-tpu",
+	.id = 30,
+	.dev = {
+		.platform_data	= &led_renesas_tpu30_pdata,
+	},
+	.num_resources	= ARRAY_SIZE(tpu30_resources),
+	.resource	= tpu30_resources,
+};
+
 /* MMCIF */
 static struct resource mmcif_resources[] = {
 	[0] = {
@@ -291,6 +401,10 @@ static struct platform_device *kota2_devices[] __initdata = {
 	&keysc_device,
 	&gpio_keys_device,
 	&gpio_leds_device,
+	&leds_tpu12_device,
+	&leds_tpu41_device,
+	&leds_tpu21_device,
+	&leds_tpu30_device,
 	&mmcif_device,
 	&sdhi0_device,
 	&sdhi1_device,
@@ -317,18 +431,6 @@ static void __init kota2_map_io(void)
 	shmobile_setup_console();
 }
 
-#define PINTER0A	0xe69000a0
-#define PINTCR0A	0xe69000b0
-
-void __init kota2_init_irq(void)
-{
-	sh73a0_init_irq();
-
-	/* setup PINT: enable PINTA2 as active low */
-	__raw_writel(1 << 29, PINTER0A);
-	__raw_writew(2 << 10, PINTCR0A);
-}
-
 static void __init kota2_init(void)
 {
 	sh73a0_pinmux_init();
@@ -447,7 +549,8 @@ struct sys_timer kota2_timer = {
 
 MACHINE_START(KOTA2, "kota2")
 	.map_io		= kota2_map_io,
-	.init_irq	= kota2_init_irq,
+	.nr_irqs	= NR_IRQS_LEGACY,
+	.init_irq	= sh73a0_init_irq,
 	.handle_irq	= shmobile_handle_irq_gic,
 	.init_machine	= kota2_init,
 	.timer		= &kota2_timer,
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index 61a846bb30f2..1370a89ca358 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -113,6 +113,12 @@ static struct clk main_clk = {
 	.ops		= &main_clk_ops,
 };
 
+/* Divide Main clock by two */
+static struct clk main_div2_clk = {
+	.ops		= &div2_clk_ops,
+	.parent		= &main_clk,
+};
+
 /* PLL0, PLL1, PLL2, PLL3 */
 static unsigned long pll_recalc(struct clk *clk)
 {
@@ -181,6 +187,7 @@ static struct clk *main_clks[] = {
 	&extal1_div2_clk,
 	&extal2_div2_clk,
 	&main_clk,
+	&main_div2_clk,
 	&pll0_clk,
 	&pll1_clk,
 	&pll2_clk,
@@ -243,7 +250,7 @@ static struct clk div6_clks[DIV6_NR] = {
 	[DIV6_VCK1] = SH_CLK_DIV6(&pll1_div2_clk, VCLKCR1, 0),
 	[DIV6_VCK2] = SH_CLK_DIV6(&pll1_div2_clk, VCLKCR2, 0),
 	[DIV6_VCK3] = SH_CLK_DIV6(&pll1_div2_clk, VCLKCR3, 0),
-	[DIV6_ZB1] = SH_CLK_DIV6(&pll1_div2_clk, ZBCKCR, 0),
+	[DIV6_ZB1] = SH_CLK_DIV6(&pll1_div2_clk, ZBCKCR, CLK_ENABLE_ON_INIT),
 	[DIV6_FLCTL] = SH_CLK_DIV6(&pll1_div2_clk, FLCKCR, 0),
 	[DIV6_SDHI0] = SH_CLK_DIV6(&pll1_div2_clk, SD0CKCR, 0),
 	[DIV6_SDHI1] = SH_CLK_DIV6(&pll1_div2_clk, SD1CKCR, 0),
@@ -268,6 +275,7 @@ enum { MSTP001,
 	MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
 	MSTP331, MSTP329, MSTP325, MSTP323, MSTP318,
 	MSTP314, MSTP313, MSTP312, MSTP311,
+	MSTP303, MSTP302, MSTP301, MSTP300,
 	MSTP411, MSTP410, MSTP403,
 	MSTP_NR };
 
@@ -301,6 +309,10 @@ static struct clk mstp_clks[MSTP_NR] = {
 	[MSTP313] = MSTP(&div6_clks[DIV6_SDHI1], SMSTPCR3, 13, 0), /* SDHI1 */
 	[MSTP312] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 12, 0), /* MMCIF0 */
 	[MSTP311] = MSTP(&div6_clks[DIV6_SDHI2], SMSTPCR3, 11, 0), /* SDHI2 */
+	[MSTP303] = MSTP(&main_div2_clk, SMSTPCR3, 3, 0), /* TPU1 */
+	[MSTP302] = MSTP(&main_div2_clk, SMSTPCR3, 2, 0), /* TPU2 */
+	[MSTP301] = MSTP(&main_div2_clk, SMSTPCR3, 1, 0), /* TPU3 */
+	[MSTP300] = MSTP(&main_div2_clk, SMSTPCR3, 0, 0), /* TPU4 */
 	[MSTP411] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 11, 0), /* IIC3 */
 	[MSTP410] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 10, 0), /* IIC4 */
 	[MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */
@@ -350,6 +362,10 @@ static struct clk_lookup lookups[] = {
 	CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */
 	CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMCIF0 */
 	CLKDEV_DEV_ID("sh_mobile_sdhi.2", &mstp_clks[MSTP311]), /* SDHI2 */
+	CLKDEV_DEV_ID("leds-renesas-tpu.12", &mstp_clks[MSTP303]), /* TPU1 */
+	CLKDEV_DEV_ID("leds-renesas-tpu.21", &mstp_clks[MSTP302]), /* TPU2 */
+	CLKDEV_DEV_ID("leds-renesas-tpu.30", &mstp_clks[MSTP301]), /* TPU3 */
+	CLKDEV_DEV_ID("leds-renesas-tpu.41", &mstp_clks[MSTP300]), /* TPU4 */
 	CLKDEV_DEV_ID("i2c-sh_mobile.3", &mstp_clks[MSTP411]), /* I2C3 */
 	CLKDEV_DEV_ID("i2c-sh_mobile.4", &mstp_clks[MSTP410]), /* I2C4 */
 	CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index fbdd12ea3a58..7c38474e533a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -32,6 +32,7 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
+#include <asm/memblock.h>
 
 #include "mm.h"
 
@@ -332,7 +333,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 
 	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
 
-	memblock_init();
 	for (i = 0; i < mi->nr_banks; i++)
 		memblock_add(mi->bank[i].start, mi->bank[i].size);
 
@@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 	if (mdesc->reserve)
 		mdesc->reserve();
 
-	memblock_analyze();
+	memblock_allow_resize();
 	memblock_dump_all();
 }
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 2c559ac38142..e70a73731eaa 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -363,11 +363,13 @@ __v7_setup:
 	orreq	r10, r10, #1 << 6		@ set bit #6
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
 #endif
-#ifdef CONFIG_ARM_ERRATA_751472
-	cmp	r6, #0x30			@ present prior to r3p0
+#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
+	ALT_SMP(cmp r6, #0x30)			@ present prior to r3p0
+	ALT_UP_B(1f)
 	mrclt	p15, 0, r10, c15, c0, 1		@ read diagnostic register
 	orrlt	r10, r10, #1 << 11		@ set bit #11
 	mcrlt	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+1:
 #endif
 
 3:	mov	r10, #0
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index c074e66ad224..4e0a371630b3 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -116,7 +116,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	return oprofile_perf_init(ops);
 }
 
-void __exit oprofile_arch_exit(void)
+void oprofile_arch_exit(void)
 {
 	oprofile_perf_exit();
 }
diff --git a/arch/arm/plat-mxc/cpufreq.c b/arch/arm/plat-mxc/cpufreq.c
index 74aac96cda20..73db34bf588a 100644
--- a/arch/arm/plat-mxc/cpufreq.c
+++ b/arch/arm/plat-mxc/cpufreq.c
@@ -17,6 +17,7 @@
  * the CPU clock speed on the fly.
  */
 
+#include <linux/module.h>
 #include <linux/cpufreq.h>
 #include <linux/clk.h>
 #include <linux/err.h>
@@ -97,7 +98,7 @@ static int mxc_set_target(struct cpufreq_policy *policy,
 	return ret;
 }
 
-static int __init mxc_cpufreq_init(struct cpufreq_policy *policy)
+static int mxc_cpufreq_init(struct cpufreq_policy *policy)
 {
 	int ret;
 	int i;
diff --git a/arch/arm/plat-mxc/include/mach/uncompress.h b/arch/arm/plat-mxc/include/mach/uncompress.h
index 88fd40452567..477971b00930 100644
--- a/arch/arm/plat-mxc/include/mach/uncompress.h
+++ b/arch/arm/plat-mxc/include/mach/uncompress.h
@@ -98,6 +98,7 @@ static __inline__ void __arch_decomp_setup(unsigned long arch_id)
 	case MACH_TYPE_PCM043:
 	case MACH_TYPE_LILLY1131:
 	case MACH_TYPE_VPR200:
+	case MACH_TYPE_EUKREA_CPUIMX35SD:
 		uart_base = MX3X_UART1_BASE_ADDR;
 		break;
 	case MACH_TYPE_MAGX_ZN5:
diff --git a/arch/arm/plat-mxc/pwm.c b/arch/arm/plat-mxc/pwm.c
index 42d74ea59084..e032717f7d02 100644
--- a/arch/arm/plat-mxc/pwm.c
+++ b/arch/arm/plat-mxc/pwm.c
@@ -32,6 +32,9 @@
 #define MX3_PWMSAR		0x0C	/* PWM Sample Register */
 #define MX3_PWMPR		0x10	/* PWM Period Register */
 #define MX3_PWMCR_PRESCALER(x)	(((x - 1) & 0xFFF) << 4)
+#define MX3_PWMCR_DOZEEN	(1 << 24)
+#define MX3_PWMCR_WAITEN	(1 << 23)
+#define MX3_PWMCR_DBGEN		(1 << 22)
 #define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16)
 #define MX3_PWMCR_CLKSRC_IPG	(1 << 16)
 #define MX3_PWMCR_EN		(1 << 0)
@@ -74,10 +77,21 @@ int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns)
 		do_div(c, period_ns);
 		duty_cycles = c;
 
+		/*
+		 * according to imx pwm RM, the real period value should be
+		 * PERIOD value in PWMPR plus 2.
+		 */
+		if (period_cycles > 2)
+			period_cycles -= 2;
+		else
+			period_cycles = 0;
+
 		writel(duty_cycles, pwm->mmio_base + MX3_PWMSAR);
 		writel(period_cycles, pwm->mmio_base + MX3_PWMPR);
 
-		cr = MX3_PWMCR_PRESCALER(prescale) | MX3_PWMCR_EN;
+		cr = MX3_PWMCR_PRESCALER(prescale) |
+			MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
+			MX3_PWMCR_DBGEN | MX3_PWMCR_EN;
 
 		if (cpu_is_mx25())
 			cr |= MX3_PWMCR_CLKSRC_IPG;
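The PWMPR-plus-2 rule the new comment cites is easy to check with concrete numbers. A standalone sketch, assuming an illustrative 1 MHz counter clock (the real clock rate comes from the prescaled IPG clock):

    #include <stdio.h>

    /* With a 1 MHz counter clock and period_ns = 1000000 (1 kHz output),
     * period_cycles = 1000; since the hardware counts PWMPR + 2 cycles,
     * the register must hold 998 to get a true 1 kHz period. */
    int main(void)
    {
            unsigned long clk_hz = 1000000;      /* assumed, illustrative */
            unsigned long period_ns = 1000000;
            unsigned long cycles = (unsigned long)
                    ((unsigned long long)clk_hz * period_ns / 1000000000ULL);
            unsigned long pwmpr = cycles > 2 ? cycles - 2 : 0;

            printf("cycles=%lu PWMPR=%lu\n", cycles, pwmpr); /* 1000, 998 */
            return 0;
    }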
diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c
index 41ab97ebe4cf..10d160888133 100644
--- a/arch/arm/plat-orion/gpio.c
+++ b/arch/arm/plat-orion/gpio.c
@@ -384,12 +384,16 @@ void __init orion_gpio_init(int gpio_base, int ngpio,
 	struct orion_gpio_chip *ochip;
 	struct irq_chip_generic *gc;
 	struct irq_chip_type *ct;
+	char gc_label[16];
 
 	if (orion_gpio_chip_count == ARRAY_SIZE(orion_gpio_chips))
 		return;
 
+	snprintf(gc_label, sizeof(gc_label), "orion_gpio%d",
+		orion_gpio_chip_count);
+
 	ochip = orion_gpio_chips + orion_gpio_chip_count;
-	ochip->chip.label = "orion_gpio";
+	ochip->chip.label = kstrdup(gc_label, GFP_KERNEL);
 	ochip->chip.request = orion_gpio_request;
 	ochip->chip.direction_input = orion_gpio_direction_input;
 	ochip->chip.get = orion_gpio_get;
diff --git a/arch/arm/plat-samsung/dev-backlight.c b/arch/arm/plat-samsung/dev-backlight.c
index e657305644cc..a976c023b286 100644
--- a/arch/arm/plat-samsung/dev-backlight.c
+++ b/arch/arm/plat-samsung/dev-backlight.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/pwm_backlight.h>
-#include <linux/slab.h>
 
 #include <plat/devs.h>
 #include <plat/gpio-cfg.h>
diff --git a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
index dac4760c0f0a..95509d8eb140 100644
--- a/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
+++ b/arch/arm/plat-samsung/include/plat/cpu-freq-core.h
@@ -202,14 +202,6 @@ extern int s3c_plltab_register(struct cpufreq_frequency_table *plls,
 extern struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void);
 extern struct s3c_iotimings *s3c_cpufreq_getiotimings(void);
 
-extern void s3c2410_iotiming_debugfs(struct seq_file *seq,
-				     struct s3c_cpufreq_config *cfg,
-				     union s3c_iobank *iob);
-
-extern void s3c2412_iotiming_debugfs(struct seq_file *seq,
-				     struct s3c_cpufreq_config *cfg,
-				     union s3c_iobank *iob);
-
 #ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUGFS
 #define s3c_cpufreq_debugfs_call(x) x
 #else
@@ -226,6 +218,10 @@ extern void s3c2410_cpufreq_setrefresh(struct s3c_cpufreq_config *cfg);
 extern void s3c2410_set_fvco(struct s3c_cpufreq_config *cfg);
 
 #ifdef CONFIG_S3C2410_IOTIMING
+extern void s3c2410_iotiming_debugfs(struct seq_file *seq,
+				     struct s3c_cpufreq_config *cfg,
+				     union s3c_iobank *iob);
+
 extern int s3c2410_iotiming_calc(struct s3c_cpufreq_config *cfg,
 				 struct s3c_iotimings *iot);
 
@@ -235,6 +231,7 @@ extern int s3c2410_iotiming_get(struct s3c_cpufreq_config *cfg,
 extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg,
 				 struct s3c_iotimings *iot);
 #else
+#define s3c2410_iotiming_debugfs NULL
 #define s3c2410_iotiming_calc NULL
 #define s3c2410_iotiming_get NULL
 #define s3c2410_iotiming_set NULL
@@ -242,8 +239,10 @@ extern void s3c2410_iotiming_set(struct s3c_cpufreq_config *cfg,
 
 /* S3C2412 compatible routines */
 
-extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg,
-				struct s3c_iotimings *timings);
+#ifdef CONFIG_S3C2412_IOTIMING
+extern void s3c2412_iotiming_debugfs(struct seq_file *seq,
+				     struct s3c_cpufreq_config *cfg,
+				     union s3c_iobank *iob);
 
 extern int s3c2412_iotiming_get(struct s3c_cpufreq_config *cfg,
 				struct s3c_iotimings *timings);
@@ -253,6 +252,12 @@ extern int s3c2412_iotiming_calc(struct s3c_cpufreq_config *cfg,
 
 extern void s3c2412_iotiming_set(struct s3c_cpufreq_config *cfg,
 				 struct s3c_iotimings *iot);
+#else
+#define s3c2412_iotiming_debugfs NULL
+#define s3c2412_iotiming_calc NULL
+#define s3c2412_iotiming_get NULL
+#define s3c2412_iotiming_set NULL
+#endif /* CONFIG_S3C2412_IOTIMING */
 
 #ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUG
 #define s3c_freq_dbg(x...) printk(KERN_INFO x)
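Both iotiming blocks now follow the common kernel header pattern of pairing each extern declaration with a #define-to-NULL fallback, so callers can reference the symbols unconditionally and just test for NULL. A generic sketch of the pattern with hypothetical names (CONFIG_FEATURE_X, feature_x_*), not the s3c API:

    #include <stdio.h>

    #ifdef CONFIG_FEATURE_X                 /* hypothetical option */
    extern void feature_x_debugfs(void *seq);
    extern int feature_x_calc(int in);
    #else
    #define feature_x_debugfs NULL          /* compiled-out fallback */
    #define feature_x_calc NULL
    #endif

    struct feature_ops {
            void (*show)(void *seq);
            int (*calc)(int in);
    };

    static struct feature_ops ops = {
            .show = feature_x_debugfs,      /* NULL when the option is off */
            .calc = feature_x_calc,
    };

    int main(void)
    {
            /* callers test for NULL instead of sprinkling #ifdefs */
            if (ops.calc)
                    printf("calc: %d\n", ops.calc(42));
            else
                    puts("iotiming-style hooks compiled out");
            return 0;
    }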
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index ef5a2a08fcca..ea3395750324 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -34,10 +34,12 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			cpu_idle_sleep();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 6a80a9e9fc4a..8dd0416673cb 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -88,10 +88,12 @@ void cpu_idle(void)
 #endif
 		if (!idle)
 			idle = default_idle;
-		tick_nohz_stop_sched_tick(1);
+		tick_nohz_idle_enter();
+		rcu_idle_enter();
 		while (!need_resched())
 			idle();
-		tick_nohz_restart_sched_tick();
+		rcu_idle_exit();
+		tick_nohz_idle_exit();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 27489b6dd533..3b7a7c483785 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -23,6 +23,9 @@ config IA64
23 select HAVE_ARCH_TRACEHOOK 23 select HAVE_ARCH_TRACEHOOK
24 select HAVE_DMA_API_DEBUG 24 select HAVE_DMA_API_DEBUG
25 select HAVE_GENERIC_HARDIRQS 25 select HAVE_GENERIC_HARDIRQS
26 select HAVE_MEMBLOCK
27 select HAVE_MEMBLOCK_NODE_MAP
28 select ARCH_DISCARD_MEMBLOCK
26 select GENERIC_IRQ_PROBE 29 select GENERIC_IRQ_PROBE
27 select GENERIC_PENDING_IRQ if SMP 30 select GENERIC_PENDING_IRQ if SMP
28 select IRQ_PER_CPU 31 select IRQ_PER_CPU
@@ -474,9 +477,6 @@ config NODES_SHIFT
474 MAX_NUMNODES will be 2^(This value). 477 MAX_NUMNODES will be 2^(This value).
475 If in doubt, use the default. 478 If in doubt, use the default.
476 479
477config ARCH_POPULATES_NODE_MAP
478 def_bool y
479
480# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. 480# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
481# VIRTUAL_MEM_MAP has been retained for historical reasons. 481# VIRTUAL_MEM_MAP has been retained for historical reasons.
482config VIRTUAL_MEM_MAP 482config VIRTUAL_MEM_MAP
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 461e52f0277f..3deac956d325 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -50,6 +50,8 @@ typedef u64 __nocast cputime64_t;
50 ((__force u64)(__ct) / NSEC_PER_USEC) 50 ((__force u64)(__ct) / NSEC_PER_USEC)
51#define usecs_to_cputime(__usecs) \ 51#define usecs_to_cputime(__usecs) \
52 (__force cputime_t)((__usecs) * NSEC_PER_USEC) 52 (__force cputime_t)((__usecs) * NSEC_PER_USEC)
53#define usecs_to_cputime64(__usecs) \
54 (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
53 55
54/* 56/*
55 * Convert cputime <-> seconds 57 * Convert cputime <-> seconds
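ia64 keeps cputime in nanoseconds, so the new 64-bit helper is the same multiply with a cputime64_t cast; the powerpc and s390 hunks below simply alias it to their existing usecs_to_cputime(). Presumably it serves callers that account microsecond deltas into cputime64_t fields; a hypothetical use:

    /* hypothetical caller: fold a microsecond delta into 64-bit
     * cputime accounting */
    cputime64_t delta = usecs_to_cputime64(irq_delta_us);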
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index f114a3b14c6a..1516d1dc11fd 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -16,6 +16,7 @@
16 */ 16 */
17#include <linux/bootmem.h> 17#include <linux/bootmem.h>
18#include <linux/efi.h> 18#include <linux/efi.h>
19#include <linux/memblock.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <linux/nmi.h> 21#include <linux/nmi.h>
21#include <linux/swap.h> 22#include <linux/swap.h>
@@ -348,7 +349,7 @@ paging_init (void)
348 printk("Virtual mem_map starts at 0x%p\n", mem_map); 349 printk("Virtual mem_map starts at 0x%p\n", mem_map);
349 } 350 }
350#else /* !CONFIG_VIRTUAL_MEM_MAP */ 351#else /* !CONFIG_VIRTUAL_MEM_MAP */
351 add_active_range(0, 0, max_low_pfn); 352 memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
352 free_area_init_nodes(max_zone_pfns); 353 free_area_init_nodes(max_zone_pfns);
353#endif /* !CONFIG_VIRTUAL_MEM_MAP */ 354#endif /* !CONFIG_VIRTUAL_MEM_MAP */
354 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); 355 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
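This is the recurring conversion of the series: add_active_range() took a node id and a PFN range, while memblock_add_node() takes a physical base and size in bytes plus the node, which is why the later hunks wrap their PFNs in PFN_PHYS(). Side by side (illustrative, not from any single file):

    /* old: PFN-based active-range registration */
    add_active_range(nid, start_pfn, end_pfn);

    /* new: byte-based memblock registration of the same range;
     * PFN_PHYS(pfn) is pfn << PAGE_SHIFT */
    memblock_add_node(PFN_PHYS(start_pfn),
                      PFN_PHYS(end_pfn - start_pfn), nid);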
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 00cb0e26c64e..13df239dbed1 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -10,6 +10,7 @@
10#include <linux/bootmem.h> 10#include <linux/bootmem.h>
11#include <linux/efi.h> 11#include <linux/efi.h>
12#include <linux/elf.h> 12#include <linux/elf.h>
13#include <linux/memblock.h>
13#include <linux/mm.h> 14#include <linux/mm.h>
14#include <linux/mmzone.h> 15#include <linux/mmzone.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -557,8 +558,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
557#endif 558#endif
558 559
559 if (start < end) 560 if (start < end)
560 add_active_range(nid, __pa(start) >> PAGE_SHIFT, 561 memblock_add_node(__pa(start), end - start, nid);
561 __pa(end) >> PAGE_SHIFT);
562 return 0; 562 return 0;
563} 563}
564 564
diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h
deleted file mode 100644
index 20a8e257c77f..000000000000
--- a/arch/microblaze/include/asm/memblock.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * Copyright (C) 2008 Michal Simek <monstr@monstr.eu>
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 */
8
9#ifndef _ASM_MICROBLAZE_MEMBLOCK_H
10#define _ASM_MICROBLAZE_MEMBLOCK_H
11
12#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */
13
14
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 95cc295976a7..7dcb5bfffb75 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -103,10 +103,12 @@ void cpu_idle(void)
103 if (!idle) 103 if (!idle)
104 idle = default_idle; 104 idle = default_idle;
105 105
106 tick_nohz_stop_sched_tick(1); 106 tick_nohz_idle_enter();
107 rcu_idle_enter();
107 while (!need_resched()) 108 while (!need_resched())
108 idle(); 109 idle();
109 tick_nohz_restart_sched_tick(); 110 rcu_idle_exit();
111 tick_nohz_idle_exit();
110 112
111 preempt_enable_no_resched(); 113 preempt_enable_no_resched();
112 schedule(); 114 schedule();
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 977484add216..80d314e81901 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -122,7 +122,6 @@ void __init early_init_devtree(void *params)
122 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line); 122 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
123 123
124 /* Scan memory nodes and rebuild MEMBLOCKs */ 124 /* Scan memory nodes and rebuild MEMBLOCKs */
125 memblock_init();
126 of_scan_flat_dt(early_init_dt_scan_root, NULL); 125 of_scan_flat_dt(early_init_dt_scan_root, NULL);
127 of_scan_flat_dt(early_init_dt_scan_memory, NULL); 126 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
128 127
@@ -130,7 +129,7 @@ void __init early_init_devtree(void *params)
130 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); 129 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
131 parse_early_param(); 130 parse_early_param();
132 131
133 memblock_analyze(); 132 memblock_allow_resize();
134 133
135 pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size()); 134 pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
136 135
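Here, as in the other early-boot hunks, memblock_init() disappears because memblock is now statically initialized, and memblock_analyze() becomes memblock_allow_resize(): regions are usable as soon as they are added, and the remaining call only permits the region arrays to be reallocated once that is safe. A condensed sketch of the new sequence, based on this hunk:

    /* no memblock_init(): memblock is statically initialized */
    of_scan_flat_dt(early_init_dt_scan_memory, NULL); /* memblock_add()s */
    memblock_allow_resize(); /* region arrays may grow from now on */
    pr_debug("Phys. mem: %lx\n", (unsigned long)memblock_phys_mem_size());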
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d46f1da18a3c..9c652eb68aaa 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -25,6 +25,9 @@ config MIPS
25 select GENERIC_IRQ_SHOW 25 select GENERIC_IRQ_SHOW
26 select HAVE_ARCH_JUMP_LABEL 26 select HAVE_ARCH_JUMP_LABEL
27 select IRQ_FORCED_THREADING 27 select IRQ_FORCED_THREADING
28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP
30 select ARCH_DISCARD_MEMBLOCK
28 31
29menu "Machine selection" 32menu "Machine selection"
30 33
@@ -2064,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE
2064 or have huge holes in the physical address space for other reasons. 2067 or have huge holes in the physical address space for other reasons.
2065 See <file:Documentation/vm/numa> for more. 2068 See <file:Documentation/vm/numa> for more.
2066 2069
2067config ARCH_POPULATES_NODE_MAP
2068 def_bool y
2069
2070config ARCH_SPARSEMEM_ENABLE 2070config ARCH_SPARSEMEM_ENABLE
2071 bool 2071 bool
2072 select SPARSEMEM_STATIC 2072 select SPARSEMEM_STATIC
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c47f96e453c0..7955409051c4 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -56,7 +56,8 @@ void __noreturn cpu_idle(void)
56 56
57 /* endless idle loop with no priority at all */ 57 /* endless idle loop with no priority at all */
58 while (1) { 58 while (1) {
59 tick_nohz_stop_sched_tick(1); 59 tick_nohz_idle_enter();
60 rcu_idle_enter();
60 while (!need_resched() && cpu_online(cpu)) { 61 while (!need_resched() && cpu_online(cpu)) {
61#ifdef CONFIG_MIPS_MT_SMTC 62#ifdef CONFIG_MIPS_MT_SMTC
62 extern void smtc_idle_loop_hook(void); 63 extern void smtc_idle_loop_hook(void);
@@ -77,7 +78,8 @@ void __noreturn cpu_idle(void)
77 system_state == SYSTEM_BOOTING)) 78 system_state == SYSTEM_BOOTING))
78 play_dead(); 79 play_dead();
79#endif 80#endif
80 tick_nohz_restart_sched_tick(); 81 rcu_idle_exit();
82 tick_nohz_idle_exit();
81 preempt_enable_no_resched(); 83 preempt_enable_no_resched();
82 schedule(); 84 schedule();
83 preempt_disable(); 85 preempt_disable();
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 84af26ab2212..b1cb8f87d7b4 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -14,6 +14,7 @@
14#include <linux/ioport.h> 14#include <linux/ioport.h>
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/screen_info.h> 16#include <linux/screen_info.h>
17#include <linux/memblock.h>
17#include <linux/bootmem.h> 18#include <linux/bootmem.h>
18#include <linux/initrd.h> 19#include <linux/initrd.h>
19#include <linux/root_dev.h> 20#include <linux/root_dev.h>
@@ -352,7 +353,7 @@ static void __init bootmem_init(void)
352 continue; 353 continue;
353#endif 354#endif
354 355
355 add_active_range(0, start, end); 356 memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
356 } 357 }
357 358
358 /* 359 /*
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index bc1297109cc5..b105eca3c020 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -12,6 +12,7 @@
12 */ 12 */
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/memblock.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/mmzone.h> 17#include <linux/mmzone.h>
17#include <linux/module.h> 18#include <linux/module.h>
@@ -381,8 +382,8 @@ static void __init szmem(void)
381 continue; 382 continue;
382 } 383 }
383 num_physpages += slot_psize; 384 num_physpages += slot_psize;
384 add_active_range(node, slot_getbasepfn(node, slot), 385 memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
385 slot_getbasepfn(node, slot) + slot_psize); 386 PFN_PHYS(slot_psize), node);
386 } 387 }
387 } 388 }
388} 389}
diff --git a/arch/openrisc/include/asm/memblock.h b/arch/openrisc/include/asm/memblock.h
deleted file mode 100644
index bbe5a1c788cb..000000000000
--- a/arch/openrisc/include/asm/memblock.h
+++ /dev/null
@@ -1,24 +0,0 @@
1/*
2 * OpenRISC Linux
3 *
4 * Linux architectural port borrowing liberally from similar works of
5 * others. All original copyrights apply as per the original source
6 * declaration.
7 *
8 * OpenRISC implementation:
9 * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
10 * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
11 * et al.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 */
18
19#ifndef __ASM_OPENRISC_MEMBLOCK_H
20#define __ASM_OPENRISC_MEMBLOCK_H
21
22/* empty */
23
24#endif /* __ASM_OPENRISC_MEMBLOCK_H */
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c
index d5bc5f813e89..e5fc78877830 100644
--- a/arch/openrisc/kernel/idle.c
+++ b/arch/openrisc/kernel/idle.c
@@ -51,7 +51,8 @@ void cpu_idle(void)
51 51
52 /* endless idle loop with no priority at all */ 52 /* endless idle loop with no priority at all */
53 while (1) { 53 while (1) {
54 tick_nohz_stop_sched_tick(1); 54 tick_nohz_idle_enter();
55 rcu_idle_enter();
55 56
56 while (!need_resched()) { 57 while (!need_resched()) {
57 check_pgt_cache(); 58 check_pgt_cache();
@@ -69,7 +70,8 @@ void cpu_idle(void)
69 set_thread_flag(TIF_POLLING_NRFLAG); 70 set_thread_flag(TIF_POLLING_NRFLAG);
70 } 71 }
71 72
72 tick_nohz_restart_sched_tick(); 73 rcu_idle_exit();
74 tick_nohz_idle_exit();
73 preempt_enable_no_resched(); 75 preempt_enable_no_resched();
74 schedule(); 76 schedule();
75 preempt_disable(); 77 preempt_disable();
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 1bb58ba89afa..3d4478f6c942 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -76,14 +76,13 @@ void __init early_init_devtree(void *params)
76 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line); 76 of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
77 77
78 /* Scan memory nodes and rebuild MEMBLOCKs */ 78 /* Scan memory nodes and rebuild MEMBLOCKs */
79 memblock_init();
80 of_scan_flat_dt(early_init_dt_scan_root, NULL); 79 of_scan_flat_dt(early_init_dt_scan_root, NULL);
81 of_scan_flat_dt(early_init_dt_scan_memory, NULL); 80 of_scan_flat_dt(early_init_dt_scan_memory, NULL);
82 81
83 /* Save command line for /proc/cmdline and then parse parameters */ 82 /* Save command line for /proc/cmdline and then parse parameters */
84 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); 83 strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
85 84
86 memblock_analyze(); 85 memblock_allow_resize();
87 86
88 /* We must copy the flattened device tree from init memory to regular 87
89 * memory because the device tree references the strings in it 88 * memory because the device tree references the strings in it
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951e18f5335b..ead0bc68439d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -117,6 +117,7 @@ config PPC
117 select HAVE_KRETPROBES 117 select HAVE_KRETPROBES
118 select HAVE_ARCH_TRACEHOOK 118 select HAVE_ARCH_TRACEHOOK
119 select HAVE_MEMBLOCK 119 select HAVE_MEMBLOCK
120 select HAVE_MEMBLOCK_NODE_MAP
120 select HAVE_DMA_ATTRS 121 select HAVE_DMA_ATTRS
121 select HAVE_DMA_API_DEBUG 122 select HAVE_DMA_API_DEBUG
122 select USE_GENERIC_SMP_HELPERS if SMP 123 select USE_GENERIC_SMP_HELPERS if SMP
@@ -421,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT
421 def_bool y 422 def_bool y
422 depends on (SMP && PPC_PSERIES) || PPC_PS3 423 depends on (SMP && PPC_PSERIES) || PPC_PS3
423 424
424config ARCH_POPULATES_NODE_MAP
425 def_bool y
426
427config SYS_SUPPORTS_HUGETLBFS 425config SYS_SUPPORTS_HUGETLBFS
428 bool 426 bool
429 427
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index e94935c52019..6ec1c380a4d6 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -134,6 +134,8 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
134 return (__force cputime_t) ct; 134 return (__force cputime_t) ct;
135} 135}
136 136
137#define usecs_to_cputime64(us) usecs_to_cputime(us)
138
137/* 139/*
138 * Convert cputime <-> seconds 140 * Convert cputime <-> seconds
139 */ 141 */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index d4df013ad779..69c7377d2071 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -381,39 +381,6 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
381} 381}
382#endif 382#endif
383 383
384static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
385 unsigned long pte_index)
386{
387 unsigned long rb, va_low;
388
389 rb = (v & ~0x7fUL) << 16; /* AVA field */
390 va_low = pte_index >> 3;
391 if (v & HPTE_V_SECONDARY)
392 va_low = ~va_low;
393 /* xor vsid from AVA */
394 if (!(v & HPTE_V_1TB_SEG))
395 va_low ^= v >> 12;
396 else
397 va_low ^= v >> 24;
398 va_low &= 0x7ff;
399 if (v & HPTE_V_LARGE) {
400 rb |= 1; /* L field */
401 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
402 (r & 0xff000)) {
403 /* non-16MB large page, must be 64k */
404 /* (masks depend on page size) */
405 rb |= 0x1000; /* page encoding in LP field */
406 rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
407 rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
408 }
409 } else {
410 /* 4kB page */
411 rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
412 }
413 rb |= (v >> 54) & 0x300; /* B field */
414 return rb;
415}
416
417/* Magic register values loaded into r3 and r4 before the 'sc' assembly 384/* Magic register values loaded into r3 and r4 before the 'sc' assembly
418 * instruction for the OSI hypercalls */ 385 * instruction for the OSI hypercalls */
419#define OSI_SC_MAGIC_R3 0x113724FA 386#define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index e43fe42b9875..d0ac94f98f9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -29,4 +29,37 @@ static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
29 29
30#define SPAPR_TCE_SHIFT 12 30#define SPAPR_TCE_SHIFT 12
31 31
32static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
33 unsigned long pte_index)
34{
35 unsigned long rb, va_low;
36
37 rb = (v & ~0x7fUL) << 16; /* AVA field */
38 va_low = pte_index >> 3;
39 if (v & HPTE_V_SECONDARY)
40 va_low = ~va_low;
41 /* xor vsid from AVA */
42 if (!(v & HPTE_V_1TB_SEG))
43 va_low ^= v >> 12;
44 else
45 va_low ^= v >> 24;
46 va_low &= 0x7ff;
47 if (v & HPTE_V_LARGE) {
48 rb |= 1; /* L field */
49 if (cpu_has_feature(CPU_FTR_ARCH_206) &&
50 (r & 0xff000)) {
51 /* non-16MB large page, must be 64k */
52 /* (masks depend on page size) */
53 rb |= 0x1000; /* page encoding in LP field */
54 rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
55 rb |= (va_low & 0xfe); /* AVAL field (P7 doesn't seem to care) */
56 }
57 } else {
58 /* 4kB page */
59 rb |= (va_low & 0x7ff) << 12; /* remaining 11b of VA */
60 }
61 rb |= (v >> 54) & 0x300; /* B field */
62 return rb;
63}
64
32#endif /* __ASM_KVM_BOOK3S_64_H__ */ 65#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h
deleted file mode 100644
index 43efc345065e..000000000000
--- a/arch/powerpc/include/asm/memblock.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _ASM_POWERPC_MEMBLOCK_H
2#define _ASM_POWERPC_MEMBLOCK_H
3
4#include <asm/udbg.h>
5
6#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt)
7
8#endif /* _ASM_POWERPC_MEMBLOCK_H */
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 39a2baa6ad58..9c3cd490b1bd 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -46,6 +46,12 @@ static int __init powersave_off(char *arg)
46} 46}
47__setup("powersave=off", powersave_off); 47__setup("powersave=off", powersave_off);
48 48
49#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS)
50static const bool idle_uses_rcu = 1;
51#else
52static const bool idle_uses_rcu;
53#endif
54
49/* 55/*
50 * The body of the idle task. 56 * The body of the idle task.
51 */ 57 */
@@ -56,7 +62,10 @@ void cpu_idle(void)
56 62
57 set_thread_flag(TIF_POLLING_NRFLAG); 63 set_thread_flag(TIF_POLLING_NRFLAG);
58 while (1) { 64 while (1) {
59 tick_nohz_stop_sched_tick(1); 65 tick_nohz_idle_enter();
66 if (!idle_uses_rcu)
67 rcu_idle_enter();
68
60 while (!need_resched() && !cpu_should_die()) { 69 while (!need_resched() && !cpu_should_die()) {
61 ppc64_runlatch_off(); 70 ppc64_runlatch_off();
62 71
@@ -93,7 +102,9 @@ void cpu_idle(void)
93 102
94 HMT_medium(); 103 HMT_medium();
95 ppc64_runlatch_on(); 104 ppc64_runlatch_on();
96 tick_nohz_restart_sched_tick(); 105 if (!idle_uses_rcu)
106 rcu_idle_exit();
107 tick_nohz_idle_exit();
97 preempt_enable_no_resched(); 108 preempt_enable_no_resched();
98 if (cpu_should_die()) 109 if (cpu_should_die())
99 cpu_die(); 110 cpu_die();
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9ce1672afb59..a2158a395d96 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -107,9 +107,6 @@ void __init reserve_crashkernel(void)
107 unsigned long long crash_size, crash_base; 107 unsigned long long crash_size, crash_base;
108 int ret; 108 int ret;
109 109
110 /* this is necessary because of memblock_phys_mem_size() */
111 memblock_analyze();
112
113 /* use common parsing */ 110 /* use common parsing */
114 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 111 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
115 &crash_size, &crash_base); 112 &crash_size, &crash_base);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa1235b0503b..abe405dab34d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -733,8 +733,6 @@ void __init early_init_devtree(void *params)
733 of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line); 733 of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
734 734
735 /* Scan memory nodes and rebuild MEMBLOCKs */ 735 /* Scan memory nodes and rebuild MEMBLOCKs */
736 memblock_init();
737
738 of_scan_flat_dt(early_init_dt_scan_root, NULL); 736 of_scan_flat_dt(early_init_dt_scan_root, NULL);
739 of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); 737 of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
740 738
@@ -756,20 +754,14 @@ void __init early_init_devtree(void *params)
756 early_reserve_mem(); 754 early_reserve_mem();
757 phyp_dump_reserve_mem(); 755 phyp_dump_reserve_mem();
758 756
759 limit = memory_limit; 757 /*
760 if (! limit) { 758 * Ensure that total memory size is page-aligned, because otherwise
761 phys_addr_t memsize; 759 * mark_bootmem() gets upset.
762 760 */
763 /* Ensure that total memory size is page-aligned, because 761 limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
764 * otherwise mark_bootmem() gets upset. */
765 memblock_analyze();
766 memsize = memblock_phys_mem_size();
767 if ((memsize & PAGE_MASK) != memsize)
768 limit = memsize & PAGE_MASK;
769 }
770 memblock_enforce_memory_limit(limit); 762 memblock_enforce_memory_limit(limit);
771 763
772 memblock_analyze(); 764 memblock_allow_resize();
773 memblock_dump_all(); 765 memblock_dump_all();
774 766
775 DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); 767 DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0cb137a9b038..336983da9e72 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -538,7 +538,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
538 tpaca->kvm_hstate.napping = 0; 538 tpaca->kvm_hstate.napping = 0;
539 vcpu->cpu = vc->pcpu; 539 vcpu->cpu = vc->pcpu;
540 smp_wmb(); 540 smp_wmb();
541#ifdef CONFIG_PPC_ICP_NATIVE 541#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
542 if (vcpu->arch.ptid) { 542 if (vcpu->arch.ptid) {
543 tpaca->cpu_start = 0x80; 543 tpaca->cpu_start = 0x80;
544 wmb(); 544 wmb();
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3c791e1eb675..e2cfb9e1e20e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -658,10 +658,12 @@ program_interrupt:
658 ulong cmd = kvmppc_get_gpr(vcpu, 3); 658 ulong cmd = kvmppc_get_gpr(vcpu, 3);
659 int i; 659 int i;
660 660
661#ifdef CONFIG_KVM_BOOK3S_64_PR
661 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { 662 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
662 r = RESUME_GUEST; 663 r = RESUME_GUEST;
663 break; 664 break;
664 } 665 }
666#endif
665 667
666 run->papr_hcall.nr = cmd; 668 run->papr_hcall.nr = cmd;
667 for (i = 0; i < 9; ++i) { 669 for (i = 0; i < 9; ++i) {
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 26d20903f2bc..8c0d45a6faf7 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -15,6 +15,7 @@
15#include <linux/kvm_host.h> 15#include <linux/kvm_host.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/export.h>
18 19
19#include <asm/reg.h> 20#include <asm/reg.h>
20#include <asm/cputable.h> 21#include <asm/cputable.h>
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 161cefde5c15..58861fa1220e 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -134,8 +134,7 @@ void __init MMU_init(void)
134 134
135 if (memblock.memory.cnt > 1) { 135 if (memblock.memory.cnt > 1) {
136#ifndef CONFIG_WII 136#ifndef CONFIG_WII
137 memblock.memory.cnt = 1; 137 memblock_enforce_memory_limit(memblock.memory.regions[0].size);
138 memblock_analyze();
139 printk(KERN_WARNING "Only using first contiguous memory region\n"); 138
140#else 139#else
141 wii_memory_fixups(); 140 wii_memory_fixups();
@@ -158,7 +157,6 @@ void __init MMU_init(void)
158#ifndef CONFIG_HIGHMEM 157#ifndef CONFIG_HIGHMEM
159 total_memory = total_lowmem; 158 total_memory = total_lowmem;
160 memblock_enforce_memory_limit(total_lowmem); 159 memblock_enforce_memory_limit(total_lowmem);
161 memblock_analyze();
162#endif /* CONFIG_HIGHMEM */ 160#endif /* CONFIG_HIGHMEM */
163 } 161 }
164 162
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2dd6bdd31fe1..8e2eb6611b0b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -199,7 +199,7 @@ void __init do_init_bootmem(void)
199 unsigned long start_pfn, end_pfn; 199 unsigned long start_pfn, end_pfn;
200 start_pfn = memblock_region_memory_base_pfn(reg); 200 start_pfn = memblock_region_memory_base_pfn(reg);
201 end_pfn = memblock_region_memory_end_pfn(reg); 201 end_pfn = memblock_region_memory_end_pfn(reg);
202 add_active_range(0, start_pfn, end_pfn); 202 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
203 } 203 }
204 204
205 /* Add all physical memory to the bootmem map, mark each area 205 /* Add all physical memory to the bootmem map, mark each area
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b22a83a91cb8..e6eea0ac80c8 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
127} 127}
128 128
129/* 129/*
130 * get_active_region_work_fn - A helper function for get_node_active_region 130 * get_node_active_region - Return active region containing pfn
131 * Returns datax set to the start_pfn and end_pfn if they contain
132 * the initial value of datax->start_pfn between them
133 * @start_pfn: start page(inclusive) of region to check
134 * @end_pfn: end page(exclusive) of region to check
135 * @datax: comes in with ->start_pfn set to value to search for and
136 * goes out with active range if it contains it
137 * Returns 1 if search value is in range else 0
138 */
139static int __init get_active_region_work_fn(unsigned long start_pfn,
140 unsigned long end_pfn, void *datax)
141{
142 struct node_active_region *data;
143 data = (struct node_active_region *)datax;
144
145 if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
146 data->start_pfn = start_pfn;
147 data->end_pfn = end_pfn;
148 return 1;
149 }
150 return 0;
151
152}
153
154/*
155 * get_node_active_region - Return active region containing start_pfn
156 * Active range returned is empty if none found. 131 * Active range returned is empty if none found.
157 * @start_pfn: The page to return the region for. 132 * @pfn: The page to return the region for
158 * @node_ar: Returned set to the active region containing start_pfn 133 * @node_ar: Returned set to the active region containing @pfn
159 */ 134 */
160static void __init get_node_active_region(unsigned long start_pfn, 135static void __init get_node_active_region(unsigned long pfn,
161 struct node_active_region *node_ar) 136 struct node_active_region *node_ar)
162{ 137{
163 int nid = early_pfn_to_nid(start_pfn); 138 unsigned long start_pfn, end_pfn;
139 int i, nid;
164 140
165 node_ar->nid = nid; 141 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
166 node_ar->start_pfn = start_pfn; 142 if (pfn >= start_pfn && pfn < end_pfn) {
167 node_ar->end_pfn = start_pfn; 143 node_ar->nid = nid;
168 work_with_active_regions(nid, get_active_region_work_fn, node_ar); 144 node_ar->start_pfn = start_pfn;
145 node_ar->end_pfn = end_pfn;
146 break;
147 }
148 }
169} 149}
170 150
171static void map_cpu_to_node(int cpu, int node) 151static void map_cpu_to_node(int cpu, int node)
@@ -710,9 +690,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
710 node_set_online(nid); 690 node_set_online(nid);
711 sz = numa_enforce_memory_limit(base, size); 691 sz = numa_enforce_memory_limit(base, size);
712 if (sz) 692 if (sz)
713 add_active_range(nid, base >> PAGE_SHIFT, 693 memblock_set_node(base, sz, nid);
714 (base >> PAGE_SHIFT)
715 + (sz >> PAGE_SHIFT));
716 } while (--ranges); 694 } while (--ranges);
717 } 695 }
718} 696}
@@ -802,8 +780,7 @@ new_range:
802 continue; 780 continue;
803 } 781 }
804 782
805 add_active_range(nid, start >> PAGE_SHIFT, 783 memblock_set_node(start, size, nid);
806 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
807 784
808 if (--ranges) 785 if (--ranges)
809 goto new_range; 786 goto new_range;
@@ -839,7 +816,8 @@ static void __init setup_nonnuma(void)
839 end_pfn = memblock_region_memory_end_pfn(reg); 816 end_pfn = memblock_region_memory_end_pfn(reg);
840 817
841 fake_numa_create_new_node(end_pfn, &nid); 818 fake_numa_create_new_node(end_pfn, &nid);
842 add_active_range(nid, start_pfn, end_pfn); 819 memblock_set_node(PFN_PHYS(start_pfn),
820 PFN_PHYS(end_pfn - start_pfn), nid);
843 node_set_online(nid); 821 node_set_online(nid);
844 } 822 }
845} 823}
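Two related helpers appear in these powerpc hunks and are easy to conflate: memblock_add_node() registers a new range and tags it with a node in one step, while memblock_set_node() assigns a node id to memory that memblock already knows about, such as ranges added earlier from the device tree. An illustrative pairing under that reading:

    /* range registered during the early device-tree scan: */
    memblock_add(base, size);

    /* ... later, in NUMA setup, attribute it to a node: */
    memblock_set_node(base, size, nid);

    /* or, where the range is first seen with its node already known: */
    memblock_add_node(base, size, nid);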
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 4e13d6f9023e..573ba3b69d1f 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu)
615 615
616 /* limit memory so we dont have linear faults */ 616 /* limit memory so we dont have linear faults */
617 memblock_enforce_memory_limit(linear_map_top); 617 memblock_enforce_memory_limit(linear_map_top);
618 memblock_analyze();
619 618
620 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); 619 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
621 patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); 620 patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 1b5dc1a2e145..6d8dadf19f0b 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -79,24 +79,19 @@ void __init wii_memory_fixups(void)
79 BUG_ON(memblock.memory.cnt != 2); 79 BUG_ON(memblock.memory.cnt != 2);
80 BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); 80 BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
81 81
82 p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE); 82 /* trim unaligned tail */
83 p[1].size = _ALIGN_DOWN(p[1].size, PAGE_SIZE); 83 memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
84 (phys_addr_t)ULLONG_MAX);
84 85
85 wii_hole_start = p[0].base + p[0].size; 86 /* determine hole, add & reserve them */
87 wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
86 wii_hole_size = p[1].base - wii_hole_start; 88 wii_hole_size = p[1].base - wii_hole_start;
87 89 memblock_add(wii_hole_start, wii_hole_size);
88 pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size);
89 pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size);
90 pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size);
91
92 p[0].size += wii_hole_size + p[1].size;
93
94 memblock.memory.cnt = 1;
95 memblock_analyze();
96
97 /* reserve the hole */
98 memblock_reserve(wii_hole_start, wii_hole_size); 90 memblock_reserve(wii_hole_start, wii_hole_size);
99 91
92 BUG_ON(memblock.memory.cnt != 1);
93 __memblock_dump_all();
94
100 /* allow ioremapping the address space in the hole */ 95 /* allow ioremapping the address space in the hole */
101 __allow_ioremap_reserved = 1; 96 __allow_ioremap_reserved = 1;
102} 97}
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index ea0acbd8966d..8fc62586a973 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -563,7 +563,8 @@ static void yield_shared_processor(void)
563static void iseries_shared_idle(void) 563static void iseries_shared_idle(void)
564{ 564{
565 while (1) { 565 while (1) {
566 tick_nohz_stop_sched_tick(1); 566 tick_nohz_idle_enter();
567 rcu_idle_enter();
567 while (!need_resched() && !hvlpevent_is_pending()) { 568 while (!need_resched() && !hvlpevent_is_pending()) {
568 local_irq_disable(); 569 local_irq_disable();
569 ppc64_runlatch_off(); 570 ppc64_runlatch_off();
@@ -577,7 +578,8 @@ static void iseries_shared_idle(void)
577 } 578 }
578 579
579 ppc64_runlatch_on(); 580 ppc64_runlatch_on();
580 tick_nohz_restart_sched_tick(); 581 rcu_idle_exit();
582 tick_nohz_idle_exit();
581 583
582 if (hvlpevent_is_pending()) 584 if (hvlpevent_is_pending())
583 process_iSeries_events(); 585 process_iSeries_events();
@@ -593,7 +595,8 @@ static void iseries_dedicated_idle(void)
593 set_thread_flag(TIF_POLLING_NRFLAG); 595 set_thread_flag(TIF_POLLING_NRFLAG);
594 596
595 while (1) { 597 while (1) {
596 tick_nohz_stop_sched_tick(1); 598 tick_nohz_idle_enter();
599 rcu_idle_enter();
597 if (!need_resched()) { 600 if (!need_resched()) {
598 while (!need_resched()) { 601 while (!need_resched()) {
599 ppc64_runlatch_off(); 602 ppc64_runlatch_off();
@@ -610,7 +613,8 @@ static void iseries_dedicated_idle(void)
610 } 613 }
611 614
612 ppc64_runlatch_on(); 615 ppc64_runlatch_on();
613 tick_nohz_restart_sched_tick(); 616 rcu_idle_exit();
617 tick_nohz_idle_exit();
614 preempt_enable_no_resched(); 618 preempt_enable_no_resched();
615 schedule(); 619 schedule();
616 preempt_disable(); 620 preempt_disable();
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 72714ad27842..8bd6ba542691 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -319,7 +319,6 @@ static int __init ps3_mm_add_memory(void)
319 } 319 }
320 320
321 memblock_add(start_addr, map.r1.size); 321 memblock_add(start_addr, map.r1.size);
322 memblock_analyze();
323 322
324 result = online_pages(start_pfn, nr_pages); 323 result = online_pages(start_pfn, nr_pages);
325 324
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 27a49508b410..52d429be6c76 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -555,6 +555,8 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
555 555
556 (*depth)++; 556 (*depth)++;
557 trace_hcall_entry(opcode, args); 557 trace_hcall_entry(opcode, args);
558 if (opcode == H_CEDE)
559 rcu_idle_enter();
558 (*depth)--; 560 (*depth)--;
559 561
560out: 562out:
@@ -575,6 +577,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval,
575 goto out; 577 goto out;
576 578
577 (*depth)++; 579 (*depth)++;
580 if (opcode == H_CEDE)
581 rcu_idle_exit();
578 trace_hcall_exit(opcode, retval, retbuf); 582 trace_hcall_exit(opcode, retval, retbuf);
579 (*depth)--; 583 (*depth)--;
580 584
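This pair closes the loop opened by idle_uses_rcu in the powerpc idle.c hunk above: with PPC_PSERIES and TRACEPOINTS enabled, the CPU actually goes idle inside the H_CEDE hypercall, and the hcall tracepoints themselves use RCU. RCU idle entry therefore has to happen after trace_hcall_entry() and exit before trace_hcall_exit(), so cpu_idle() leaves the generic calls out and the wrappers here bracket only the ceded interval:

    trace_hcall_entry(opcode, args);   /* tracepoint still needs RCU */
    if (opcode == H_CEDE)
            rcu_idle_enter();          /* CPU sleeps in the hypervisor */
    /* ... hypervisor returns ... */
    if (opcode == H_CEDE)
            rcu_idle_exit();
    trace_hcall_exit(opcode, retval, retbuf);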
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373679b3744a..d48ede334434 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -92,6 +92,9 @@ config S390
92 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 92 select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
93 select HAVE_RCU_TABLE_FREE if SMP 93 select HAVE_RCU_TABLE_FREE if SMP
94 select ARCH_SAVE_PAGE_KEYS if HIBERNATION 94 select ARCH_SAVE_PAGE_KEYS if HIBERNATION
95 select HAVE_MEMBLOCK
96 select HAVE_MEMBLOCK_NODE_MAP
97 select ARCH_DISCARD_MEMBLOCK
95 select ARCH_INLINE_SPIN_TRYLOCK 98 select ARCH_INLINE_SPIN_TRYLOCK
96 select ARCH_INLINE_SPIN_TRYLOCK_BH 99 select ARCH_INLINE_SPIN_TRYLOCK_BH
97 select ARCH_INLINE_SPIN_LOCK 100 select ARCH_INLINE_SPIN_LOCK
@@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK
345 348
346 Say N if you are unsure. 349 Say N if you are unsure.
347 350
348config ARCH_POPULATES_NODE_MAP
349 def_bool y
350
351comment "Kernel preemption" 351comment "Kernel preemption"
352 352
353source "kernel/Kconfig.preempt" 353source "kernel/Kconfig.preempt"
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 0887a0463e33..c23c3900c304 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -72,6 +72,8 @@ static inline cputime_t usecs_to_cputime(const unsigned int m)
72 return (__force cputime_t)(m * 4096ULL); 72 return (__force cputime_t)(m * 4096ULL);
73} 73}
74 74
75#define usecs_to_cputime64(m) usecs_to_cputime(m)
76
75/* 77/*
76 * Convert cputime to milliseconds and back. 78 * Convert cputime to milliseconds and back.
77 */ 79 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9451b210a1b4..3201ae447990 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,10 +91,12 @@ static void default_idle(void)
91void cpu_idle(void) 91void cpu_idle(void)
92{ 92{
93 for (;;) { 93 for (;;) {
94 tick_nohz_stop_sched_tick(1); 94 tick_nohz_idle_enter();
95 rcu_idle_enter();
95 while (!need_resched()) 96 while (!need_resched())
96 default_idle(); 97 default_idle();
97 tick_nohz_restart_sched_tick(); 98 rcu_idle_exit();
99 tick_nohz_idle_exit();
98 preempt_enable_no_resched(); 100 preempt_enable_no_resched();
99 schedule(); 101 schedule();
100 preempt_disable(); 102 preempt_disable();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e54c4ff8abaa..f11d1b037c50 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/memblock.h>
24#include <linux/mm.h> 25#include <linux/mm.h>
25#include <linux/stddef.h> 26#include <linux/stddef.h>
26#include <linux/unistd.h> 27#include <linux/unistd.h>
@@ -820,7 +821,8 @@ setup_memory(void)
820 end_chunk = min(end_chunk, end_pfn); 821 end_chunk = min(end_chunk, end_pfn);
821 if (start_chunk >= end_chunk) 822 if (start_chunk >= end_chunk)
822 continue; 823 continue;
823 add_active_range(0, start_chunk, end_chunk); 824 memblock_add_node(PFN_PHYS(start_chunk),
825 PFN_PHYS(end_chunk - start_chunk), 0);
824 pfn = max(start_chunk, start_pfn); 826 pfn = max(start_chunk, start_pfn);
825 for (; pfn < end_chunk; pfn++) 827 for (; pfn < end_chunk; pfn++)
826 page_set_storage_key(PFN_PHYS(pfn), 828 page_set_storage_key(PFN_PHYS(pfn),
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index f43c0e4282af..9daee91e6c3f 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -22,6 +22,7 @@
22#include <asm/irq.h> 22#include <asm/irq.h>
23 23
24#include "hwsampler.h" 24#include "hwsampler.h"
25#include "op_counter.h"
25 26
26#define MAX_NUM_SDB 511 27#define MAX_NUM_SDB 511
27#define MIN_NUM_SDB 1 28#define MIN_NUM_SDB 1
@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
896 if (sample_data_ptr->P == 1) { 897 if (sample_data_ptr->P == 1) {
897 /* userspace sample */ 898 /* userspace sample */
898 unsigned int pid = sample_data_ptr->prim_asn; 899 unsigned int pid = sample_data_ptr->prim_asn;
900 if (!counter_config.user)
901 goto skip_sample;
899 rcu_read_lock(); 902 rcu_read_lock();
900 tsk = pid_task(find_vpid(pid), PIDTYPE_PID); 903 tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
901 if (tsk) 904 if (tsk)
@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
903 rcu_read_unlock(); 906 rcu_read_unlock();
904 } else { 907 } else {
905 /* kernelspace sample */ 908 /* kernelspace sample */
909 if (!counter_config.kernel)
910 goto skip_sample;
906 regs = task_pt_regs(current); 911 regs = task_pt_regs(current);
907 } 912 }
908 913
@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
910 oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, 915 oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
911 !sample_data_ptr->P, tsk); 916 !sample_data_ptr->P, tsk);
912 mutex_unlock(&hws_sem); 917 mutex_unlock(&hws_sem);
913 918 skip_sample:
914 sample_data_ptr++; 919 sample_data_ptr++;
915 } 920 }
916} 921}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 6efc18b5e60a..2297be406c61 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -2,10 +2,11 @@
2 * arch/s390/oprofile/init.c 2 * arch/s390/oprofile/init.c
3 * 3 *
4 * S390 Version 4 * S390 Version
5 * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation 5 * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
6 * Author(s): Thomas Spatzier (tspat@de.ibm.com) 6 * Author(s): Thomas Spatzier (tspat@de.ibm.com)
7 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) 7 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
8 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) 8 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
9 * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
9 * 10 *
10 * @remark Copyright 2002-2011 OProfile authors 11 * @remark Copyright 2002-2011 OProfile authors
11 */ 12 */
@@ -14,6 +15,8 @@
14#include <linux/init.h> 15#include <linux/init.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/module.h>
19#include <asm/processor.h>
17 20
18#include "../../../drivers/oprofile/oprof.h" 21#include "../../../drivers/oprofile/oprof.h"
19 22
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
22#ifdef CONFIG_64BIT 25#ifdef CONFIG_64BIT
23 26
24#include "hwsampler.h" 27#include "hwsampler.h"
28#include "op_counter.h"
25 29
26#define DEFAULT_INTERVAL 4127518 30#define DEFAULT_INTERVAL 4127518
27 31
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
35static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; 39static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
36static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; 40static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
37 41
38static int hwsampler_file; 42static int hwsampler_enabled;
39static int hwsampler_running; /* start_mutex must be held to change */ 43static int hwsampler_running; /* start_mutex must be held to change */
44static int hwsampler_available;
40 45
41static struct oprofile_operations timer_ops; 46static struct oprofile_operations timer_ops;
42 47
48struct op_counter_config counter_config;
49
50enum __force_cpu_type {
51 reserved = 0, /* do not force */
52 timer,
53};
54static int force_cpu_type;
55
56static int set_cpu_type(const char *str, struct kernel_param *kp)
57{
58 if (!strcmp(str, "timer")) {
59 force_cpu_type = timer;
60 printk(KERN_INFO "oprofile: forcing timer to be returned "
61 "as cpu type\n");
62 } else {
63 force_cpu_type = 0;
64 }
65
66 return 0;
67}
68module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
69MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling "
70 "(report cpu_type \"timer\")");
71
43static int oprofile_hwsampler_start(void) 72static int oprofile_hwsampler_start(void)
44{ 73{
45 int retval; 74 int retval;
46 75
47 hwsampler_running = hwsampler_file; 76 hwsampler_running = hwsampler_enabled;
48 77
49 if (!hwsampler_running) 78 if (!hwsampler_running)
50 return timer_ops.start(); 79 return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
72 return; 101 return;
73} 102}
74 103
104/*
105 * File ops used for:
106 * /dev/oprofile/0/enabled
107 * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
108 */
109
75static ssize_t hwsampler_read(struct file *file, char __user *buf, 110static ssize_t hwsampler_read(struct file *file, char __user *buf,
76 size_t count, loff_t *offset) 111 size_t count, loff_t *offset)
77{ 112{
78 return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); 113 return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
79} 114}
80 115
81static ssize_t hwsampler_write(struct file *file, char const __user *buf, 116static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -88,9 +123,12 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
88 return -EINVAL; 123 return -EINVAL;
89 124
90 retval = oprofilefs_ulong_from_user(&val, buf, count); 125 retval = oprofilefs_ulong_from_user(&val, buf, count);
91 if (retval) 126 if (retval <= 0)
92 return retval; 127 return retval;
93 128
129 if (val != 0 && val != 1)
130 return -EINVAL;
131
94 if (oprofile_started) 132 if (oprofile_started)
95 /* 133 /*
96 * safe to do without locking as we set 134
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
99 */ 137 */
100 return -EBUSY; 138 return -EBUSY;
101 139
102 hwsampler_file = val; 140 hwsampler_enabled = val;
103 141
104 return count; 142 return count;
105} 143}
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
109 .write = hwsampler_write, 147 .write = hwsampler_write,
110}; 148};
111 149
150/*
151 * File ops used for:
152 * /dev/oprofile/0/count
153 * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
154 *
155 * Make sure that the value is within the hardware range.
156 */
157
158static ssize_t hw_interval_read(struct file *file, char __user *buf,
159 size_t count, loff_t *offset)
160{
161 return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
162 count, offset);
163}
164
165static ssize_t hw_interval_write(struct file *file, char const __user *buf,
166 size_t count, loff_t *offset)
167{
168 unsigned long val;
169 int retval;
170
171 if (*offset)
172 return -EINVAL;
173 retval = oprofilefs_ulong_from_user(&val, buf, count);
174 if (retval)
175 return retval;
176 if (val < oprofile_min_interval)
177 oprofile_hw_interval = oprofile_min_interval;
178 else if (val > oprofile_max_interval)
179 oprofile_hw_interval = oprofile_max_interval;
180 else
181 oprofile_hw_interval = val;
182
183 return count;
184}
185
186static const struct file_operations hw_interval_fops = {
187 .read = hw_interval_read,
188 .write = hw_interval_write,
189};
190
191/*
192 * File ops used for:
193 * /dev/oprofile/0/event
194 * Only a single event with number 0 is supported with this counter.
195 *
196 * /dev/oprofile/0/unit_mask
197 * This is a dummy file needed by the user space tools.
198 * No value other than 0 is accepted or returned.
199 */
200
201static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
202 size_t count, loff_t *offset)
203{
204 return oprofilefs_ulong_to_user(0, buf, count, offset);
205}
206
207static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
208 size_t count, loff_t *offset)
209{
210 unsigned long val;
211 int retval;
212
213 if (*offset)
214 return -EINVAL;
215
216 retval = oprofilefs_ulong_from_user(&val, buf, count);
217 if (retval)
218 return retval;
219 if (val != 0)
220 return -EINVAL;
221 return count;
222}
223
224static const struct file_operations zero_fops = {
225 .read = hwsampler_zero_read,
226 .write = hwsampler_zero_write,
227};
228
229/* /dev/oprofile/0/kernel file ops. */
230
231static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
232 size_t count, loff_t *offset)
233{
234 return oprofilefs_ulong_to_user(counter_config.kernel,
235 buf, count, offset);
236}
237
238static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
239 size_t count, loff_t *offset)
240{
241 unsigned long val;
242 int retval;
243
244 if (*offset)
245 return -EINVAL;
246
247 retval = oprofilefs_ulong_from_user(&val, buf, count);
248 if (retval)
249 return retval;
250
251 if (val != 0 && val != 1)
252 return -EINVAL;
253
254 counter_config.kernel = val;
255
256 return count;
257}
258
259static const struct file_operations kernel_fops = {
260 .read = hwsampler_kernel_read,
261 .write = hwsampler_kernel_write,
262};
263
264/* /dev/oprofile/0/user file ops. */
265
266static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
267 size_t count, loff_t *offset)
268{
269 return oprofilefs_ulong_to_user(counter_config.user,
270 buf, count, offset);
271}
272
273static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
274 size_t count, loff_t *offset)
275{
276 unsigned long val;
277 int retval;
278
279 if (*offset)
280 return -EINVAL;
281
282 retval = oprofilefs_ulong_from_user(&val, buf, count);
283 if (retval)
284 return retval;
285
286 if (val != 0 && val != 1)
287 return -EINVAL;
288
289 counter_config.user = val;
290
291 return count;
292}
293
294static const struct file_operations user_fops = {
295 .read = hwsampler_user_read,
296 .write = hwsampler_user_write,
297};
298
299
300/*
301 * File ops used for: /dev/oprofile/timer/enabled
302 * The value always has to be the inverted value of hwsampler_enabled. So
303 * no separate variable is created. That way we do not need locking.
304 */
305
306static ssize_t timer_enabled_read(struct file *file, char __user *buf,
307 size_t count, loff_t *offset)
308{
309 return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
310}
311
312static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
313 size_t count, loff_t *offset)
314{
315 unsigned long val;
316 int retval;
317
318 if (*offset)
319 return -EINVAL;
320
321 retval = oprofilefs_ulong_from_user(&val, buf, count);
322 if (retval)
323 return retval;
324
325 if (val != 0 && val != 1)
326 return -EINVAL;
327
328 /* Timer cannot be disabled without having hardware sampling. */
329 if (val == 0 && !hwsampler_available)
330 return -EINVAL;
331
332 if (oprofile_started)
333 /*
334 * save to do without locking as we set
335 * hwsampler_running in start() when start_mutex is
336 * held
337 */
338 return -EBUSY;
339
340 hwsampler_enabled = !val;
341
342 return count;
343}
344
345static const struct file_operations timer_enabled_fops = {
346 .read = timer_enabled_read,
347 .write = timer_enabled_write,
348};
349
350
112static int oprofile_create_hwsampling_files(struct super_block *sb, 351static int oprofile_create_hwsampling_files(struct super_block *sb,
113 struct dentry *root) 352 struct dentry *root)
114{ 353{
115 struct dentry *hw_dir; 354 struct dentry *dir;
355
356 dir = oprofilefs_mkdir(sb, root, "timer");
357 if (!dir)
358 return -EINVAL;
359
360 oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
361
362 if (!hwsampler_available)
363 return 0;
116 364
117 /* reinitialize default values */ 365 /* reinitialize default values */
118 hwsampler_file = 1; 366 hwsampler_enabled = 1;
367 counter_config.kernel = 1;
368 counter_config.user = 1;
119 369
120 hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); 370 if (!force_cpu_type) {
121 if (!hw_dir) 371 /*
122 return -EINVAL; 372 * Create the counter file system. A single virtual
373 * counter is created which can be used to
374 * enable/disable hardware sampling dynamically from
375 * user space. The user space will configure a single
376 * counter with a single event. The value of 'event'
377 * and 'unit_mask' are not evaluated by the kernel code
378 * and can only be set to 0.
379 */
380
381 dir = oprofilefs_mkdir(sb, root, "0");
382 if (!dir)
383 return -EINVAL;
123 384
124 oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); 385 oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
125 oprofilefs_create_ulong(sb, hw_dir, "hw_interval", 386 oprofilefs_create_file(sb, dir, "event", &zero_fops);
126 &oprofile_hw_interval); 387 oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
127 oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", 388 oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
128 &oprofile_min_interval); 389 oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
129 oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", 390 oprofilefs_create_file(sb, dir, "user", &user_fops);
130 &oprofile_max_interval); 391 oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
131 oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", 392 &oprofile_sdbt_blocks);
132 &oprofile_sdbt_blocks);
133 393
394 } else {
395 /*
396 * Hardware sampling can be used but the cpu_type is
397 * forced to timer in order to deal with legacy user
398 * space tools. The /dev/oprofile/hwsampling fs is
399 * provided in that case.
400 */
401 dir = oprofilefs_mkdir(sb, root, "hwsampling");
402 if (!dir)
403 return -EINVAL;
404
405 oprofilefs_create_file(sb, dir, "hwsampler",
406 &hwsampler_fops);
407 oprofilefs_create_file(sb, dir, "hw_interval",
408 &hw_interval_fops);
409 oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
410 &oprofile_min_interval);
411 oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
412 &oprofile_max_interval);
413 oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
414 &oprofile_sdbt_blocks);
415 }
134 return 0; 416 return 0;
135} 417}
136 418
137static int oprofile_hwsampler_init(struct oprofile_operations *ops) 419static int oprofile_hwsampler_init(struct oprofile_operations *ops)
138{ 420{
421 /*
422 * Initialize the timer mode infrastructure as well in order
423 * to be able to switch back dynamically. oprofile_timer_init
424 * is not supposed to fail.
425 */
426 if (oprofile_timer_init(ops))
427 BUG();
428
429 memcpy(&timer_ops, ops, sizeof(timer_ops));
430 ops->create_files = oprofile_create_hwsampling_files;
431
432 /*
433 * If the user space tools do not support newer cpu types,
434 * the force_cpu_type module parameter
435 * can be used to always return \"timer\" as cpu type.
436 */
437 if (force_cpu_type != timer) {
438 struct cpuid id;
439
440 get_cpu_id (&id);
441
442 switch (id.machine) {
443 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
444 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
445 default: return -ENODEV;
446 }
447 }
448
139 if (hwsampler_setup()) 449 if (hwsampler_setup())
140 return -ENODEV; 450 return -ENODEV;
141 451
142 /* 452 /*
143 * create hwsampler files only if hwsampler_setup() succeeds. 453 * Query the range for the sampling interval from the
454 * hardware.
144 */ 455 */
145 oprofile_min_interval = hwsampler_query_min_interval(); 456 oprofile_min_interval = hwsampler_query_min_interval();
146 if (oprofile_min_interval == 0) 457 if (oprofile_min_interval == 0)
@@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
155 if (oprofile_hw_interval > oprofile_max_interval) 466 if (oprofile_hw_interval > oprofile_max_interval)
156 oprofile_hw_interval = oprofile_max_interval; 467 oprofile_hw_interval = oprofile_max_interval;
157 468
158 if (oprofile_timer_init(ops)) 469 printk(KERN_INFO "oprofile: System z hardware sampling "
159 return -ENODEV; 470 "facility found.\n");
160
161 printk(KERN_INFO "oprofile: using hardware sampling\n");
162
163 memcpy(&timer_ops, ops, sizeof(timer_ops));
164 471
165 ops->start = oprofile_hwsampler_start; 472 ops->start = oprofile_hwsampler_start;
166 ops->stop = oprofile_hwsampler_stop; 473 ops->stop = oprofile_hwsampler_stop;
167 ops->create_files = oprofile_create_hwsampling_files;
168 474
169 return 0; 475 return 0;
170} 476}
171 477
172static void oprofile_hwsampler_exit(void) 478static void oprofile_hwsampler_exit(void)
173{ 479{
174 oprofile_timer_exit();
175 hwsampler_shutdown(); 480 hwsampler_shutdown();
176} 481}
177 482
@@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
182 ops->backtrace = s390_backtrace; 487 ops->backtrace = s390_backtrace;
183 488
184#ifdef CONFIG_64BIT 489#ifdef CONFIG_64BIT
185 return oprofile_hwsampler_init(ops); 490
491 /*
492 * -ENODEV is not reported to the caller. The module itself
493 * will use the timer mode sampling as fallback and this is
494 * always available.
495 */
496 hwsampler_available = oprofile_hwsampler_init(ops) == 0;
497
498 return 0;
186#else 499#else
187 return -ENODEV; 500 return -ENODEV;
188#endif 501#endif
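Collecting the comments from the hunks above, oprofile_create_hwsampling_files() now produces one of two layouts; the "0" virtual counter is created only when the cpu_type module parameter does not force "timer":

    /dev/oprofile/timer/enabled      always present; inverse of the
                                     hardware-sampler enable
    /dev/oprofile/0/                 default layout: enabled, event, count,
                                     unit_mask, kernel, user, hw_sdbt_blocks
    /dev/oprofile/hwsampling/        legacy layout (cpu_type=timer):
                                     hwsampler, hw_interval, hw_min_interval,
                                     hw_max_interval, hw_sdbt_blocks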
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 000000000000..1a8d3ca09014
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,23 @@
1/**
2 * arch/s390/oprofile/op_counter.h
3 *
4 * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
5 * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
6 *
7 * @remark Copyright 2011 OProfile authors
8 */
9
10#ifndef OP_COUNTER_H
11#define OP_COUNTER_H
12
13struct op_counter_config {
14 /* `enabled' maps to the hwsampler_file variable. */
15 /* `count' maps to the oprofile_hw_interval variable. */
16 /* `event' and `unit_mask' are unused. */
17 unsigned long kernel;
18 unsigned long user;
19};
20
21extern struct op_counter_config counter_config;
22
23#endif /* OP_COUNTER_H */
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index df169e84db4e..8b0c9464aa9d 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -4,6 +4,9 @@ config SCORE
4 def_bool y 4 def_bool y
5 select HAVE_GENERIC_HARDIRQS 5 select HAVE_GENERIC_HARDIRQS
6 select GENERIC_IRQ_SHOW 6 select GENERIC_IRQ_SHOW
7 select HAVE_MEMBLOCK
8 select HAVE_MEMBLOCK_NODE_MAP
9 select ARCH_DISCARD_MEMBLOCK
7 10
8choice 11choice
9 prompt "System type" 12 prompt "System type"
@@ -60,9 +63,6 @@ config 32BIT
60config ARCH_FLATMEM_ENABLE 63config ARCH_FLATMEM_ENABLE
61 def_bool y 64 def_bool y
62 65
63config ARCH_POPULATES_NODE_MAP
64 def_bool y
65
66source "mm/Kconfig" 66source "mm/Kconfig"
67 67
68config MEMORY_START 68config MEMORY_START
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index 6f898c057878..b48459afefdd 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -26,6 +26,7 @@
26#include <linux/bootmem.h> 26#include <linux/bootmem.h>
27#include <linux/initrd.h> 27#include <linux/initrd.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/memblock.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
30#include <linux/seq_file.h> 31#include <linux/seq_file.h>
31#include <linux/screen_info.h> 32#include <linux/screen_info.h>
@@ -54,7 +55,8 @@ static void __init bootmem_init(void)
54 /* Initialize the boot-time allocator with low memory only. */ 55 /* Initialize the boot-time allocator with low memory only. */
55 bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn, 56 bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn,
56 min_low_pfn, max_low_pfn); 57 min_low_pfn, max_low_pfn);
57 add_active_range(0, min_low_pfn, max_low_pfn); 58 memblock_add_node(PFN_PHYS(min_low_pfn),
59 PFN_PHYS(max_low_pfn - min_low_pfn), 0);
58 60
59 free_bootmem(PFN_PHYS(start_pfn), 61 free_bootmem(PFN_PHYS(start_pfn),
60 (max_low_pfn - start_pfn) << PAGE_SHIFT); 62 (max_low_pfn - start_pfn) << PAGE_SHIFT);
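
This hunk shows the conversion pattern that repeats throughout the series: the PFN-based add_active_range() table is gone, and early memory is registered, with its node id, directly in memblock (the sh and sparc hunks below use the sibling memblock_set_node(), which tags ranges that memblock already knows about). A minimal sketch of the before/after shape, using the signatures as they appear in this patch:

        #include <linux/memblock.h>
        #include <linux/pfn.h>

        static void __init register_early_memory(int nid, unsigned long start_pfn,
                                                 unsigned long end_pfn)
        {
                /* Old: add_active_range(nid, start_pfn, end_pfn);
                 * kept per-node PFN ranges in a separate early table. */

                /* New: record the byte range straight into memblock,
                 * tagged with the node that owns it. */
                memblock_add_node(PFN_PHYS(start_pfn),
                                  PFN_PHYS(end_pfn - start_pfn), nid);
        }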
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5629e2099130..47a2f1c2cb0d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -4,6 +4,7 @@ config SUPERH
4 select CLKDEV_LOOKUP 4 select CLKDEV_LOOKUP
5 select HAVE_IDE if HAS_IOPORT 5 select HAVE_IDE if HAS_IOPORT
6 select HAVE_MEMBLOCK 6 select HAVE_MEMBLOCK
7 select HAVE_MEMBLOCK_NODE_MAP
7 select HAVE_OPROFILE 8 select HAVE_OPROFILE
8 select HAVE_GENERIC_DMA_COHERENT 9 select HAVE_GENERIC_DMA_COHERENT
9 select HAVE_ARCH_TRACEHOOK 10 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c
index ec8c84c14b17..895e337c79b6 100644
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -50,9 +50,9 @@ static struct platform_device heartbeat_device = {
50#define GBECONT 0xffc10100 50#define GBECONT 0xffc10100
51#define GBECONT_RMII1 BIT(17) 51#define GBECONT_RMII1 BIT(17)
52#define GBECONT_RMII0 BIT(16) 52#define GBECONT_RMII0 BIT(16)
53static void sh7757_eth_set_mdio_gate(unsigned long addr) 53static void sh7757_eth_set_mdio_gate(void *addr)
54{ 54{
55 if ((addr & 0x00000fff) < 0x0800) 55 if (((unsigned long)addr & 0x00000fff) < 0x0800)
56 writel(readl(GBECONT) | GBECONT_RMII0, GBECONT); 56 writel(readl(GBECONT) | GBECONT_RMII0, GBECONT);
57 else 57 else
58 writel(readl(GBECONT) | GBECONT_RMII1, GBECONT); 58 writel(readl(GBECONT) | GBECONT_RMII1, GBECONT);
@@ -116,9 +116,9 @@ static struct platform_device sh7757_eth1_device = {
116 }, 116 },
117}; 117};
118 118
119static void sh7757_eth_giga_set_mdio_gate(unsigned long addr) 119static void sh7757_eth_giga_set_mdio_gate(void *addr)
120{ 120{
121 if ((addr & 0x00000fff) < 0x0800) { 121 if (((unsigned long)addr & 0x00000fff) < 0x0800) {
122 gpio_set_value(GPIO_PTT4, 1); 122 gpio_set_value(GPIO_PTT4, 1);
123 writel(readl(GBECONT) & ~GBECONT_RMII0, GBECONT); 123 writel(readl(GBECONT) & ~GBECONT_RMII0, GBECONT);
124 } else { 124 } else {
@@ -210,8 +210,12 @@ static struct resource sh_mmcif_resources[] = {
210}; 210};
211 211
212static struct sh_mmcif_dma sh7757lcr_mmcif_dma = { 212static struct sh_mmcif_dma sh7757lcr_mmcif_dma = {
213 .chan_priv_tx = SHDMA_SLAVE_MMCIF_TX, 213 .chan_priv_tx = {
214 .chan_priv_rx = SHDMA_SLAVE_MMCIF_RX, 214 .slave_id = SHDMA_SLAVE_MMCIF_TX,
215 },
216 .chan_priv_rx = {
217 .slave_id = SHDMA_SLAVE_MMCIF_RX,
218 }
215}; 219};
216 220
217static struct sh_mmcif_plat_data sh_mmcif_plat = { 221static struct sh_mmcif_plat_data sh_mmcif_plat = {
diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h
deleted file mode 100644
index e87063fad2ea..000000000000
--- a/arch/sh/include/asm/memblock.h
+++ /dev/null
@@ -1,4 +0,0 @@
1#ifndef __ASM_SH_MEMBLOCK_H
2#define __ASM_SH_MEMBLOCK_H
3
4#endif /* __ASM_SH_MEMBLOCK_H */
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index db4ecd731a00..406508d4ce74 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -89,7 +89,8 @@ void cpu_idle(void)
89 89
90 /* endless idle loop with no priority at all */ 90 /* endless idle loop with no priority at all */
91 while (1) { 91 while (1) {
92 tick_nohz_stop_sched_tick(1); 92 tick_nohz_idle_enter();
93 rcu_idle_enter();
93 94
94 while (!need_resched()) { 95 while (!need_resched()) {
95 check_pgt_cache(); 96 check_pgt_cache();
@@ -111,7 +112,8 @@ void cpu_idle(void)
111 start_critical_timings(); 112 start_critical_timings();
112 } 113 }
113 114
114 tick_nohz_restart_sched_tick(); 115 rcu_idle_exit();
116 tick_nohz_idle_exit();
115 preempt_enable_no_resched(); 117 preempt_enable_no_resched();
116 schedule(); 118 schedule();
117 preempt_disable(); 119 preempt_disable();
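
The same mechanical conversion recurs in the sparc64, tile, um and unicore32 idle loops below. The old tick_nohz_stop_sched_tick(1) both stopped the tick and entered RCU's idle mode; the split lets the two nest explicitly, presumably so code on either side of the inner calls can still use RCU. The invariant is the strict nesting order, sketched here with the API names from this patch:

        /* RCU's idle window must nest inside the nohz-idle window. */
        while (1) {
                tick_nohz_idle_enter();  /* stop the periodic tick */
                rcu_idle_enter();        /* no RCU read-side sections past here */

                while (!need_resched())
                        ;                /* arch-specific low-power wait */

                rcu_idle_exit();         /* RCU is watching this CPU again */
                tick_nohz_idle_exit();   /* restart the tick, do accounting */

                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }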
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index c5a33f007f88..9fea49f6e667 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -157,9 +157,6 @@ void __init reserve_crashkernel(void)
157 unsigned long long crash_size, crash_base; 157 unsigned long long crash_size, crash_base;
158 int ret; 158 int ret;
159 159
160 /* this is necessary because of memblock_phys_mem_size() */
161 memblock_analyze();
162
163 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 160 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
164 &crash_size, &crash_base); 161 &crash_size, &crash_base);
165 if (ret == 0 && crash_size > 0) { 162 if (ret == 0 && crash_size > 0) {
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 1a0e946679a4..7b57bf1dc855 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -230,7 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
230 pmb_bolt_mapping((unsigned long)__va(start), start, end - start, 230 pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
231 PAGE_KERNEL); 231 PAGE_KERNEL);
232 232
233 add_active_range(nid, start_pfn, end_pfn); 233 memblock_set_node(PFN_PHYS(start_pfn),
234 PFN_PHYS(end_pfn - start_pfn), nid);
234} 235}
235 236
236void __init __weak plat_early_device_setup(void) 237void __init __weak plat_early_device_setup(void)
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index c3e61b366493..cb8f9920f4dd 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS
143 CPU_SUBTYPE_SH7785) 143 CPU_SUBTYPE_SH7785)
144 default "1" 144 default "1"
145 145
146config ARCH_POPULATES_NODE_MAP
147 def_bool y
148
149config ARCH_SELECT_MEMORY_MODEL 146config ARCH_SELECT_MEMORY_MODEL
150 def_bool y 147 def_bool y
151 148
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 939ca0f356f6..82cc576fab15 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -324,7 +324,6 @@ void __init paging_init(void)
324 unsigned long vaddr, end; 324 unsigned long vaddr, end;
325 int nid; 325 int nid;
326 326
327 memblock_init();
328 sh_mv.mv_mem_init(); 327 sh_mv.mv_mem_init();
329 328
330 early_reserve_mem(); 329 early_reserve_mem();
@@ -337,7 +336,7 @@ void __init paging_init(void)
337 sh_mv.mv_mem_reserve(); 336 sh_mv.mv_mem_reserve();
338 337
339 memblock_enforce_memory_limit(memory_limit); 338 memblock_enforce_memory_limit(memory_limit);
340 memblock_analyze(); 339 memblock_allow_resize();
341 340
342 memblock_dump_all(); 341 memblock_dump_all();
343 342
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index b4c2d2b946dd..e4dd5d5a1115 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -49,7 +49,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
49 return oprofile_perf_init(ops); 49 return oprofile_perf_init(ops);
50} 50}
51 51
52void __exit oprofile_arch_exit(void) 52void oprofile_arch_exit(void)
53{ 53{
54 oprofile_perf_exit(); 54 oprofile_perf_exit();
55 kfree(sh_pmu_op_name); 55 kfree(sh_pmu_op_name);
@@ -60,5 +60,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
60 ops->backtrace = sh_backtrace; 60 ops->backtrace = sh_backtrace;
61 return -ENODEV; 61 return -ENODEV;
62} 62}
63void __exit oprofile_arch_exit(void) {} 63void oprofile_arch_exit(void) {}
64#endif /* CONFIG_HW_PERF_EVENTS */ 64#endif /* CONFIG_HW_PERF_EVENTS */
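
oprofile_arch_exit() loses its __exit annotation in both definitions above. The hunk does not say why, but the usual reason for such a change is that the function is also reachable from an init-time error path, and .exit.text is discarded for built-in code. A sketch of that reading (the caller shape and helper name here are hypothetical, not taken from the patch):

        /* Hypothetical init path in the oprofile core: */
        static int __init oprofile_init_sketch(void)
        {
                int err = oprofile_arch_init(&oprofile_ops);

                if (!err)
                        err = register_sampling_files();  /* hypothetical */
                if (err)
                        oprofile_arch_exit();  /* called at init time, so it
                                                * must not live in .exit.text */
                return err;
        }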
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index f92602e86607..70ae9d81870e 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,6 +43,7 @@ config SPARC64
43 select HAVE_KPROBES 43 select HAVE_KPROBES
44 select HAVE_RCU_TABLE_FREE if SMP 44 select HAVE_RCU_TABLE_FREE if SMP
45 select HAVE_MEMBLOCK 45 select HAVE_MEMBLOCK
46 select HAVE_MEMBLOCK_NODE_MAP
46 select HAVE_SYSCALL_WRAPPERS 47 select HAVE_SYSCALL_WRAPPERS
47 select HAVE_DYNAMIC_FTRACE 48 select HAVE_DYNAMIC_FTRACE
48 select HAVE_FTRACE_MCOUNT_RECORD 49 select HAVE_FTRACE_MCOUNT_RECORD
@@ -352,9 +353,6 @@ config NODES_SPAN_OTHER_NODES
352 def_bool y 353 def_bool y
353 depends on NEED_MULTIPLE_NODES 354 depends on NEED_MULTIPLE_NODES
354 355
355config ARCH_POPULATES_NODE_MAP
356 def_bool y if SPARC64
357
358config ARCH_SELECT_MEMORY_MODEL 356config ARCH_SELECT_MEMORY_MODEL
359 def_bool y if SPARC64 357 def_bool y if SPARC64
360 358
diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h
deleted file mode 100644
index c67b047ef85e..000000000000
--- a/arch/sparc/include/asm/memblock.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef _SPARC64_MEMBLOCK_H
2#define _SPARC64_MEMBLOCK_H
3
4#include <asm/oplib.h>
5
6#define MEMBLOCK_DBG(fmt...) prom_printf(fmt)
7
8#endif /* !(_SPARC64_MEMBLOCK_H) */
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 7429b47c3aca..381edcd5bc29 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -1181,13 +1181,11 @@ static int __devinit ds_probe(struct vio_dev *vdev,
1181 1181
1182 dp->rcv_buf_len = 4096; 1182 dp->rcv_buf_len = 4096;
1183 1183
1184 dp->ds_states = kzalloc(sizeof(ds_states_template), 1184 dp->ds_states = kmemdup(ds_states_template,
1185 GFP_KERNEL); 1185 sizeof(ds_states_template), GFP_KERNEL);
1186 if (!dp->ds_states) 1186 if (!dp->ds_states)
1187 goto out_free_rcv_buf; 1187 goto out_free_rcv_buf;
1188 1188
1189 memcpy(dp->ds_states, ds_states_template,
1190 sizeof(ds_states_template));
1191 dp->num_ds_states = ARRAY_SIZE(ds_states_template); 1189 dp->num_ds_states = ARRAY_SIZE(ds_states_template);
1192 1190
1193 for (i = 0; i < dp->num_ds_states; i++) 1191 for (i = 0; i < dp->num_ds_states; i++)
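
The kzalloc()+memcpy() pair collapses into kmemdup() here, and the same cleanup appears in prom_common.c below. The zeroing that kzalloc() did was redundant, since the buffer was immediately overwritten in full. The equivalence, sketched:

        #include <linux/slab.h>
        #include <linux/string.h>

        /* Open-coded form being removed: */
        static void *dup_open_coded(const void *src, size_t len)
        {
                void *p = kmalloc(len, GFP_KERNEL);

                if (p)
                        memcpy(p, src, len);
                return p;
        }

        /* Idiomatic replacement: allocation + copy in one call. */
        static void *dup_idiomatic(const void *src, size_t len)
        {
                return kmemdup(src, len, GFP_KERNEL);
        }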
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index b272cda35a01..af5755d20fbe 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -849,10 +849,10 @@ static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
849 if (!irq) 849 if (!irq)
850 return -ENOMEM; 850 return -ENOMEM;
851 851
852 if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
853 return -EINVAL;
854 if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID)) 852 if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
855 return -EINVAL; 853 return -EINVAL;
854 if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
855 return -EINVAL;
856 856
857 return irq; 857 return irq;
858} 858}
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 3739a06a76cb..39d8b05201a2 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -95,12 +95,14 @@ void cpu_idle(void)
95 set_thread_flag(TIF_POLLING_NRFLAG); 95 set_thread_flag(TIF_POLLING_NRFLAG);
96 96
97 while(1) { 97 while(1) {
98 tick_nohz_stop_sched_tick(1); 98 tick_nohz_idle_enter();
99 rcu_idle_enter();
99 100
100 while (!need_resched() && !cpu_is_offline(cpu)) 101 while (!need_resched() && !cpu_is_offline(cpu))
101 sparc64_yield(cpu); 102 sparc64_yield(cpu);
102 103
103 tick_nohz_restart_sched_tick(); 104 rcu_idle_exit();
105 tick_nohz_idle_exit();
104 106
105 preempt_enable_no_resched(); 107 preempt_enable_no_resched();
106 108
diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index 46614807a57f..741df916c124 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -58,12 +58,10 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
58 void *new_val; 58 void *new_val;
59 int err; 59 int err;
60 60
61 new_val = kmalloc(len, GFP_KERNEL); 61 new_val = kmemdup(val, len, GFP_KERNEL);
62 if (!new_val) 62 if (!new_val)
63 return -ENOMEM; 63 return -ENOMEM;
64 64
65 memcpy(new_val, val, len);
66
67 err = -ENODEV; 65 err = -ENODEV;
68 66
69 mutex_lock(&of_set_property_mutex); 67 mutex_lock(&of_set_property_mutex);
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index fe1e3fc31bc5..ffb883ddd0f0 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -84,7 +84,7 @@ static void prom_sync_me(void)
84 84
85 prom_printf("PROM SYNC COMMAND...\n"); 85 prom_printf("PROM SYNC COMMAND...\n");
86 show_free_areas(0); 86 show_free_areas(0);
87 if(current->pid != 0) { 87 if (!is_idle_task(current)) {
88 local_irq_enable(); 88 local_irq_enable();
89 sys_sync(); 89 sys_sync();
90 local_irq_disable(); 90 local_irq_disable();
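
The open-coded pid test above (and the two in arch/tile/mm/fault.c below) becomes is_idle_task(). As far as I recall, the helper introduced for this series is just a readable wrapper over the same invariant, that only the per-cpu idle tasks carry PID 0; treat the exact body as an assumption:

        #include <linux/sched.h>

        /* is_idle_task - is the specified task an idle task? */
        static inline bool is_idle_task(struct task_struct *p)
        {
                return p->pid == 0;
        }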
diff --git a/arch/sparc/mm/btfixup.c b/arch/sparc/mm/btfixup.c
index 5175ac2f4820..8a7f81743c12 100644
--- a/arch/sparc/mm/btfixup.c
+++ b/arch/sparc/mm/btfixup.c
@@ -302,8 +302,7 @@ void __init btfixup(void)
302 case 'i': /* INT */ 302 case 'i': /* INT */
303 if ((insn & 0xc1c00000) == 0x01000000) /* %HI */ 303 if ((insn & 0xc1c00000) == 0x01000000) /* %HI */
304 set_addr(addr, q[1], fmangled, (insn & 0xffc00000) | (p[1] >> 10)); 304 set_addr(addr, q[1], fmangled, (insn & 0xffc00000) | (p[1] >> 10));
305 else if ((insn & 0x80002000) == 0x80002000 && 305 else if ((insn & 0x80002000) == 0x80002000) /* %LO */
306 (insn & 0x01800000) != 0x01800000) /* %LO */
307 set_addr(addr, q[1], fmangled, (insn & 0xffffe000) | (p[1] & 0x3ff)); 306 set_addr(addr, q[1], fmangled, (insn & 0xffffe000) | (p[1] & 0x3ff));
308 else { 307 else {
309 prom_printf(insn_i, p, addr, insn); 308 prom_printf(insn_i, p, addr, insn);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 8e073d802139..b3f5e7dfea51 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -790,7 +790,7 @@ static int find_node(unsigned long addr)
790 return -1; 790 return -1;
791} 791}
792 792
793u64 memblock_nid_range(u64 start, u64 end, int *nid) 793static u64 memblock_nid_range(u64 start, u64 end, int *nid)
794{ 794{
795 *nid = find_node(start); 795 *nid = find_node(start);
796 start += PAGE_SIZE; 796 start += PAGE_SIZE;
@@ -808,7 +808,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
808 return start; 808 return start;
809} 809}
810#else 810#else
811u64 memblock_nid_range(u64 start, u64 end, int *nid) 811static u64 memblock_nid_range(u64 start, u64 end, int *nid)
812{ 812{
813 *nid = 0; 813 *nid = 0;
814 return end; 814 return end;
@@ -816,7 +816,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
816#endif 816#endif
817 817
818/* This must be invoked after performing all of the necessary 818/* This must be invoked after performing all of the necessary
819 * add_active_range() calls for 'nid'. We need to be able to get 819 * memblock_set_node() calls for 'nid'. We need to be able to get
820 * correct data from get_pfn_range_for_nid(). 820 * correct data from get_pfn_range_for_nid().
821 */ 821 */
822static void __init allocate_node_data(int nid) 822static void __init allocate_node_data(int nid)
@@ -987,14 +987,11 @@ static void __init add_node_ranges(void)
987 987
988 this_end = memblock_nid_range(start, end, &nid); 988 this_end = memblock_nid_range(start, end, &nid);
989 989
990 numadbg("Adding active range nid[%d] " 990 numadbg("Setting memblock NUMA node nid[%d] "
991 "start[%lx] end[%lx]\n", 991 "start[%lx] end[%lx]\n",
992 nid, start, this_end); 992 nid, start, this_end);
993 993
994 add_active_range(nid, 994 memblock_set_node(start, this_end - start, nid);
995 start >> PAGE_SHIFT,
996 this_end >> PAGE_SHIFT);
997
998 start = this_end; 995 start = this_end;
999 } 996 }
1000 } 997 }
@@ -1282,7 +1279,6 @@ static void __init bootmem_init_nonnuma(void)
1282{ 1279{
1283 unsigned long top_of_ram = memblock_end_of_DRAM(); 1280 unsigned long top_of_ram = memblock_end_of_DRAM();
1284 unsigned long total_ram = memblock_phys_mem_size(); 1281 unsigned long total_ram = memblock_phys_mem_size();
1285 struct memblock_region *reg;
1286 1282
1287 numadbg("bootmem_init_nonnuma()\n"); 1283 numadbg("bootmem_init_nonnuma()\n");
1288 1284
@@ -1292,20 +1288,8 @@ static void __init bootmem_init_nonnuma(void)
1292 (top_of_ram - total_ram) >> 20); 1288 (top_of_ram - total_ram) >> 20);
1293 1289
1294 init_node_masks_nonnuma(); 1290 init_node_masks_nonnuma();
1295 1291 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
1296 for_each_memblock(memory, reg) {
1297 unsigned long start_pfn, end_pfn;
1298
1299 if (!reg->size)
1300 continue;
1301
1302 start_pfn = memblock_region_memory_base_pfn(reg);
1303 end_pfn = memblock_region_memory_end_pfn(reg);
1304 add_active_range(0, start_pfn, end_pfn);
1305 }
1306
1307 allocate_node_data(0); 1292 allocate_node_data(0);
1308
1309 node_set_online(0); 1293 node_set_online(0);
1310} 1294}
1311 1295
@@ -1769,8 +1753,6 @@ void __init paging_init(void)
1769 sun4v_ktsb_init(); 1753 sun4v_ktsb_init();
1770 } 1754 }
1771 1755
1772 memblock_init();
1773
1774 /* Find available physical memory... 1756 /* Find available physical memory...
1775 * 1757 *
1776 * Read it twice in order to work around a bug in openfirmware. 1758 * Read it twice in order to work around a bug in openfirmware.
@@ -1796,7 +1778,7 @@ void __init paging_init(void)
1796 1778
1797 memblock_enforce_memory_limit(cmdline_memory_size); 1779 memblock_enforce_memory_limit(cmdline_memory_size);
1798 1780
1799 memblock_analyze(); 1781 memblock_allow_resize();
1800 memblock_dump_all(); 1782 memblock_dump_all();
1801 1783
1802 set_bit(0, mmu_context_bmap); 1784 set_bit(0, mmu_context_bmap);
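
With HAVE_MEMBLOCK_NODE_MAP selected in the Kconfig hunk above, the node map is derived from memblock itself, which is why bootmem_init_nonnuma() no longer walks regions: one memblock_set_node() call tags everything as node 0 (the call applies the node id to every memblock region intersecting the given range). The non-NUMA boot path condenses to:

        init_node_masks_nonnuma();

        /* (phys_addr_t)ULLONG_MAX means "all of memory"; memblock applies
         * nid 0 to each memory region it actually has. */
        memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);

        allocate_node_data(0);
        node_set_online(0);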
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 9c45d8bbdf57..4c1ac6e5347a 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -85,7 +85,8 @@ void cpu_idle(void)
85 85
86 /* endless idle loop with no priority at all */ 86 /* endless idle loop with no priority at all */
87 while (1) { 87 while (1) {
88 tick_nohz_stop_sched_tick(1); 88 tick_nohz_idle_enter();
89 rcu_idle_enter();
89 while (!need_resched()) { 90 while (!need_resched()) {
90 if (cpu_is_offline(cpu)) 91 if (cpu_is_offline(cpu))
91 BUG(); /* no HOTPLUG_CPU */ 92 BUG(); /* no HOTPLUG_CPU */
@@ -105,7 +106,8 @@ void cpu_idle(void)
105 local_irq_enable(); 106 local_irq_enable();
106 current_thread_info()->status |= TS_POLLING; 107 current_thread_info()->status |= TS_POLLING;
107 } 108 }
108 tick_nohz_restart_sched_tick(); 109 rcu_idle_exit();
110 tick_nohz_idle_exit();
109 preempt_enable_no_resched(); 111 preempt_enable_no_resched();
110 schedule(); 112 schedule();
111 preempt_disable(); 113 preempt_disable();
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 25b7b90fd620..c1eaaa1fcc20 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -54,7 +54,7 @@ static noinline void force_sig_info_fault(const char *type, int si_signo,
54 if (unlikely(tsk->pid < 2)) { 54 if (unlikely(tsk->pid < 2)) {
55 panic("Signal %d (code %d) at %#lx sent to %s!", 55 panic("Signal %d (code %d) at %#lx sent to %s!",
56 si_signo, si_code & 0xffff, address, 56 si_signo, si_code & 0xffff, address,
57 tsk->pid ? "init" : "the idle task"); 57 is_idle_task(tsk) ? "the idle task" : "init");
58 } 58 }
59 59
60 info.si_signo = si_signo; 60 info.si_signo = si_signo;
@@ -515,7 +515,7 @@ no_context:
515 515
516 if (unlikely(tsk->pid < 2)) { 516 if (unlikely(tsk->pid < 2)) {
517 panic("Kernel page fault running %s!", 517 panic("Kernel page fault running %s!",
518 tsk->pid ? "init" : "the idle task"); 518 is_idle_task(tsk) ? "the idle task" : "init");
519 } 519 }
520 520
521 /* 521 /*
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5338351aecd..69f24905abdc 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -246,10 +246,12 @@ void default_idle(void)
246 if (need_resched()) 246 if (need_resched())
247 schedule(); 247 schedule();
248 248
249 tick_nohz_stop_sched_tick(1); 249 tick_nohz_idle_enter();
250 rcu_idle_enter();
250 nsecs = disable_timer(); 251 nsecs = disable_timer();
251 idle_sleep(nsecs); 252 idle_sleep(nsecs);
252 tick_nohz_restart_sched_tick(); 253 rcu_idle_exit();
254 tick_nohz_idle_exit();
253 } 255 }
254} 256}
255 257
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index ba401df971ed..52edc2b62873 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -55,7 +55,8 @@ void cpu_idle(void)
55{ 55{
56 /* endless idle loop with no priority at all */ 56 /* endless idle loop with no priority at all */
57 while (1) { 57 while (1) {
58 tick_nohz_stop_sched_tick(1); 58 tick_nohz_idle_enter();
59 rcu_idle_enter();
59 while (!need_resched()) { 60 while (!need_resched()) {
60 local_irq_disable(); 61 local_irq_disable();
61 stop_critical_timings(); 62 stop_critical_timings();
@@ -63,7 +64,8 @@ void cpu_idle(void)
63 local_irq_enable(); 64 local_irq_enable();
64 start_critical_timings(); 65 start_critical_timings();
65 } 66 }
66 tick_nohz_restart_sched_tick(); 67 rcu_idle_exit();
68 tick_nohz_idle_exit();
67 preempt_enable_no_resched(); 69 preempt_enable_no_resched();
68 schedule(); 70 schedule();
69 preempt_disable(); 71 preempt_disable();
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 471b6bca8da4..673d7a89d8ff 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -37,6 +37,7 @@
37#include <asm/cacheflush.h> 37#include <asm/cacheflush.h>
38#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
39#include <asm/traps.h> 39#include <asm/traps.h>
40#include <asm/memblock.h>
40 41
41#include "setup.h" 42#include "setup.h"
42 43
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 3b379cddbc64..de186bde8975 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -26,6 +26,7 @@
26#include <asm/setup.h> 26#include <asm/setup.h>
27#include <asm/sizes.h> 27#include <asm/sizes.h>
28#include <asm/tlb.h> 28#include <asm/tlb.h>
29#include <asm/memblock.h>
29#include <mach/map.h> 30#include <mach/map.h>
30 31
31#include "mm.h" 32#include "mm.h"
@@ -245,7 +246,6 @@ void __init uc32_memblock_init(struct meminfo *mi)
245 sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), 246 sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
246 meminfo_cmp, NULL); 247 meminfo_cmp, NULL);
247 248
248 memblock_init();
249 for (i = 0; i < mi->nr_banks; i++) 249 for (i = 0; i < mi->nr_banks; i++)
250 memblock_add(mi->bank[i].start, mi->bank[i].size); 250 memblock_add(mi->bank[i].start, mi->bank[i].size);
251 251
@@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
264 264
265 uc32_mm_memblock_reserve(); 265 uc32_mm_memblock_reserve();
266 266
267 memblock_analyze(); 267 memblock_allow_resize();
268 memblock_dump_all(); 268 memblock_dump_all();
269} 269}
270 270
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 3e5c3e5a0b45..43c20b40e444 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -25,6 +25,7 @@
25#include <asm/setup.h> 25#include <asm/setup.h>
26#include <asm/sizes.h> 26#include <asm/sizes.h>
27#include <asm/tlb.h> 27#include <asm/tlb.h>
28#include <asm/memblock.h>
28 29
29#include <mach/map.h> 30#include <mach/map.h>
30 31
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb42949cc09..67d6af3581bc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,6 +26,8 @@ config X86
26 select HAVE_IOREMAP_PROT 26 select HAVE_IOREMAP_PROT
27 select HAVE_KPROBES 27 select HAVE_KPROBES
28 select HAVE_MEMBLOCK 28 select HAVE_MEMBLOCK
29 select HAVE_MEMBLOCK_NODE_MAP
30 select ARCH_DISCARD_MEMBLOCK
29 select ARCH_WANT_OPTIONAL_GPIOLIB 31 select ARCH_WANT_OPTIONAL_GPIOLIB
30 select ARCH_WANT_FRAME_POINTERS 32 select ARCH_WANT_FRAME_POINTERS
31 select HAVE_DMA_ATTRS 33 select HAVE_DMA_ATTRS
@@ -204,9 +206,6 @@ config ZONE_DMA32
204 bool 206 bool
205 default X86_64 207 default X86_64
206 208
207config ARCH_POPULATES_NODE_MAP
208 def_bool y
209
210config AUDIT_ARCH 209config AUDIT_ARCH
211 bool 210 bool
212 default X86_64 211 default X86_64
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b96957d88..37782566af24 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
117 117
118extern unsigned long e820_end_of_ram_pfn(void); 118extern unsigned long e820_end_of_ram_pfn(void);
119extern unsigned long e820_end_of_low_ram_pfn(void); 119extern unsigned long e820_end_of_low_ram_pfn(void);
120extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 120extern u64 early_reserve_e820(u64 sizet, u64 align);
121 121
122void memblock_x86_fill(void); 122void memblock_x86_fill(void);
123void memblock_find_dma_reserve(void); 123void memblock_find_dma_reserve(void);
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 88c765e16410..74df3f1eddfd 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
137 return (insn->vex_prefix.value != 0); 137 return (insn->vex_prefix.value != 0);
138} 138}
139 139
140/* Ensure this instruction is decoded completely */
141static inline int insn_complete(struct insn *insn)
142{
143 return insn->opcode.got && insn->modrm.got && insn->sib.got &&
144 insn->displacement.got && insn->immediate.got;
145}
146
140static inline insn_byte_t insn_vex_m_bits(struct insn *insn) 147static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
141{ 148{
142 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ 149 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
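
insn_complete() answers whether every field group of a struct insn was populated during decode, which matters because the decoder gives up silently on bad or truncated bytes. A usage sketch; insn_init() and insn_get_length() are the decoder entry points as I recall them for this kernel, so treat the exact signatures as assumptions:

        #include <asm/insn.h>

        static int decode_and_validate(const void *kaddr, int x86_64)
        {
                struct insn insn;

                insn_init(&insn, kaddr, x86_64);
                insn_get_length(&insn);  /* drives the full decode */

                if (!insn_complete(&insn))
                        return -EINVAL;  /* truncated or undecodable */

                return insn.length;
        }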
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644
index 0cd3800f33b9..000000000000
--- a/arch/x86/include/asm/memblock.h
+++ /dev/null
@@ -1,23 +0,0 @@
1#ifndef _X86_MEMBLOCK_H
2#define _X86_MEMBLOCK_H
3
4#define ARCH_DISCARD_MEMBLOCK
5
6u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
7
8void memblock_x86_reserve_range(u64 start, u64 end, char *name);
9void memblock_x86_free_range(u64 start, u64 end);
10struct range;
11int __get_free_all_memory_range(struct range **range, int nodeid,
12 unsigned long start_pfn, unsigned long end_pfn);
13int get_free_all_memory_range(struct range **rangep, int nodeid);
14
15void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
16 unsigned long last_pfn);
17u64 memblock_x86_hole_size(u64 start, u64 end);
18u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
19u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
20u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
21bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
22
23#endif
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f7d5d8..096c975e099f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -57,6 +57,7 @@
57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) 57 (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
58 58
59#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 59#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
60#define ARCH_PERFMON_EVENTS_COUNT 7
60 61
61/* 62/*
62 * Intel "Architectural Performance Monitoring" CPUID 63 * Intel "Architectural Performance Monitoring" CPUID
@@ -72,6 +73,19 @@ union cpuid10_eax {
72 unsigned int full; 73 unsigned int full;
73}; 74};
74 75
76union cpuid10_ebx {
77 struct {
78 unsigned int no_unhalted_core_cycles:1;
79 unsigned int no_instructions_retired:1;
80 unsigned int no_unhalted_reference_cycles:1;
81 unsigned int no_llc_reference:1;
82 unsigned int no_llc_misses:1;
83 unsigned int no_branch_instruction_retired:1;
84 unsigned int no_branch_misses_retired:1;
85 } split;
86 unsigned int full;
87};
88
75union cpuid10_edx { 89union cpuid10_edx {
76 struct { 90 struct {
77 unsigned int num_counters_fixed:5; 91 unsigned int num_counters_fixed:5;
@@ -81,6 +95,15 @@ union cpuid10_edx {
81 unsigned int full; 95 unsigned int full;
82}; 96};
83 97
98struct x86_pmu_capability {
99 int version;
100 int num_counters_gp;
101 int num_counters_fixed;
102 int bit_width_gp;
103 int bit_width_fixed;
104 unsigned int events_mask;
105 int events_mask_len;
106};
84 107
85/* 108/*
86 * Fixed-purpose performance events: 109 * Fixed-purpose performance events:
@@ -89,23 +112,24 @@ union cpuid10_edx {
89/* 112/*
90 * All 3 fixed-mode PMCs are configured via this single MSR: 113 * All 3 fixed-mode PMCs are configured via this single MSR:
91 */ 114 */
92#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d 115#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
93 116
94/* 117/*
95 * The counts are available in three separate MSRs: 118 * The counts are available in three separate MSRs:
96 */ 119 */
97 120
98/* Instr_Retired.Any: */ 121/* Instr_Retired.Any: */
99#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 122#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
100#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) 123#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
101 124
102/* CPU_CLK_Unhalted.Core: */ 125/* CPU_CLK_Unhalted.Core: */
103#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a 126#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
104#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) 127#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
105 128
106/* CPU_CLK_Unhalted.Ref: */ 129/* CPU_CLK_Unhalted.Ref: */
107#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b 130#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
108#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) 131#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
132#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
109 133
110/* 134/*
111 * We model BTS tracing as another fixed-mode PMC. 135 * We model BTS tracing as another fixed-mode PMC.
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
202}; 226};
203 227
204extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); 228extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
229extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
205#else 230#else
206static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 231static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
207{ 232{
@@ -209,6 +234,11 @@ static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
209 return NULL; 234 return NULL;
210} 235}
211 236
237static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
238{
239 memset(cap, 0, sizeof(*cap));
240}
241
212static inline void perf_events_lapic_init(void) { } 242static inline void perf_events_lapic_init(void) { }
213#endif 243#endif
214 244
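
perf_get_x86_pmu_capability() hands non-perf code a snapshot of the host PMU, and the !CONFIG_PERF_EVENTS stub zeroes the struct so callers need no ifdefs; judging by the EXPORT_SYMBOL_GPL later in this series, KVM is the intended consumer. A hypothetical caller:

        #include <asm/perf_event.h>

        static bool host_pmu_usable(void)
        {
                struct x86_pmu_capability cap;

                perf_get_x86_pmu_capability(&cap);

                /* With perf configured out, cap is all zeroes and this
                 * check fails safely. */
                return cap.version >= 2 && cap.num_counters_gp > 0;
        }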
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca6542..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
88 */ 88 */
89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, 89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
90 aper_size, aper_size); 90 aper_size, aper_size);
91 if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { 91 if (!addr || addr + aper_size > GART_MAX_ADDR) {
92 printk(KERN_ERR 92 printk(KERN_ERR
93 "Cannot allocate aperture memory hole (%lx,%uK)\n", 93 "Cannot allocate aperture memory hole (%lx,%uK)\n",
94 addr, aper_size>>10); 94 addr, aper_size>>10);
95 return 0; 95 return 0;
96 } 96 }
97 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); 97 memblock_reserve(addr, aper_size);
98 /* 98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the 99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping. 100 * kernel direct mapping.
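
Two memblock conventions change in this hunk and recur through the rest of the x86 diffs: allocation failure from memblock_find_in_range() is now a 0 return rather than the old MEMBLOCK_ERROR sentinel, and reservations take (base, size) with no name string. The resulting pattern:

        #include <linux/memblock.h>

        static phys_addr_t __init find_and_reserve(phys_addr_t start,
                                                   phys_addr_t end,
                                                   phys_addr_t size,
                                                   phys_addr_t align)
        {
                phys_addr_t addr = memblock_find_in_range(start, end, size, align);

                if (!addr)                       /* 0 now signals failure */
                        return 0;

                memblock_reserve(addr, size);    /* (base, size); no name */
                return addr;
        }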
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f98d84caf94c..2cd2d93643dc 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -876,8 +876,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
876 * Besides, if we don't, timer interrupts ignore the global 876 * Besides, if we don't, timer interrupts ignore the global
877 * interrupt lock, which is the WrongThing (tm) to do. 877 * interrupt lock, which is the WrongThing (tm) to do.
878 */ 878 */
879 exit_idle();
880 irq_enter(); 879 irq_enter();
880 exit_idle();
881 local_apic_timer_interrupt(); 881 local_apic_timer_interrupt();
882 irq_exit(); 882 irq_exit();
883 883
@@ -1809,8 +1809,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1809{ 1809{
1810 u32 v; 1810 u32 v;
1811 1811
1812 exit_idle();
1813 irq_enter(); 1812 irq_enter();
1813 exit_idle();
1814 /* 1814 /*
1815 * Check if this really is a spurious interrupt and ACK it 1815 * Check if this really is a spurious interrupt and ACK it
1816 * if it is a vectored one. Just in case... 1816 * if it is a vectored one. Just in case...
@@ -1846,8 +1846,8 @@ void smp_error_interrupt(struct pt_regs *regs)
1846 "Illegal register address", /* APIC Error Bit 7 */ 1846 "Illegal register address", /* APIC Error Bit 7 */
1847 }; 1847 };
1848 1848
1849 exit_idle();
1850 irq_enter(); 1849 irq_enter();
1850 exit_idle();
1851 /* First tickle the hardware, only then report what went on. -- REW */ 1851 /* First tickle the hardware, only then report what went on. -- REW */
1852 v0 = apic_read(APIC_ESR); 1852 v0 = apic_read(APIC_ESR);
1853 apic_write(APIC_ESR, 0); 1853 apic_write(APIC_ESR, 0);
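
The exit_idle()/irq_enter() swap above repeats in io_apic.c and the two mcheck handlers below. The ordering reads as deliberate: irq_enter() is what tells RCU and the nohz code that the CPU has left idle, so running exit_idle()'s notifier chain before it could use RCU while RCU still believes the CPU is idle. (That rationale is inferred from the ordering itself, not stated in the hunks.) The entry pattern is now:

        irq_enter();  /* account the IRQ; ends the RCU/nohz idle period */
        exit_idle();  /* idle-exit notifiers may safely use RCU here */

        /* ... service the interrupt ... */

        irq_exit();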
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6d939d7847e2..898055585516 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2421,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2421 unsigned vector, me; 2421 unsigned vector, me;
2422 2422
2423 ack_APIC_irq(); 2423 ack_APIC_irq();
2424 exit_idle();
2425 irq_enter(); 2424 irq_enter();
2425 exit_idle();
2426 2426
2427 me = smp_processor_id(); 2427 me = smp_processor_id();
2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d34730..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
62 62
63void __init setup_bios_corruption_check(void) 63void __init setup_bios_corruption_check(void)
64{ 64{
65 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ 65 phys_addr_t start, end;
66 u64 i;
66 67
67 if (memory_corruption_check == -1) { 68 if (memory_corruption_check == -1) {
68 memory_corruption_check = 69 memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
82 83
83 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); 84 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
84 85
85 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 86 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
86 u64 size; 87 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
87 addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); 88 PAGE_SIZE, corruption_check_size);
89 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
90 PAGE_SIZE, corruption_check_size);
91 if (start >= end)
92 continue;
88 93
89 if (addr == MEMBLOCK_ERROR) 94 memblock_reserve(start, end - start);
90 break; 95 scan_areas[num_scan_areas].addr = start;
91 96 scan_areas[num_scan_areas].size = end - start;
92 if (addr >= corruption_check_size)
93 break;
94
95 if ((addr + size) > corruption_check_size)
96 size = corruption_check_size - addr;
97
98 memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
99 scan_areas[num_scan_areas].addr = addr;
100 scan_areas[num_scan_areas].size = size;
101 num_scan_areas++;
102 97
103 /* Assume we've already mapped this early memory */ 98 /* Assume we've already mapped this early memory */
104 memset(__va(addr), 0, size); 99 memset(__va(start), 0, end - start);
105 100
106 addr += size; 101 if (++num_scan_areas >= MAX_SCAN_AREAS)
102 break;
107 } 103 }
108 104
109 if (num_scan_areas) 105 if (num_scan_areas)
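
for_each_free_mem_range() iterates ranges that are in memblock.memory but not in memblock.reserved, replacing the x86-private memblock_x86_find_in_range_size() walk; the clamp_t() calls above just bound each free range to [PAGE_SIZE, corruption_check_size). The generic shape of such a loop:

        #include <linux/memblock.h>

        static void __init visit_free_ranges(void)
        {
                phys_addr_t start, end;
                u64 i;

                /* NULL: we do not care which node each range belongs to. */
                for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
                        /* [start, end) is free physical memory here */
                }
        }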
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..ce215616d5b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
397 397
398asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) 398asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
399{ 399{
400 exit_idle();
401 irq_enter(); 400 irq_enter();
401 exit_idle();
402 inc_irq_stat(irq_thermal_count); 402 inc_irq_stat(irq_thermal_count);
403 smp_thermal_vector(); 403 smp_thermal_vector();
404 irq_exit(); 404 irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c9..aa578cadb940 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
19 19
20asmlinkage void smp_threshold_interrupt(void) 20asmlinkage void smp_threshold_interrupt(void)
21{ 21{
22 exit_idle();
23 irq_enter(); 22 irq_enter();
23 exit_idle();
24 inc_irq_stat(irq_threshold_count); 24 inc_irq_stat(irq_threshold_count);
25 mce_threshold_vector(); 25 mce_threshold_vector();
26 irq_exit(); 26 irq_exit();
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a0010..5adce1040b11 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
484 return event->pmu == &pmu; 484 return event->pmu == &pmu;
485} 485}
486 486
487/*
488 * Event scheduler state:
489 *
490 * Assign events to counters by iterating over all events and counters,
491 * taking the events with the least weight first. Keep the current iterator
492 * state in struct sched_state.
493 */
494struct sched_state {
495 int weight;
496 int event; /* event index */
497 int counter; /* counter index */
498 int unassigned; /* number of events to be assigned left */
499 unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
500};
501
502/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
503#define SCHED_STATES_MAX 2
504
505struct perf_sched {
506 int max_weight;
507 int max_events;
508 struct event_constraint **constraints;
509 struct sched_state state;
510 int saved_states;
511 struct sched_state saved[SCHED_STATES_MAX];
512};
513
514/*
515 * Initialize the iterator that runs through all events and counters.
516 */
517static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
518 int num, int wmin, int wmax)
519{
520 int idx;
521
522 memset(sched, 0, sizeof(*sched));
523 sched->max_events = num;
524 sched->max_weight = wmax;
525 sched->constraints = c;
526
527 for (idx = 0; idx < num; idx++) {
528 if (c[idx]->weight == wmin)
529 break;
530 }
531
532 sched->state.event = idx; /* start with min weight */
533 sched->state.weight = wmin;
534 sched->state.unassigned = num;
535}
536
537static void perf_sched_save_state(struct perf_sched *sched)
538{
539 if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
540 return;
541
542 sched->saved[sched->saved_states] = sched->state;
543 sched->saved_states++;
544}
545
546static bool perf_sched_restore_state(struct perf_sched *sched)
547{
548 if (!sched->saved_states)
549 return false;
550
551 sched->saved_states--;
552 sched->state = sched->saved[sched->saved_states];
553
554 /* continue with next counter: */
555 clear_bit(sched->state.counter++, sched->state.used);
556
557 return true;
558}
559
560/*
561 * Select a counter for the current event to schedule. Return true on
562 * success.
563 */
564static bool __perf_sched_find_counter(struct perf_sched *sched)
565{
566 struct event_constraint *c;
567 int idx;
568
569 if (!sched->state.unassigned)
570 return false;
571
572 if (sched->state.event >= sched->max_events)
573 return false;
574
575 c = sched->constraints[sched->state.event];
576
577 /* Prefer fixed purpose counters */
578 if (x86_pmu.num_counters_fixed) {
579 idx = X86_PMC_IDX_FIXED;
580 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
581 if (!__test_and_set_bit(idx, sched->state.used))
582 goto done;
583 }
584 }
585 /* Grab the first unused counter starting with idx */
586 idx = sched->state.counter;
587 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
588 if (!__test_and_set_bit(idx, sched->state.used))
589 goto done;
590 }
591
592 return false;
593
594done:
595 sched->state.counter = idx;
596
597 if (c->overlap)
598 perf_sched_save_state(sched);
599
600 return true;
601}
602
603static bool perf_sched_find_counter(struct perf_sched *sched)
604{
605 while (!__perf_sched_find_counter(sched)) {
606 if (!perf_sched_restore_state(sched))
607 return false;
608 }
609
610 return true;
611}
612
613/*
614 * Go through all unassigned events and find the next one to schedule.
615 * Take events with the least weight first. Return true on success.
616 */
617static bool perf_sched_next_event(struct perf_sched *sched)
618{
619 struct event_constraint *c;
620
621 if (!sched->state.unassigned || !--sched->state.unassigned)
622 return false;
623
624 do {
625 /* next event */
626 sched->state.event++;
627 if (sched->state.event >= sched->max_events) {
628 /* next weight */
629 sched->state.event = 0;
630 sched->state.weight++;
631 if (sched->state.weight > sched->max_weight)
632 return false;
633 }
634 c = sched->constraints[sched->state.event];
635 } while (c->weight != sched->state.weight);
636
637 sched->state.counter = 0; /* start with first counter */
638
639 return true;
640}
641
642/*
643 * Assign a counter for each event.
644 */
645static int perf_assign_events(struct event_constraint **constraints, int n,
646 int wmin, int wmax, int *assign)
647{
648 struct perf_sched sched;
649
650 perf_sched_init(&sched, constraints, n, wmin, wmax);
651
652 do {
653 if (!perf_sched_find_counter(&sched))
654 break; /* failed */
655 if (assign)
656 assign[sched.state.event] = sched.state.counter;
657 } while (perf_sched_next_event(&sched));
658
659 return sched.state.unassigned;
660}
661
487int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 662int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
488{ 663{
489 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 664 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
490 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 665 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
491 int i, j, w, wmax, num = 0; 666 int i, wmin, wmax, num = 0;
492 struct hw_perf_event *hwc; 667 struct hw_perf_event *hwc;
493 668
494 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 669 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
495 670
496 for (i = 0; i < n; i++) { 671 for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
497 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 672 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
498 constraints[i] = c; 673 constraints[i] = c;
674 wmin = min(wmin, c->weight);
675 wmax = max(wmax, c->weight);
499 } 676 }
500 677
501 /* 678 /*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
521 if (assign) 698 if (assign)
522 assign[i] = hwc->idx; 699 assign[i] = hwc->idx;
523 } 700 }
524 if (i == n)
525 goto done;
526
527 /*
528 * begin slow path
529 */
530
531 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
532 701
533 /* 702 /* slow path */
534 * weight = number of possible counters 703 if (i != n)
535 * 704 num = perf_assign_events(constraints, n, wmin, wmax, assign);
536 * 1 = most constrained, only works on one counter
537 * wmax = least constrained, works on any counter
538 *
539 * assign events to counters starting with most
540 * constrained events.
541 */
542 wmax = x86_pmu.num_counters;
543 705
544 /* 706 /*
545 * when fixed event counters are present,
546 * wmax is incremented by 1 to account
547 * for one more choice
548 */
549 if (x86_pmu.num_counters_fixed)
550 wmax++;
551
552 for (w = 1, num = n; num && w <= wmax; w++) {
553 /* for each event */
554 for (i = 0; num && i < n; i++) {
555 c = constraints[i];
556 hwc = &cpuc->event_list[i]->hw;
557
558 if (c->weight != w)
559 continue;
560
561 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
562 if (!test_bit(j, used_mask))
563 break;
564 }
565
566 if (j == X86_PMC_IDX_MAX)
567 break;
568
569 __set_bit(j, used_mask);
570
571 if (assign)
572 assign[i] = j;
573 num--;
574 }
575 }
576done:
577 /*
578 * scheduling failed or is just a simulation, 707 * scheduling failed or is just a simulation,
579 * free resources if necessary 708 * free resources if necessary
580 */ 709 */
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
1119 1248
1120static int __init init_hw_perf_events(void) 1249static int __init init_hw_perf_events(void)
1121{ 1250{
1251 struct x86_pmu_quirk *quirk;
1122 struct event_constraint *c; 1252 struct event_constraint *c;
1123 int err; 1253 int err;
1124 1254
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
1147 1277
1148 pr_cont("%s PMU driver.\n", x86_pmu.name); 1278 pr_cont("%s PMU driver.\n", x86_pmu.name);
1149 1279
1150 if (x86_pmu.quirks) 1280 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1151 x86_pmu.quirks(); 1281 quirk->func();
1152 1282
1153 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1283 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1154 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1284 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
1171 1301
1172 unconstrained = (struct event_constraint) 1302 unconstrained = (struct event_constraint)
1173 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1303 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1174 0, x86_pmu.num_counters); 1304 0, x86_pmu.num_counters, 0);
1175 1305
1176 if (x86_pmu.event_constraints) { 1306 if (x86_pmu.event_constraints) {
1307 /*
1308 * event on fixed counter2 (REF_CYCLES) only works on this
1309 * counter, so do not extend mask to generic counters
1310 */
1177 for_each_event_constraint(c, x86_pmu.event_constraints) { 1311 for_each_event_constraint(c, x86_pmu.event_constraints) {
1178 if (c->cmask != X86_RAW_EVENT_MASK) 1312 if (c->cmask != X86_RAW_EVENT_MASK
1313 || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
1179 continue; 1314 continue;
1315 }
1180 1316
1181 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; 1317 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1182 c->weight += x86_pmu.num_counters; 1318 c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
1566 1702
1567 return misc; 1703 return misc;
1568} 1704}
1705
1706void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
1707{
1708 cap->version = x86_pmu.version;
1709 cap->num_counters_gp = x86_pmu.num_counters;
1710 cap->num_counters_fixed = x86_pmu.num_counters_fixed;
1711 cap->bit_width_gp = x86_pmu.cntval_bits;
1712 cap->bit_width_fixed = x86_pmu.cntval_bits;
1713 cap->events_mask = (unsigned int)x86_pmu.events_maskl;
1714 cap->events_mask_len = x86_pmu.events_mask_len;
1715}
1716EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
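
To connect the new scheduler to the EVENT_CONSTRAINT_OVERLAP comment added in perf_event.h below: a greedy pass places the weight-2 event with mask 0x09 on counter 0, which dead-ends if, say, three further events are all tied to counters 0-2; the overlap flag is what makes perf_sched_save_state()/perf_sched_restore_state() able to back that choice out and move the event to counter 3. A hypothetical use of the helper (it is static, so this would live inside perf_event.c; mask values mirror that comment, weights are the popcounts of the masks):

        static struct event_constraint c_overlaps =
                EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);   /* counters 0,3 */
        static struct event_constraint c_another1 =
                EVENT_CONSTRAINT(0, 0x07, 0);           /* counters 0-2 */
        static struct event_constraint c_another2 =
                EVENT_CONSTRAINT(0, 0x38, 0);           /* counters 3-5 */

        static int demo_schedule(int *assign)
        {
                struct event_constraint *c[] =
                        { &c_overlaps, &c_another1, &c_another2 };

                /* wmin = 2, wmax = 3; the return value is the number of
                 * events left unassigned, so 0 means assign[] holds one
                 * counter index per event. */
                return perf_assign_events(c, 3, 2, 3, assign);
        }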
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4b..8944062f46e2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
45 u64 code; 45 u64 code;
46 u64 cmask; 46 u64 cmask;
47 int weight; 47 int weight;
48 int overlap;
48}; 49};
49 50
50struct amd_nb { 51struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
151 void *kfree_on_online; 152 void *kfree_on_online;
152}; 153};
153 154
154#define __EVENT_CONSTRAINT(c, n, m, w) {\ 155#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
155 { .idxmsk64 = (n) }, \ 156 { .idxmsk64 = (n) }, \
156 .code = (c), \ 157 .code = (c), \
157 .cmask = (m), \ 158 .cmask = (m), \
158 .weight = (w), \ 159 .weight = (w), \
160 .overlap = (o), \
159} 161}
160 162
161#define EVENT_CONSTRAINT(c, n, m) \ 163#define EVENT_CONSTRAINT(c, n, m) \
162 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 164 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
165
166/*
167 * The overlap flag marks event constraints with overlapping counter
168 * masks. This is the case if the counter mask of such an event is not
169 * a subset of any other counter mask of a constraint with an equal or
170 * higher weight, e.g.:
171 *
172 * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
173 * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
174 * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
175 *
176 * The event scheduler may not select the correct counter in the first
177 * cycle because it needs to know which subsequent events will be
178 * scheduled, and it may then fail to schedule the events at all. So we set the
179 * overlap flag for such constraints to give the scheduler a hint which
180 * events to select for counter rescheduling.
181 *
182 * Care must be taken as the rescheduling algorithm is O(n!) which
183 * will increase scheduling cycles for an over-commited system
184 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
185 * and its counter masks must be kept at a minimum.
186 */
187#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
188 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
163 189
164/* 190/*
165 * Constraint on the Event code. 191 * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
235 u64 capabilities; 261 u64 capabilities;
236}; 262};
237 263
264struct x86_pmu_quirk {
265 struct x86_pmu_quirk *next;
266 void (*func)(void);
267};
268
238/* 269/*
239 * struct x86_pmu - generic x86 pmu 270 * struct x86_pmu - generic x86 pmu
240 */ 271 */
@@ -259,6 +290,11 @@ struct x86_pmu {
259 int num_counters_fixed; 290 int num_counters_fixed;
260 int cntval_bits; 291 int cntval_bits;
261 u64 cntval_mask; 292 u64 cntval_mask;
293 union {
294 unsigned long events_maskl;
295 unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
296 };
297 int events_mask_len;
262 int apic; 298 int apic;
263 u64 max_period; 299 u64 max_period;
264 struct event_constraint * 300 struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
268 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 304 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
269 struct perf_event *event); 305 struct perf_event *event);
270 struct event_constraint *event_constraints; 306 struct event_constraint *event_constraints;
271 void (*quirks)(void); 307 struct x86_pmu_quirk *quirks;
272 int perfctr_second_write; 308 int perfctr_second_write;
273 309
274 int (*cpu_prepare)(int cpu); 310 int (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); 345 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
310}; 346};
311 347
348#define x86_add_quirk(func_) \
349do { \
350 static struct x86_pmu_quirk __quirk __initdata = { \
351 .func = func_, \
352 }; \
353 __quirk.next = x86_pmu.quirks; \
354 x86_pmu.quirks = &__quirk; \
355} while (0)
356
312#define ERF_NO_HT_SHARING 1 357#define ERF_NO_HT_SHARING 1
313#define ERF_HAS_RSP_1 2 358#define ERF_HAS_RSP_1 2
314 359
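
x86_add_quirk() replaces the single quirks function pointer with a singly linked list of __initdata nodes that init_hw_perf_events() (hunk in perf_event.c above) walks, newest registration first. Registration is one line at the PMU-specific init site, matching the renamed Intel quirks later in this diff; the wrapper function below is only illustrative:

        static __init void intel_clovertown_quirk(void)
        {
                /* model-specific fixup; see the intel_pmu hunks below */
        }

        /* In a *_pmu_init() path, before init_hw_perf_events() runs: */
        static __init int example_pmu_init(void)
        {
                x86_add_quirk(intel_clovertown_quirk);  /* prepends to x86_pmu.quirks */
                return 0;
        }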
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a2..0397b23be8e9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); 495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 498
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8d601b18bf9f..3bd37bdf1b8e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
31 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
31}; 32};
32 33
33static struct event_constraint intel_core_event_constraints[] __read_mostly = 34static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
45{ 46{
46 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 47 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 48 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
48 /* 49 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
49 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
50 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
51 * ratio between these counters.
52 */
53 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
54 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 50 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
55 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 51 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
56 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 52 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
68{ 64{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 65 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 66 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 67 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
72 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 68 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
73 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 69 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
74 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 70 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
90{ 86{
91 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 87 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
92 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 88 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
93 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 89 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
94 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 90 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
95 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 91 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
96 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 92 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
102{ 98{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 99 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 100 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 101 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
106 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 102 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
107 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 103 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
108 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
125{ 121{
126 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 122 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
127 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 123 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
128 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 124 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
129 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
130}; 126};
131 127
@@ -1169,7 +1165,7 @@ again:
1169 */ 1165 */
1170 c = &unconstrained; 1166 c = &unconstrained;
1171 } else if (intel_try_alt_er(event, orig_idx)) { 1167 } else if (intel_try_alt_er(event, orig_idx)) {
1172 raw_spin_unlock(&era->lock); 1168 raw_spin_unlock_irqrestore(&era->lock, flags);
1173 goto again; 1169 goto again;
1174 } 1170 }
1175 raw_spin_unlock_irqrestore(&era->lock, flags); 1171 raw_spin_unlock_irqrestore(&era->lock, flags);
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1519 .guest_get_msrs = intel_guest_get_msrs, 1515 .guest_get_msrs = intel_guest_get_msrs,
1520}; 1516};
1521 1517
1522static void intel_clovertown_quirks(void) 1518static __init void intel_clovertown_quirk(void)
1523{ 1519{
1524 /* 1520 /*
1525 * PEBS is unreliable due to: 1521 * PEBS is unreliable due to:
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
1545 x86_pmu.pebs_constraints = NULL; 1541 x86_pmu.pebs_constraints = NULL;
1546} 1542}
1547 1543
1548static void intel_sandybridge_quirks(void) 1544static __init void intel_sandybridge_quirk(void)
1549{ 1545{
1550 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); 1546 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1551 x86_pmu.pebs = 0; 1547 x86_pmu.pebs = 0;
1552 x86_pmu.pebs_constraints = NULL; 1548 x86_pmu.pebs_constraints = NULL;
1553} 1549}
1554 1550
1551static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
1552 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
1553 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
1554 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
1555 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
1556 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
1557 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
1558 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
1559};
1560
1561static __init void intel_arch_events_quirk(void)
1562{
1563 int bit;
1564
 1565 /* disable events reported as not present by cpuid */
1566 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
1567 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
1568 printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
1569 intel_arch_events_map[bit].name);
1570 }
1571}
1572
1573static __init void intel_nehalem_quirk(void)
1574{
1575 union cpuid10_ebx ebx;
1576
1577 ebx.full = x86_pmu.events_maskl;
1578 if (ebx.split.no_branch_misses_retired) {
1579 /*
1580 * Erratum AAJ80 detected, we work it around by using
1581 * the BR_MISP_EXEC.ANY event. This will over-count
1582 * branch-misses, but it's still much better than the
1583 * architectural event which is often completely bogus:
1584 */
1585 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1586 ebx.split.no_branch_misses_retired = 0;
1587 x86_pmu.events_maskl = ebx.full;
1588 printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
1589 }
1590}
1591
1555__init int intel_pmu_init(void) 1592__init int intel_pmu_init(void)
1556{ 1593{
1557 union cpuid10_edx edx; 1594 union cpuid10_edx edx;
1558 union cpuid10_eax eax; 1595 union cpuid10_eax eax;
1596 union cpuid10_ebx ebx;
1559 unsigned int unused; 1597 unsigned int unused;
1560 unsigned int ebx;
1561 int version; 1598 int version;
1562 1599
1563 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 1600 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
1574 * Check whether the Architectural PerfMon supports 1611 * Check whether the Architectural PerfMon supports
1575 * Branch Misses Retired hw_event or not. 1612 * Branch Misses Retired hw_event or not.
1576 */ 1613 */
1577 cpuid(10, &eax.full, &ebx, &unused, &edx.full); 1614 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
1578 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) 1615 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
1579 return -ENODEV; 1616 return -ENODEV;
1580 1617
1581 version = eax.split.version_id; 1618 version = eax.split.version_id;
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
1589 x86_pmu.cntval_bits = eax.split.bit_width; 1626 x86_pmu.cntval_bits = eax.split.bit_width;
1590 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 1627 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
1591 1628
1629 x86_pmu.events_maskl = ebx.full;
1630 x86_pmu.events_mask_len = eax.split.mask_length;
1631
1592 /* 1632 /*
1593 * Quirk: v2 perfmon does not report fixed-purpose events, so 1633 * Quirk: v2 perfmon does not report fixed-purpose events, so
1594 * assume at least 3 events: 1634 * assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
1608 1648
1609 intel_ds_init(); 1649 intel_ds_init();
1610 1650
1651 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
1652
1611 /* 1653 /*
1612 * Install the hw-cache-events table: 1654 * Install the hw-cache-events table:
1613 */ 1655 */
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
1617 break; 1659 break;
1618 1660
1619 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1661 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1620 x86_pmu.quirks = intel_clovertown_quirks; 1662 x86_add_quirk(intel_clovertown_quirk);
1621 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1663 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1622 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1664 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1623 case 29: /* six-core 45 nm xeon "Dunnington" */ 1665 case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
1651 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1693 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1652 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1694 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1653 1695
1654 if (ebx & 0x40) { 1696 x86_add_quirk(intel_nehalem_quirk);
1655 /*
1656 * Erratum AAJ80 detected, we work it around by using
1657 * the BR_MISP_EXEC.ANY event. This will over-count
1658 * branch-misses, but it's still much better than the
1659 * architectural event which is often completely bogus:
1660 */
1661 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1662 1697
1663 pr_cont("erratum AAJ80 worked around, ");
1664 }
1665 pr_cont("Nehalem events, "); 1698 pr_cont("Nehalem events, ");
1666 break; 1699 break;
1667 1700
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
1701 break; 1734 break;
1702 1735
1703 case 42: /* SandyBridge */ 1736 case 42: /* SandyBridge */
1704 x86_pmu.quirks = intel_sandybridge_quirks; 1737 x86_add_quirk(intel_sandybridge_quirk);
1705 case 45: /* SandyBridge, "Romley-EP" */ 1738 case 45: /* SandyBridge, "Romley-EP" */
1706 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1739 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1707 sizeof(hw_cache_event_ids)); 1740 sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
1738 break; 1771 break;
1739 } 1772 }
1740 } 1773 }
1774
1741 return 0; 1775 return 0;
1742} 1776}
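
The hunks above replace the single x86_pmu.quirks callback with x86_add_quirk(),
which chains quirks so several can be installed during init; the "Install first,
so it runs last" comment implies the list is prepended on add and walked head to
tail when run. A minimal userspace sketch of that pattern, with hypothetical
names rather than the kernel's actual structures:

	#include <stdio.h>
	#include <stdlib.h>

	struct quirk {
		void (*func)(void);
		struct quirk *next;
	};

	static struct quirk *quirk_list;

	static void add_quirk(void (*func)(void))
	{
		struct quirk *q = malloc(sizeof(*q));

		if (!q)
			return;
		q->func = func;
		q->next = quirk_list;	/* prepend: first added ends up last */
		quirk_list = q;
	}

	static void run_quirks(void)
	{
		struct quirk *q;

		for (q = quirk_list; q; q = q->next)
			q->func();
	}

	static void arch_events_quirk(void) { puts("arch events quirk"); }
	static void nehalem_quirk(void)     { puts("nehalem quirk"); }

	int main(void)
	{
		add_quirk(arch_events_quirk);	/* installed first... */
		add_quirk(nehalem_quirk);
		run_quirks();			/* ...so it runs last */
		return 0;
	}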
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 3b97a80ce329..c99f9ed013d5 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -116,16 +116,16 @@ void show_registers(struct pt_regs *regs)
116 for (i = 0; i < code_len; i++, ip++) { 116 for (i = 0; i < code_len; i++, ip++) {
117 if (ip < (u8 *)PAGE_OFFSET || 117 if (ip < (u8 *)PAGE_OFFSET ||
118 probe_kernel_address(ip, c)) { 118 probe_kernel_address(ip, c)) {
119 printk(" Bad EIP value."); 119 printk(KERN_CONT " Bad EIP value.");
120 break; 120 break;
121 } 121 }
122 if (ip == (u8 *)regs->ip) 122 if (ip == (u8 *)regs->ip)
123 printk("<%02x> ", c); 123 printk(KERN_CONT "<%02x> ", c);
124 else 124 else
125 printk("%02x ", c); 125 printk(KERN_CONT "%02x ", c);
126 } 126 }
127 } 127 }
128 printk("\n"); 128 printk(KERN_CONT "\n");
129} 129}
130 130
131int is_valid_bugaddr(unsigned long ip) 131int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 19853ad8afc5..6d728d9284bd 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -284,16 +284,16 @@ void show_registers(struct pt_regs *regs)
284 for (i = 0; i < code_len; i++, ip++) { 284 for (i = 0; i < code_len; i++, ip++) {
285 if (ip < (u8 *)PAGE_OFFSET || 285 if (ip < (u8 *)PAGE_OFFSET ||
286 probe_kernel_address(ip, c)) { 286 probe_kernel_address(ip, c)) {
287 printk(" Bad RIP value."); 287 printk(KERN_CONT " Bad RIP value.");
288 break; 288 break;
289 } 289 }
290 if (ip == (u8 *)regs->ip) 290 if (ip == (u8 *)regs->ip)
291 printk("<%02x> ", c); 291 printk(KERN_CONT "<%02x> ", c);
292 else 292 else
293 printk("%02x ", c); 293 printk(KERN_CONT "%02x ", c);
294 } 294 }
295 } 295 }
296 printk("\n"); 296 printk(KERN_CONT "\n");
297} 297}
298 298
299int is_valid_bugaddr(unsigned long ip) 299int is_valid_bugaddr(unsigned long ip)
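
Both show_registers() fixes apply the same printk rule: every fragment that
continues the current line must carry KERN_CONT, otherwise it may be flushed
as a new line at the default log level. A small sketch of the pattern,
assuming a buffer of already-fetched code bytes:

	#include <linux/kernel.h>
	#include <linux/types.h>

	static void print_code_bytes(const u8 *buf, int len)
	{
		int i;

		printk(KERN_INFO "Code:");		/* starts the line */
		for (i = 0; i < len; i++)
			printk(KERN_CONT " %02x", buf[i]);	/* continues it */
		printk(KERN_CONT "\n");			/* terminates it */
	}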
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..8071e2f3d6eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -738,35 +738,17 @@ core_initcall(e820_mark_nvs_memory);
738/* 738/*
739 * pre-allocate 4k and reserve it in memblock and e820_saved 739 * pre-allocate 4k and reserve it in memblock and e820_saved
740 */ 740 */
741u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) 741u64 __init early_reserve_e820(u64 size, u64 align)
742{ 742{
743 u64 size = 0;
744 u64 addr; 743 u64 addr;
745 u64 start;
746 744
747 for (start = startt; ; start += size) { 745 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
748 start = memblock_x86_find_in_range_size(start, &size, align); 746 if (addr) {
749 if (start == MEMBLOCK_ERROR) 747 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
750 return 0; 748 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
751 if (size >= sizet) 749 update_e820_saved();
752 break;
753 } 750 }
754 751
755#ifdef CONFIG_X86_32
756 if (start >= MAXMEM)
757 return 0;
758 if (start + size > MAXMEM)
759 size = MAXMEM - start;
760#endif
761
762 addr = round_down(start + size - sizet, align);
763 if (addr < start)
764 return 0;
765 memblock_x86_reserve_range(addr, addr + sizet, "new next");
766 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
767 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
768 update_e820_saved();
769
770 return addr; 752 return addr;
771} 753}
772 754
@@ -1090,7 +1072,7 @@ void __init memblock_x86_fill(void)
1090 * We are safe to enable resizing, because memblock_x86_fill() 1072 * We are safe to enable resizing, because memblock_x86_fill()
1091 * runs rather late on x86 1073 * runs rather late on x86
1092 */ 1074 */
1093 memblock_can_resize = 1; 1075 memblock_allow_resize();
1094 1076
1095 for (i = 0; i < e820.nr_map; i++) { 1077 for (i = 0; i < e820.nr_map; i++) {
1096 struct e820entry *ei = &e820.map[i]; 1078 struct e820entry *ei = &e820.map[i];
@@ -1105,22 +1087,36 @@ void __init memblock_x86_fill(void)
1105 memblock_add(ei->addr, ei->size); 1087 memblock_add(ei->addr, ei->size);
1106 } 1088 }
1107 1089
1108 memblock_analyze();
1109 memblock_dump_all(); 1090 memblock_dump_all();
1110} 1091}
1111 1092
1112void __init memblock_find_dma_reserve(void) 1093void __init memblock_find_dma_reserve(void)
1113{ 1094{
1114#ifdef CONFIG_X86_64 1095#ifdef CONFIG_X86_64
1115 u64 free_size_pfn; 1096 u64 nr_pages = 0, nr_free_pages = 0;
1116 u64 mem_size_pfn; 1097 unsigned long start_pfn, end_pfn;
1098 phys_addr_t start, end;
1099 int i;
1100 u64 u;
1101
1117 /* 1102 /*
1118 * We need to find the used area below MAX_DMA_PFN: use memblock 1103 * We need to find the used area below MAX_DMA_PFN: use memblock
1119 * to get the free size in [0, MAX_DMA_PFN] first, and assume 1104 * to get the free size in [0, MAX_DMA_PFN] first, and assume
1120 * boot_mem will not take memory below MAX_DMA_PFN 1105 * boot_mem will not take memory below MAX_DMA_PFN
1121 */ 1106 */
1122 mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1107 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
1123 free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1108 start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
1124 set_dma_reserve(mem_size_pfn - free_size_pfn); 1109 end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
1110 nr_pages += end_pfn - start_pfn;
1111 }
1112
1113 for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
1114 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
1115 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
1116 if (start_pfn < end_pfn)
1117 nr_free_pages += end_pfn - start_pfn;
1118 }
1119
1120 set_dma_reserve(nr_pages - nr_free_pages);
1125#endif 1121#endif
1126} 1122}
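
The rewritten memblock_find_dma_reserve() above derives the reserve as total
pages minus free pages below MAX_DMA_PFN, clamping every range to that limit
before accumulating. A standalone sketch of the clamp-and-accumulate step,
using made-up ranges and a made-up limit:

	#include <stdio.h>

	struct range { unsigned long start_pfn, end_pfn; };

	static unsigned long clamped_pages(const struct range *r, int n,
					   unsigned long limit_pfn)
	{
		unsigned long pages = 0;
		int i;

		for (i = 0; i < n; i++) {
			unsigned long s = r[i].start_pfn < limit_pfn ? r[i].start_pfn : limit_pfn;
			unsigned long e = r[i].end_pfn < limit_pfn ? r[i].end_pfn : limit_pfn;

			if (s < e)		/* range survives the clamp */
				pages += e - s;
		}
		return pages;
	}

	int main(void)
	{
		struct range mem[]   = { { 0x0000, 0x1000 }, { 0x2000, 0x5000 } };
		struct range avail[] = { { 0x0100, 0x0800 } };
		unsigned long limit  = 0x1000;	/* stand-in for MAX_DMA_PFN */
		unsigned long total  = clamped_pages(mem, 2, limit);
		unsigned long nfree  = clamped_pages(avail, 1, limit);

		printf("dma reserve: %lu pages\n", total - nfree);
		return 0;
	}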
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48cf..48d9d4ea1020 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
52 lowmem = 0x9f000; 52 lowmem = 0x9f000;
53 53
54 /* reserve all memory between lowmem and the 1MB mark */ 54 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); 55 memblock_reserve(lowmem, 0x100000 - lowmem);
56} 56}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a1..51ff18616d50 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void)
31 31
32void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
33{ 33{
34 memblock_init(); 34 memblock_reserve(__pa_symbol(&_text),
35 35 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
36 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
37 36
38#ifdef CONFIG_BLK_DEV_INITRD 37#ifdef CONFIG_BLK_DEV_INITRD
39 /* Reserve INITRD */ 38 /* Reserve INITRD */
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void)
42 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 41 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
43 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 42 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
44 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 43 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
45 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 44 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
46 } 45 }
47#endif 46#endif
48 47
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272adb..3a3b779f41d3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
98{ 98{
99 copy_bootdata(__va(real_mode_data)); 99 copy_bootdata(__va(real_mode_data));
100 100
101 memblock_init(); 101 memblock_reserve(__pa_symbol(&_text),
102 102 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
103 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 103
105#ifdef CONFIG_BLK_DEV_INITRD 104#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 105 /* Reserve INITRD */
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
109 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 108 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
110 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 109 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
111 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 110 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
112 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 111 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
113 } 112 }
114#endif 113#endif
115 114
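
head.c, head32.c and head64.c all follow one mechanical conversion: the old
memblock_x86_reserve_range() took (start, end, name), while the generic
memblock_reserve() takes (base, size), so each caller now passes end - start
and drops the debug label. The rule as a hedged sketch (the helper below is
hypothetical, for illustration only):

	#include <linux/memblock.h>

	/* hypothetical helper: old (start, end) pair -> new (base, size) call */
	static void __init reserve_span(phys_addr_t start, phys_addr_t end)
	{
		/* before: memblock_x86_reserve_range(start, end, "label"); */
		memblock_reserve(start, end - start);
	}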
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 429e0c92924e..5d31e5bdbf85 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -181,8 +181,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
181 unsigned vector = ~regs->orig_ax; 181 unsigned vector = ~regs->orig_ax;
182 unsigned irq; 182 unsigned irq;
183 183
184 exit_idle();
185 irq_enter(); 184 irq_enter();
185 exit_idle();
186 186
187 irq = __this_cpu_read(vector_irq[vector]); 187 irq = __this_cpu_read(vector_irq[vector]);
188 188
@@ -209,10 +209,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
209 209
210 ack_APIC_irq(); 210 ack_APIC_irq();
211 211
212 exit_idle();
213
214 irq_enter(); 212 irq_enter();
215 213
214 exit_idle();
215
216 inc_irq_stat(x86_platform_ipis); 216 inc_irq_stat(x86_platform_ipis);
217 217
218 if (x86_platform_ipi_callback) 218 if (x86_platform_ipi_callback)
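
Both irq.c hunks move exit_idle() after irq_enter(): with RCU now permitted
to ignore idle CPUs, irq_enter() has to tell RCU that the CPU left idle
before the idle-exit notifiers (which may use RCU) are invoked. The required
ordering, as a sketch of an interrupt entry path:

	#include <linux/hardirq.h>
	#include <asm/idle.h>

	static void irq_entry_sketch(void)
	{
		irq_enter();	/* marks the CPU non-idle for RCU first */
		exit_idle();	/* idle-exit notifiers may now use RCU safely */

		/* ... handle the interrupt ... */

		irq_exit();
	}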
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f13ef..2889b3d43882 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
50 put_online_cpus(); 50 put_online_cpus();
51} 51}
52 52
53void arch_jump_label_transform_static(struct jump_entry *entry, 53__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
54 enum jump_label_type type) 54 enum jump_label_type type)
55{ 55{
56 __jump_label_transform(entry, type, text_poke_early); 56 __jump_label_transform(entry, type, text_poke_early);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 0741b062a304..ca470e4c92dc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
564 564
565static void __init smp_reserve_memory(struct mpf_intel *mpf) 565static void __init smp_reserve_memory(struct mpf_intel *mpf)
566{ 566{
567 unsigned long size = get_mpc_size(mpf->physptr); 567 memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
568
569 memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
570} 568}
571 569
572static int __init smp_scan_config(unsigned long base, unsigned long length) 570static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
595 mpf, (u64)virt_to_phys(mpf)); 593 mpf, (u64)virt_to_phys(mpf));
596 594
597 mem = virt_to_phys(mpf); 595 mem = virt_to_phys(mpf);
598 memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); 596 memblock_reserve(mem, sizeof(*mpf));
599 if (mpf->physptr) 597 if (mpf->physptr)
600 smp_reserve_memory(mpf); 598 smp_reserve_memory(mpf);
601 599
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
836 834
837void __init early_reserve_e820_mpc_new(void) 835void __init early_reserve_e820_mpc_new(void)
838{ 836{
839 if (enable_update_mptable && alloc_mptable) { 837 if (enable_update_mptable && alloc_mptable)
840 u64 startt = 0; 838 mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
841 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
842 }
843} 839}
844 840
845static int __init update_mp_table(void) 841static int __init update_mp_table(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f984c2..485204f58cda 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,8 @@ void cpu_idle(void)
99 99
100 /* endless idle loop with no priority at all */ 100 /* endless idle loop with no priority at all */
101 while (1) { 101 while (1) {
102 tick_nohz_stop_sched_tick(1); 102 tick_nohz_idle_enter();
103 rcu_idle_enter();
103 while (!need_resched()) { 104 while (!need_resched()) {
104 105
105 check_pgt_cache(); 106 check_pgt_cache();
@@ -116,7 +117,8 @@ void cpu_idle(void)
116 pm_idle(); 117 pm_idle();
117 start_critical_timings(); 118 start_critical_timings();
118 } 119 }
119 tick_nohz_restart_sched_tick(); 120 rcu_idle_exit();
121 tick_nohz_idle_exit();
120 preempt_enable_no_resched(); 122 preempt_enable_no_resched();
121 schedule(); 123 schedule();
122 preempt_disable(); 124 preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6eebf31..64e926c89a6f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
122 122
123 /* endless idle loop with no priority at all */ 123 /* endless idle loop with no priority at all */
124 while (1) { 124 while (1) {
125 tick_nohz_stop_sched_tick(1); 125 tick_nohz_idle_enter();
126 while (!need_resched()) { 126 while (!need_resched()) {
127 127
128 rmb(); 128 rmb();
@@ -139,8 +139,14 @@ void cpu_idle(void)
139 enter_idle(); 139 enter_idle();
140 /* Don't trace irqs off for idle */ 140 /* Don't trace irqs off for idle */
141 stop_critical_timings(); 141 stop_critical_timings();
142
143 /* enter_idle() needs rcu for notifiers */
144 rcu_idle_enter();
145
142 if (cpuidle_idle_call()) 146 if (cpuidle_idle_call())
143 pm_idle(); 147 pm_idle();
148
149 rcu_idle_exit();
144 start_critical_timings(); 150 start_critical_timings();
145 151
146 /* In many cases the interrupt that ended idle 152 /* In many cases the interrupt that ended idle
@@ -149,7 +155,7 @@ void cpu_idle(void)
149 __exit_idle(); 155 __exit_idle();
150 } 156 }
151 157
152 tick_nohz_restart_sched_tick(); 158 tick_nohz_idle_exit();
153 preempt_enable_no_resched(); 159 preempt_enable_no_resched();
154 schedule(); 160 schedule();
155 preempt_disable(); 161 preempt_disable();
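
Both idle loops are reshaped the same way: tick_nohz_idle_enter() and
tick_nohz_idle_exit() replace the old stop/restart pair, and rcu_idle_enter()
and rcu_idle_exit() bracket only the low-level idle call; on 64-bit the RCU
bracket is pushed further in because enter_idle() still needs RCU for its
notifiers. The resulting nesting, condensed into a fragment (not a complete
function):

	while (1) {
		tick_nohz_idle_enter();		/* stop the periodic tick */
		while (!need_resched()) {
			rcu_idle_enter();	/* RCU may now ignore this CPU */
			if (cpuidle_idle_call())
				pm_idle();
			rcu_idle_exit();
		}
		tick_nohz_idle_exit();		/* restart the tick */
		schedule();
	}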
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cf0ef986cb6d..d05444ac2aea 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
306static void __init reserve_brk(void) 306static void __init reserve_brk(void)
307{ 307{
308 if (_brk_end > _brk_start) 308 if (_brk_end > _brk_start)
309 memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); 309 memblock_reserve(__pa(_brk_start),
310 __pa(_brk_end) - __pa(_brk_start));
310 311
311 /* Mark brk area as locked down and no longer taking any 312 /* Mark brk area as locked down and no longer taking any
312 new allocations */ 313 new allocations */
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void)
331 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 332 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
332 PAGE_SIZE); 333 PAGE_SIZE);
333 334
334 if (ramdisk_here == MEMBLOCK_ERROR) 335 if (!ramdisk_here)
335 panic("Cannot find place for new RAMDISK of size %lld\n", 336 panic("Cannot find place for new RAMDISK of size %lld\n",
336 ramdisk_size); 337 ramdisk_size);
337 338
338 /* Note: this includes all the lowmem currently occupied by 339 /* Note: this includes all the lowmem currently occupied by
339 the initrd; we rely on that fact to keep the data intact. */ 340 the initrd; we rely on that fact to keep the data intact. */
340 memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); 341 memblock_reserve(ramdisk_here, area_size);
341 initrd_start = ramdisk_here + PAGE_OFFSET; 342 initrd_start = ramdisk_here + PAGE_OFFSET;
342 initrd_end = initrd_start + ramdisk_size; 343 initrd_end = initrd_start + ramdisk_size;
343 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 344 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 394 initrd_start = 0;
394 395
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 396 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_x86_free_range(ramdisk_image, ramdisk_end); 397 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
397 printk(KERN_ERR "initrd too large to handle, " 398 printk(KERN_ERR "initrd too large to handle, "
398 "disabling initrd\n"); 399 "disabling initrd\n");
399 return; 400 return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
416 417
417 relocate_initrd(); 418 relocate_initrd();
418 419
419 memblock_x86_free_range(ramdisk_image, ramdisk_end); 420 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
420} 421}
421#else 422#else
422static void __init reserve_initrd(void) 423static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
490{ 491{
491 struct setup_data *data; 492 struct setup_data *data;
492 u64 pa_data; 493 u64 pa_data;
493 char buf[32];
494 494
495 if (boot_params.hdr.version < 0x0209) 495 if (boot_params.hdr.version < 0x0209)
496 return; 496 return;
497 pa_data = boot_params.hdr.setup_data; 497 pa_data = boot_params.hdr.setup_data;
498 while (pa_data) { 498 while (pa_data) {
499 data = early_memremap(pa_data, sizeof(*data)); 499 data = early_memremap(pa_data, sizeof(*data));
500 sprintf(buf, "setup data %x", data->type); 500 memblock_reserve(pa_data, sizeof(*data) + data->len);
501 memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
502 pa_data = data->next; 501 pa_data = data->next;
503 early_iounmap(data, sizeof(*data)); 502 early_iounmap(data, sizeof(*data));
504 } 503 }
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void)
554 crash_base = memblock_find_in_range(alignment, 553 crash_base = memblock_find_in_range(alignment,
555 CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 554 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
556 555
557 if (crash_base == MEMBLOCK_ERROR) { 556 if (!crash_base) {
558 pr_info("crashkernel reservation failed - No suitable area found.\n"); 557 pr_info("crashkernel reservation failed - No suitable area found.\n");
559 return; 558 return;
560 } 559 }
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
568 return; 567 return;
569 } 568 }
570 } 569 }
571 memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); 570 memblock_reserve(crash_base, crash_size);
572 571
573 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " 572 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
574 "for crashkernel (System RAM: %ldMB)\n", 573 "for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
626 addr = find_ibft_region(&size); 625 addr = find_ibft_region(&size);
627 626
628 if (size) 627 if (size)
629 memblock_x86_reserve_range(addr, addr + size, "* ibft"); 628 memblock_reserve(addr, size);
630} 629}
631 630
632static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; 631static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
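
The setup.c conversions also switch the failure convention: memblock_find_in_range()
now returns 0 instead of MEMBLOCK_ERROR, so callers test !addr before reserving.
The find-then-reserve idiom, sketched with a hypothetical wrapper:

	#include <linux/memblock.h>

	/* hypothetical wrapper illustrating the new failure convention */
	static phys_addr_t __init find_and_reserve(phys_addr_t limit,
						   phys_addr_t size,
						   phys_addr_t align)
	{
		phys_addr_t addr = memblock_find_in_range(0, limit, size, align);

		if (!addr)	/* 0 replaces the old MEMBLOCK_ERROR sentinel */
			return 0;
		memblock_reserve(addr, size);
		return addr;
	}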
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a91ae7709b49..a73b61055ad6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
14 14
15 /* Has to be in very low memory so we can execute real-mode AP code. */ 15 /* Has to be in very low memory so we can execute real-mode AP code. */
16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); 16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
17 if (mem == MEMBLOCK_ERROR) 17 if (!mem)
18 panic("Cannot allocate trampoline\n"); 18 panic("Cannot allocate trampoline\n");
19 19
20 x86_trampoline_base = __va(mem); 20 x86_trampoline_base = __va(mem);
21 memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); 21 memblock_reserve(mem, size);
22 22
23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", 23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
24 x86_trampoline_base, (unsigned long long)mem, size); 24 x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 76e3f1cd0369..405f2620392f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -338,11 +338,15 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
338 return HRTIMER_NORESTART; 338 return HRTIMER_NORESTART;
339} 339}
340 340
341static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) 341static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
342{ 342{
343 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
343 struct kvm_timer *pt = &ps->pit_timer; 344 struct kvm_timer *pt = &ps->pit_timer;
344 s64 interval; 345 s64 interval;
345 346
347 if (!irqchip_in_kernel(kvm))
348 return;
349
346 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); 350 interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
347 351
348 pr_debug("create pit timer, interval is %llu nsec\n", interval); 352 pr_debug("create pit timer, interval is %llu nsec\n", interval);
@@ -394,13 +398,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
394 /* FIXME: enhance mode 4 precision */ 398 /* FIXME: enhance mode 4 precision */
395 case 4: 399 case 4:
396 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) { 400 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
397 create_pit_timer(ps, val, 0); 401 create_pit_timer(kvm, val, 0);
398 } 402 }
399 break; 403 break;
400 case 2: 404 case 2:
401 case 3: 405 case 3:
402 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){ 406 if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
403 create_pit_timer(ps, val, 1); 407 create_pit_timer(kvm, val, 1);
404 } 408 }
405 break; 409 break;
406 default: 410 default:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7b792e..4c938da2ba00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -602,7 +602,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
602{ 602{
603 struct kvm_cpuid_entry2 *best; 603 struct kvm_cpuid_entry2 *best;
604 struct kvm_lapic *apic = vcpu->arch.apic; 604 struct kvm_lapic *apic = vcpu->arch.apic;
605 u32 timer_mode_mask;
606 605
607 best = kvm_find_cpuid_entry(vcpu, 1, 0); 606 best = kvm_find_cpuid_entry(vcpu, 1, 0);
608 if (!best) 607 if (!best)
@@ -615,15 +614,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
615 best->ecx |= bit(X86_FEATURE_OSXSAVE); 614 best->ecx |= bit(X86_FEATURE_OSXSAVE);
616 } 615 }
617 616
618 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 617 if (apic) {
619 best->function == 0x1) { 618 if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
620 best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); 619 apic->lapic_timer.timer_mode_mask = 3 << 17;
621 timer_mode_mask = 3 << 17; 620 else
622 } else 621 apic->lapic_timer.timer_mode_mask = 1 << 17;
623 timer_mode_mask = 1 << 17; 622 }
624
625 if (apic)
626 apic->lapic_timer.timer_mode_mask = timer_mode_mask;
627} 623}
628 624
629int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 625int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -2135,6 +2131,9 @@ int kvm_dev_ioctl_check_extension(long ext)
2135 case KVM_CAP_TSC_CONTROL: 2131 case KVM_CAP_TSC_CONTROL:
2136 r = kvm_has_tsc_control; 2132 r = kvm_has_tsc_control;
2137 break; 2133 break;
2134 case KVM_CAP_TSC_DEADLINE_TIMER:
2135 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2136 break;
2138 default: 2137 default:
2139 r = 0; 2138 r = 0;
2140 break; 2139 break;
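
With KVM_CAP_TSC_DEADLINE_TIMER wired into kvm_dev_ioctl_check_extension(),
userspace can probe the feature through the usual KVM_CHECK_EXTENSION ioctl.
A hedged userspace sketch, assuming uapi headers that match this kernel:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int r;

		if (kvm < 0) {
			perror("open /dev/kvm");
			return 1;
		}
		/* > 0 means the capability is present */
		r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER);
		printf("TSC deadline timer: %s\n",
		       r > 0 ? "available" : "not available");
		return 0;
	}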
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 46fc4ee09fc4..88ad5fbda6e1 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
82 const insn_attr_t *table; 82 const insn_attr_t *table;
83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) 83 if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
84 return 0; 84 return 0;
 85 table = inat_avx_tables[vex_m][vex_p]; 85 /* First, check the master table */
86 table = inat_avx_tables[vex_m][0];
86 if (!table) 87 if (!table)
87 return 0; 88 return 0;
89 if (!inat_is_group(table[opcode]) && vex_p) {
90 /* If this is not a group, get attribute directly */
91 table = inat_avx_tables[vex_m][vex_p];
92 if (!table)
93 return 0;
94 }
88 return table[opcode]; 95 return table[opcode];
89} 96}
90 97
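
The fixed inat_get_avx_attribute() consults the master (no-last-prefix) table
first and re-indexes by vex_p only when the entry is not a group, so that
group attributes survive until the ModRM byte resolves them. The two-step
lookup, reduced to a standalone sketch with made-up table contents:

	#include <stdio.h>

	#define GROUP_BIT 0x8000	/* stand-in for the group attribute flag */

	static const unsigned short avx_table[2][4] = {
		/* [opcode][last prefix]: column 0 is the "master" table */
		{ GROUP_BIT | 1, 0,      0,      0      },	/* a group opcode */
		{ 0x0010,        0x0011, 0x0012, 0x0013 },	/* a plain opcode */
	};

	static unsigned short get_attr(int opcode, int prefix)
	{
		unsigned short attr = avx_table[opcode][0];	/* master first */

		if (!(attr & GROUP_BIT) && prefix)	/* non-group: re-index */
			attr = avx_table[opcode][prefix];
		return attr;
	}

	int main(void)
	{
		printf("group opcode: %#x\n", get_attr(0, 2));	/* keeps group attr */
		printf("plain opcode: %#x\n", get_attr(1, 2));	/* prefix-specific */
		return 0;
	}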
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 374562ed6704..5a1f9f3e3fbb 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
202 m = insn_vex_m_bits(insn); 202 m = insn_vex_m_bits(insn);
203 p = insn_vex_p_bits(insn); 203 p = insn_vex_p_bits(insn);
204 insn->attr = inat_get_avx_attribute(op, m, p); 204 insn->attr = inat_get_avx_attribute(op, m, p);
205 if (!inat_accept_vex(insn->attr)) 205 if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
206 insn->attr = 0; /* This instruction is bad */ 206 insn->attr = 0; /* This instruction is bad */
207 goto end; /* VEX has only 1 byte for opcode */ 207 goto end; /* VEX has only 1 byte for opcode */
208 } 208 }
@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn)
249 pfx = insn_last_prefix(insn); 249 pfx = insn_last_prefix(insn);
250 insn->attr = inat_get_group_attribute(mod, pfx, 250 insn->attr = inat_get_group_attribute(mod, pfx,
251 insn->attr); 251 insn->attr);
252 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
253 insn->attr = 0; /* This is bad */
252 } 254 }
253 } 255 }
254 256
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index a793da5e560e..5b83c51c12e0 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,5 +1,11 @@
1# x86 Opcode Maps 1# x86 Opcode Maps
2# 2#
 3# This is (mostly) based on the following documentation:
4# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
5# (#325383-040US, October 2011)
6# - Intel(R) Advanced Vector Extensions Programming Reference
 7# (#319433-011, June 2011).
8#
3#<Opcode maps> 9#<Opcode maps>
4# Table: table-name 10# Table: table-name
5# Referrer: escaped-name 11# Referrer: escaped-name
@@ -15,10 +21,13 @@
15# EndTable 21# EndTable
16# 22#
17# AVX Superscripts 23# AVX Superscripts
18# (VEX): this opcode can accept VEX prefix. 24# (v): this opcode requires VEX prefix.
19# (oVEX): this opcode requires VEX prefix. 25# (v1): this opcode only supports 128bit VEX.
20# (o128): this opcode only supports 128bit VEX. 26#
21# (o256): this opcode only supports 256bit VEX. 27# Last Prefix Superscripts
28# - (66): the last prefix is 0x66
29# - (F3): the last prefix is 0xF3
30# - (F2): the last prefix is 0xF2
22# 31#
23 32
24Table: one byte opcode 33Table: one byte opcode
@@ -199,8 +208,8 @@ a0: MOV AL,Ob
199a1: MOV rAX,Ov 208a1: MOV rAX,Ov
200a2: MOV Ob,AL 209a2: MOV Ob,AL
201a3: MOV Ov,rAX 210a3: MOV Ov,rAX
202a4: MOVS/B Xb,Yb 211a4: MOVS/B Yb,Xb
203a5: MOVS/W/D/Q Xv,Yv 212a5: MOVS/W/D/Q Yv,Xv
204a6: CMPS/B Xb,Yb 213a6: CMPS/B Xb,Yb
205a7: CMPS/W/D Xv,Yv 214a7: CMPS/W/D Xv,Yv
206a8: TEST AL,Ib 215a8: TEST AL,Ib
@@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A)
233c1: Grp2 Ev,Ib (1A) 242c1: Grp2 Ev,Ib (1A)
234c2: RETN Iw (f64) 243c2: RETN Iw (f64)
235c3: RETN 244c3: RETN
236c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) 245c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
237c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) 246c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
238c6: Grp11 Eb,Ib (1A) 247c6: Grp11 Eb,Ib (1A)
239c7: Grp11 Ev,Iz (1A) 248c7: Grp11 Ev,Iz (1A)
240c8: ENTER Iw,Ib 249c8: ENTER Iw,Ib
@@ -320,14 +329,19 @@ AVXcode: 1
320# 3DNow! uses the last imm byte as opcode extension. 329# 3DNow! uses the last imm byte as opcode extension.
3210f: 3DNow! Pq,Qq,Ib 3300f: 3DNow! Pq,Qq,Ib
322# 0x0f 0x10-0x1f 331# 0x0f 0x10-0x1f
32310: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) 332# NOTE: According to Intel SDM opcode map, vmovups and vmovupd have no operands
32411: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) 333# but they actually have operands. Also, vmovss and vmovsd only accept 128bit.
32512: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) 334# MOVSS/MOVSD have too many forms (3) in the SDM. This map just shows a typical form.
32613: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) 335# Many AVX instructions lack the v1 superscript, according to Intel AVX-Programming
32714: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) 336# Reference A.1
32815: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) 33710: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
32916: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) 33811: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
33017: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 33912: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
34013: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
34114: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
34215: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
34316: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
34417: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
33118: Grp16 (1A) 34518: Grp16 (1A)
33219: 34619:
3331a: 3471a:
@@ -345,14 +359,14 @@ AVXcode: 1
34525: 35925:
34626: 36026:
34727: 36127:
34828: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) 36228: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
34929: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) 36329: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
3502a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) 3642a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
3512b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) 3652b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
3522c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) 3662c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
3532d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) 3672d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
3542e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) 3682e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
3552f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) 3692f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
356# 0x0f 0x30-0x3f 370# 0x0f 0x30-0x3f
35730: WRMSR 37130: WRMSR
35831: RDTSC 37231: RDTSC
@@ -388,65 +402,66 @@ AVXcode: 1
3884e: CMOVLE/NG Gv,Ev 4024e: CMOVLE/NG Gv,Ev
3894f: CMOVNLE/G Gv,Ev 4034f: CMOVNLE/G Gv,Ev
390# 0x0f 0x50-0x5f 404# 0x0f 0x50-0x5f
39150: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) 40550: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
39251: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) 40651: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
39352: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) 40752: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
39453: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) 40853: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
39554: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) 40954: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
39655: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) 41055: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
39756: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) 41156: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
39857: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) 41257: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
39958: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) 41358: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
40059: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) 41459: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
4015a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) 4155a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
4025b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) 4165b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
4035c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) 4175c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
4045d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) 4185d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
4055e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) 4195e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
4065f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) 4205f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
407# 0x0f 0x60-0x6f 421# 0x0f 0x60-0x6f
40860: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) 42260: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
40961: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) 42361: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
41062: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) 42462: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
41163: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) 42563: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
41264: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) 42664: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
41365: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) 42765: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
41466: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) 42866: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
41567: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) 42967: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
41668: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) 43068: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
41769: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) 43169: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
4186a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) 4326a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
4196b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) 4336b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
4206c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) 4346c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
4216d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) 4356d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
4226e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) 4366e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
4236f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) 4376f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
424# 0x0f 0x70-0x7f 438# 0x0f 0x70-0x7f
42570: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 43970: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
42671: Grp12 (1A) 44071: Grp12 (1A)
42772: Grp13 (1A) 44172: Grp13 (1A)
42873: Grp14 (1A) 44273: Grp14 (1A)
42974: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) 44374: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
43075: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) 44475: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
43176: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) 44576: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
43277: emms/vzeroupper/vzeroall (VEX) 446# Note: Remove (v), because vzeroall and vzeroupper become emms without VEX.
43378: VMREAD Ed/q,Gd/q 44777: emms | vzeroupper | vzeroall
43479: VMWRITE Gd/q,Ed/q 44878: VMREAD Ey,Gy
44979: VMWRITE Gy,Ey
4357a: 4507a:
4367b: 4517b:
4377c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) 4527c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
4387d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) 4537d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
4397e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) 4547e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
4407f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) 4557f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
441# 0x0f 0x80-0x8f 456# 0x0f 0x80-0x8f
44280: JO Jz (f64) 45780: JO Jz (f64)
44381: JNO Jz (f64) 45881: JNO Jz (f64)
44482: JB/JNAE/JC Jz (f64) 45982: JB/JC/JNAE Jz (f64)
44583: JNB/JAE/JNC Jz (f64) 46083: JAE/JNB/JNC Jz (f64)
44684: JZ/JE Jz (f64) 46184: JE/JZ Jz (f64)
44785: JNZ/JNE Jz (f64) 46285: JNE/JNZ Jz (f64)
44886: JBE/JNA Jz (f64) 46386: JBE/JNA Jz (f64)
44987: JNBE/JA Jz (f64) 46487: JA/JNBE Jz (f64)
45088: JS Jz (f64) 46588: JS Jz (f64)
45189: JNS Jz (f64) 46689: JNS Jz (f64)
4528a: JP/JPE Jz (f64) 4678a: JP/JPE Jz (f64)
@@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3)
502b9: Grp10 (1A) 517b9: Grp10 (1A)
503ba: Grp8 Ev,Ib (1A) 518ba: Grp8 Ev,Ib (1A)
504bb: BTC Ev,Gv 519bb: BTC Ev,Gv
505bc: BSF Gv,Ev 520bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
506bd: BSR Gv,Ev 521bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
507be: MOVSX Gv,Eb 522be: MOVSX Gv,Eb
508bf: MOVSX Gv,Ew 523bf: MOVSX Gv,Ew
509# 0x0f 0xc0-0xcf 524# 0x0f 0xc0-0xcf
510c0: XADD Eb,Gb 525c0: XADD Eb,Gb
511c1: XADD Ev,Gv 526c1: XADD Ev,Gv
512c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) 527c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
513c3: movnti Md/q,Gd/q 528c3: movnti My,Gy
514c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) 529c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
515c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) 530c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
516c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) 531c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
517c7: Grp9 (1A) 532c7: Grp9 (1A)
518c8: BSWAP RAX/EAX/R8/R8D 533c8: BSWAP RAX/EAX/R8/R8D
519c9: BSWAP RCX/ECX/R9/R9D 534c9: BSWAP RCX/ECX/R9/R9D
@@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D
524ce: BSWAP RSI/ESI/R14/R14D 539ce: BSWAP RSI/ESI/R14/R14D
525cf: BSWAP RDI/EDI/R15/R15D 540cf: BSWAP RDI/EDI/R15/R15D
526# 0x0f 0xd0-0xdf 541# 0x0f 0xd0-0xdf
527d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) 542d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
528d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) 543d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
529d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) 544d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
530d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) 545d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
531d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) 546d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
532d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) 547d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
533d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) 548d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
534d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) 549d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
535d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) 550d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
536d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) 551d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
537da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) 552da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
538db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) 553db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
539dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) 554dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
540dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) 555dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
541de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) 556de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
542df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) 557df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
543# 0x0f 0xe0-0xef 558# 0x0f 0xe0-0xef
544e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) 559e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
545e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) 560e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
546e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) 561e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
547e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) 562e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
548e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) 563e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
549e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) 564e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
550e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) 565e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
551e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) 566e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
552e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) 567e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
553e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) 568e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
554ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) 569ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
555eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) 570eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
556ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) 571ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
557ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) 572ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
558ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) 573ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
559ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) 574ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
560# 0x0f 0xf0-0xff 575# 0x0f 0xf0-0xff
561f0: lddqu Vdq,Mdq (F2),(VEX) 576f0: vlddqu Vx,Mx (F2)
562f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) 577f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
563f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) 578f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
564f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) 579f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
565f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) 580f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
566f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) 581f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
567f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) 582f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
568f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) 583f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
569f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) 584f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
570f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) 585f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
571fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) 586fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
572fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) 587fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
573fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) 588fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
574fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) 589fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
575fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) 590fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
576ff: 591ff:
577EndTable 592EndTable
578 593
@@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38)
580Referrer: 3-byte escape 1 595Referrer: 3-byte escape 1
581AVXcode: 2 596AVXcode: 2
582# 0x0f 0x38 0x00-0x0f 597# 0x0f 0x38 0x00-0x0f
58300: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) 59800: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
58401: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) 59901: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
58502: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) 60002: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
58603: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) 60103: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
58704: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) 60204: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
58805: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) 60305: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
58906: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) 60406: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
59007: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) 60507: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
59108: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) 60608: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
59209: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) 60709: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
5930a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) 6080a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
5940b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) 6090b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
5950c: Vpermilps /r (66),(oVEX) 6100c: vpermilps Vx,Hx,Wx (66),(v)
5960d: Vpermilpd /r (66),(oVEX) 6110d: vpermilpd Vx,Hx,Wx (66),(v)
5970e: vtestps /r (66),(oVEX) 6120e: vtestps Vx,Wx (66),(v)
5980f: vtestpd /r (66),(oVEX) 6130f: vtestpd Vx,Wx (66),(v)
599# 0x0f 0x38 0x10-0x1f 614# 0x0f 0x38 0x10-0x1f
60010: pblendvb Vdq,Wdq (66) 61510: pblendvb Vdq,Wdq (66)
60111: 61611:
60212: 61712:
60313: 61813: vcvtph2ps Vx,Wx,Ib (66),(v)
60414: blendvps Vdq,Wdq (66) 61914: blendvps Vdq,Wdq (66)
60515: blendvpd Vdq,Wdq (66) 62015: blendvpd Vdq,Wdq (66)
60616: 62116: vpermps Vqq,Hqq,Wqq (66),(v)
60717: ptest Vdq,Wdq (66),(VEX) 62217: vptest Vx,Wx (66)
60818: vbroadcastss /r (66),(oVEX) 62318: vbroadcastss Vx,Wd (66),(v)
60919: vbroadcastsd /r (66),(oVEX),(o256) 62419: vbroadcastsd Vqq,Wq (66),(v)
6101a: vbroadcastf128 /r (66),(oVEX),(o256) 6251a: vbroadcastf128 Vqq,Mdq (66),(v)
6111b: 6261b:
6121c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) 6271c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
6131d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) 6281d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
6141e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 6291e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
6151f: 6301f:
616# 0x0f 0x38 0x20-0x2f 631# 0x0f 0x38 0x20-0x2f
61720: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) 63220: vpmovsxbw Vx,Ux/Mq (66),(v1)
61821: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) 63321: vpmovsxbd Vx,Ux/Md (66),(v1)
61922: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) 63422: vpmovsxbq Vx,Ux/Mw (66),(v1)
62023: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) 63523: vpmovsxwd Vx,Ux/Mq (66),(v1)
62124: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) 63624: vpmovsxwq Vx,Ux/Md (66),(v1)
62225: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 63725: vpmovsxdq Vx,Ux/Mq (66),(v1)
62326: 63826:
62427: 63927:
62528: pmuldq Vdq,Wdq (66),(VEX),(o128) 64028: vpmuldq Vx,Hx,Wx (66),(v1)
62629: pcmpeqq Vdq,Wdq (66),(VEX),(o128) 64129: vpcmpeqq Vx,Hx,Wx (66),(v1)
6272a: movntdqa Vdq,Mdq (66),(VEX),(o128) 6422a: vmovntdqa Vx,Mx (66),(v1)
6282b: packusdw Vdq,Wdq (66),(VEX),(o128) 6432b: vpackusdw Vx,Hx,Wx (66),(v1)
6292c: vmaskmovps(ld) /r (66),(oVEX) 6442c: vmaskmovps Vx,Hx,Mx (66),(v)
6302d: vmaskmovpd(ld) /r (66),(oVEX) 6452d: vmaskmovpd Vx,Hx,Mx (66),(v)
6312e: vmaskmovps(st) /r (66),(oVEX) 6462e: vmaskmovps Mx,Hx,Vx (66),(v)
6322f: vmaskmovpd(st) /r (66),(oVEX) 6472f: vmaskmovpd Mx,Hx,Vx (66),(v)
633# 0x0f 0x38 0x30-0x3f 648# 0x0f 0x38 0x30-0x3f
63430: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) 64930: vpmovzxbw Vx,Ux/Mq (66),(v1)
63531: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) 65031: vpmovzxbd Vx,Ux/Md (66),(v1)
63632: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) 65132: vpmovzxbq Vx,Ux/Mw (66),(v1)
63733: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) 65233: vpmovzxwd Vx,Ux/Mq (66),(v1)
63834: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) 65334: vpmovzxwq Vx,Ux/Md (66),(v1)
63935: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 65435: vpmovzxdq Vx,Ux/Mq (66),(v1)
64036: 65536: vpermd Vqq,Hqq,Wqq (66),(v)
64137: pcmpgtq Vdq,Wdq (66),(VEX),(o128) 65637: vpcmpgtq Vx,Hx,Wx (66),(v1)
64238: pminsb Vdq,Wdq (66),(VEX),(o128) 65738: vpminsb Vx,Hx,Wx (66),(v1)
64339: pminsd Vdq,Wdq (66),(VEX),(o128) 65839: vpminsd Vx,Hx,Wx (66),(v1)
6443a: pminuw Vdq,Wdq (66),(VEX),(o128) 6593a: vpminuw Vx,Hx,Wx (66),(v1)
6453b: pminud Vdq,Wdq (66),(VEX),(o128) 6603b: vpminud Vx,Hx,Wx (66),(v1)
6463c: pmaxsb Vdq,Wdq (66),(VEX),(o128) 6613c: vpmaxsb Vx,Hx,Wx (66),(v1)
6473d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 6623d: vpmaxsd Vx,Hx,Wx (66),(v1)
6483e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 6633e: vpmaxuw Vx,Hx,Wx (66),(v1)
6493f: pmaxud Vdq,Wdq (66),(VEX),(o128) 6643f: vpmaxud Vx,Hx,Wx (66),(v1)
650# 0x0f 0x38 0x40-0x8f 665# 0x0f 0x38 0x40-0x8f
65140: pmulld Vdq,Wdq (66),(VEX),(o128) 66640: vpmulld Vx,Hx,Wx (66),(v1)
65241: phminposuw Vdq,Wdq (66),(VEX),(o128) 66741: vphminposuw Vdq,Wdq (66),(v1)
65380: INVEPT Gd/q,Mdq (66) 66842:
65481: INVVPID Gd/q,Mdq (66) 66943:
67044:
67145: vpsrlvd/q Vx,Hx,Wx (66),(v)
67246: vpsravd Vx,Hx,Wx (66),(v)
67347: vpsllvd/q Vx,Hx,Wx (66),(v)
674# Skip 0x48-0x57
67558: vpbroadcastd Vx,Wx (66),(v)
67659: vpbroadcastq Vx,Wx (66),(v)
6775a: vbroadcasti128 Vqq,Mdq (66),(v)
678# Skip 0x5b-0x77
67978: vpbroadcastb Vx,Wx (66),(v)
68079: vpbroadcastw Vx,Wx (66),(v)
681# Skip 0x7a-0x7f
68280: INVEPT Gy,Mdq (66)
68381: INVVPID Gy,Mdq (66)
68482: INVPCID Gy,Mdq (66)
6858c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
6868e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
655# 0x0f 0x38 0x90-0xbf (FMA) 687# 0x0f 0x38 0x90-0xbf (FMA)
65696: vfmaddsub132pd/ps /r (66),(VEX) 68890: vgatherdd/q Vx,Hx,Wx (66),(v)
65797: vfmsubadd132pd/ps /r (66),(VEX) 68991: vgatherqd/q Vx,Hx,Wx (66),(v)
65898: vfmadd132pd/ps /r (66),(VEX) 69092: vgatherdps/d Vx,Hx,Wx (66),(v)
65999: vfmadd132sd/ss /r (66),(VEX),(o128) 69193: vgatherqps/d Vx,Hx,Wx (66),(v)
6609a: vfmsub132pd/ps /r (66),(VEX) 69294:
6619b: vfmsub132sd/ss /r (66),(VEX),(o128) 69395:
6629c: vfnmadd132pd/ps /r (66),(VEX) 69496: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
6639d: vfnmadd132sd/ss /r (66),(VEX),(o128) 69597: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
6649e: vfnmsub132pd/ps /r (66),(VEX) 69698: vfmadd132ps/d Vx,Hx,Wx (66),(v)
6659f: vfnmsub132sd/ss /r (66),(VEX),(o128) 69799: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
666a6: vfmaddsub213pd/ps /r (66),(VEX) 6989a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
667a7: vfmsubadd213pd/ps /r (66),(VEX) 6999b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
668a8: vfmadd213pd/ps /r (66),(VEX) 7009c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
669a9: vfmadd213sd/ss /r (66),(VEX),(o128) 7019d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
670aa: vfmsub213pd/ps /r (66),(VEX) 7029e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
671ab: vfmsub213sd/ss /r (66),(VEX),(o128) 7039f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
672ac: vfnmadd213pd/ps /r (66),(VEX) 704a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
673ad: vfnmadd213sd/ss /r (66),(VEX),(o128) 705a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
674ae: vfnmsub213pd/ps /r (66),(VEX) 706a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
675af: vfnmsub213sd/ss /r (66),(VEX),(o128) 707a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
676b6: vfmaddsub231pd/ps /r (66),(VEX) 708aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
677b7: vfmsubadd231pd/ps /r (66),(VEX) 709ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
678b8: vfmadd231pd/ps /r (66),(VEX) 710ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
679b9: vfmadd231sd/ss /r (66),(VEX),(o128) 711ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
680ba: vfmsub231pd/ps /r (66),(VEX) 712ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
681bb: vfmsub231sd/ss /r (66),(VEX),(o128) 713af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
682bc: vfnmadd231pd/ps /r (66),(VEX) 714b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
683bd: vfnmadd231sd/ss /r (66),(VEX),(o128) 715b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
684be: vfnmsub231pd/ps /r (66),(VEX) 716b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
685bf: vfnmsub231sd/ss /r (66),(VEX),(o128) 717b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
718ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
719bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
720bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
721bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
722be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
723bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
686# 0x0f 0x38 0xc0-0xff 724# 0x0f 0x38 0xc0-0xff
687db: aesimc Vdq,Wdq (66),(VEX),(o128) 725db: VAESIMC Vdq,Wdq (66),(v1)
688dc: aesenc Vdq,Wdq (66),(VEX),(o128) 726dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
689dd: aesenclast Vdq,Wdq (66),(VEX),(o128) 727dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
690de: aesdec Vdq,Wdq (66),(VEX),(o128) 728de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
691df: aesdeclast Vdq,Wdq (66),(VEX),(o128) 729df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
692f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) 730f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
693f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) 731f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
732f3: ANDN Gy,By,Ey (v)
733f4: Grp17 (1A)
734f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
735f6: MULX By,Gy,rDX,Ey (F2),(v)
736f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
694EndTable 737EndTable
695 738
696Table: 3-byte opcode 2 (0x0f 0x3a) 739Table: 3-byte opcode 2 (0x0f 0x3a)
697Referrer: 3-byte escape 2 740Referrer: 3-byte escape 2
698AVXcode: 3 741AVXcode: 3
699# 0x0f 0x3a 0x00-0xff 742# 0x0f 0x3a 0x00-0xff
70004: vpermilps /r,Ib (66),(oVEX) 74300: vpermq Vqq,Wqq,Ib (66),(v)
70105: vpermilpd /r,Ib (66),(oVEX) 74401: vpermpd Vqq,Wqq,Ib (66),(v)
70206: vperm2f128 /r,Ib (66),(oVEX),(o256) 74502: vpblendd Vx,Hx,Wx,Ib (66),(v)
70308: roundps Vdq,Wdq,Ib (66),(VEX) 74603:
70409: roundpd Vdq,Wdq,Ib (66),(VEX) 74704: vpermilps Vx,Wx,Ib (66),(v)
7050a: roundss Vss,Wss,Ib (66),(VEX),(o128) 74805: vpermilpd Vx,Wx,Ib (66),(v)
7060b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) 74906: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
7070c: blendps Vdq,Wdq,Ib (66),(VEX) 75007:
7080d: blendpd Vdq,Wdq,Ib (66),(VEX) 75108: vroundps Vx,Wx,Ib (66)
7090e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) 75209: vroundpd Vx,Wx,Ib (66)
7100f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) 7530a: vroundss Vss,Wss,Ib (66),(v1)
71114: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) 7540b: vroundsd Vsd,Wsd,Ib (66),(v1)
71215: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) 7550c: vblendps Vx,Hx,Wx,Ib (66)
71316: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) 7560d: vblendpd Vx,Hx,Wx,Ib (66)
71417: extractps Ed,Vdq,Ib (66),(VEX),(o128) 7570e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
71518: vinsertf128 /r,Ib (66),(oVEX),(o256) 7580f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
71619: vextractf128 /r,Ib (66),(oVEX),(o256) 75914: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
71720: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) 76015: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
71821: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) 76116: vpextrd/q Ey,Vdq,Ib (66),(v1)
71922: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) 76217: vextractps Ed,Vdq,Ib (66),(v1)
72040: dpps Vdq,Wdq,Ib (66),(VEX) 76318: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
72141: dppd Vdq,Wdq,Ib (66),(VEX),(o128) 76419: vextractf128 Wdq,Vqq,Ib (66),(v)
72242: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) 7651d: vcvtps2ph Wx,Vx,Ib (66),(v)
72344: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) 76620: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
7244a: vblendvps /r,Ib (66),(oVEX) 76721: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
7254b: vblendvpd /r,Ib (66),(oVEX) 76822: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
7264c: vpblendvb /r,Ib (66),(oVEX),(o128) 76938: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
72760: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) 77039: vextracti128 Wdq,Vqq,Ib (66),(v)
72861: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) 77140: vdpps Vx,Hx,Wx,Ib (66)
72962: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) 77241: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
73063: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) 77342: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
731df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) 77444: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
77546: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
7764a: vblendvps Vx,Hx,Wx,Lx (66),(v)
7774b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
7784c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
77960: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
78061: vpcmpestri Vdq,Wdq,Ib (66),(v1)
78162: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
78263: vpcmpistri Vdq,Wdq,Ib (66),(v1)
783df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
784f0: RORX Gy,Ey,Ib (F2),(v)
732EndTable 785EndTable
733 786
734GrpTable: Grp1 787GrpTable: Grp1
@@ -790,7 +843,7 @@ GrpTable: Grp5
7902: CALLN Ev (f64) 8432: CALLN Ev (f64)
7913: CALLF Ep 8443: CALLF Ep
7924: JMPN Ev (f64) 8454: JMPN Ev (f64)
7935: JMPF Ep 8465: JMPF Mp
7946: PUSH Ev (d64) 8476: PUSH Ev (d64)
7957: 8487:
796EndTable 849EndTable
@@ -807,7 +860,7 @@ EndTable
807GrpTable: Grp7 860GrpTable: Grp7
8080: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 8610: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8091: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) 8621: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
8102: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) 8632: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
8113: LIDT Ms 8643: LIDT Ms
8124: SMSW Mw/Rv 8654: SMSW Mw/Rv
8135: 8665:
@@ -824,44 +877,45 @@ EndTable
824 877
825GrpTable: Grp9 878GrpTable: Grp9
8261: CMPXCHG8B/16B Mq/Mdq 8791: CMPXCHG8B/16B Mq/Mdq
8276: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) 8806: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
8287: VMPTRST Mq 8817: VMPTRST Mq | VMPTRST Mq (F3)
829EndTable 882EndTable
830 883
831GrpTable: Grp10 884GrpTable: Grp10
832EndTable 885EndTable
833 886
834GrpTable: Grp11 887GrpTable: Grp11
888# Note: the operands are given by group opcode
8350: MOV 8890: MOV
836EndTable 890EndTable
837 891
838GrpTable: Grp12 892GrpTable: Grp12
8392: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) 8932: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
8404: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) 8944: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
8416: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) 8956: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
842EndTable 896EndTable
843 897
844GrpTable: Grp13 898GrpTable: Grp13
8452: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) 8992: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
8464: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) 9004: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
8476: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) 9016: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
848EndTable 902EndTable
849 903
850GrpTable: Grp14 904GrpTable: Grp14
8512: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) 9052: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
8523: psrldq Udq,Ib (66),(11B),(VEX),(o128) 9063: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
8536: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) 9076: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
8547: pslldq Udq,Ib (66),(11B),(VEX),(o128) 9087: vpslldq Hx,Ux,Ib (66),(11B),(v1)
855EndTable 909EndTable
856 910
857GrpTable: Grp15 911GrpTable: Grp15
8580: fxsave 9120: fxsave | RDFSBASE Ry (F3),(11B)
8591: fxstor 9131: fxstor | RDGSBASE Ry (F3),(11B)
8602: ldmxcsr (VEX) 9142: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
8613: stmxcsr (VEX) 9153: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
8624: XSAVE 9164: XSAVE
8635: XRSTOR | lfence (11B) 9175: XRSTOR | lfence (11B)
8646: mfence (11B) 9186: XSAVEOPT | mfence (11B)
8657: clflush | sfence (11B) 9197: clflush | sfence (11B)
866EndTable 920EndTable
867 921
@@ -872,6 +926,12 @@ GrpTable: Grp16
8723: prefetch T2 9263: prefetch T2
873EndTable 927EndTable
874 928
929GrpTable: Grp17
9301: BLSR By,Ey (v)
9312: BLSMSK By,Ey (v)
9323: BLSI By,Ey (v)
933EndTable
934
875# AMD's Prefetch Group 935# AMD's Prefetch Group
876GrpTable: GrpP 936GrpTable: GrpP
8770: PREFETCH 9370: PREFETCH
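Note: in the rewritten opcode tables above, (66)/(F2)/(F3) still mark mandatory prefixes; the new superscripts replace the old VEX annotations. (v1) marks an opcode that accepts a VEX prefix in its 128-bit form (the old (VEX),(o128) pair), while (v) marks a VEX-only encoding (the old (oVEX)); see the matching generator changes in gen-insn-attr-x86.awk further down. A minimal sketch of how decoder code might test the resulting attribute bits, assuming the inat_accept_vex()/inat_must_vex() helpers from <asm/inat.h> (illustrative only, not part of this patch):

	#include <asm/inat.h>	/* insn_attr_t, INAT_VEXOK, INAT_VEXONLY */

	/*
	 * Reject an instruction whose VEX-ness contradicts its table
	 * attribute. 'attr' would come from inat_get_opcode_attribute()
	 * or its escape/group variants; 'has_vex' from prefix scanning.
	 */
	static int vex_encoding_ok(insn_attr_t attr, int has_vex)
	{
		if (has_vex)
			return inat_accept_vex(attr) || inat_must_vex(attr);
		return !inat_must_vex(attr);	/* (v) entries require VEX */
	}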
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3d11327c9ab4..23d8e5fecf76 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -27,6 +27,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
27obj-$(CONFIG_ACPI_NUMA) += srat.o 27obj-$(CONFIG_ACPI_NUMA) += srat.o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o 28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
29 29
30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
31
32obj-$(CONFIG_MEMTEST) += memtest.o 30obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 87488b93a65c..a298914058f9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -67,7 +67,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
67 good_end = max_pfn_mapped << PAGE_SHIFT; 67 good_end = max_pfn_mapped << PAGE_SHIFT;
68 68
69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 69 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
70 if (base == MEMBLOCK_ERROR) 70 if (!base)
71 panic("Cannot find space for the kernel page tables"); 71 panic("Cannot find space for the kernel page tables");
72 72
73 pgt_buf_start = base >> PAGE_SHIFT; 73 pgt_buf_start = base >> PAGE_SHIFT;
@@ -80,7 +80,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
80 80
81void __init native_pagetable_reserve(u64 start, u64 end) 81void __init native_pagetable_reserve(u64 start, u64 end)
82{ 82{
83 memblock_x86_reserve_range(start, end, "PGTABLE"); 83 memblock_reserve(start, end - start);
84} 84}
85 85
86struct map_range { 86struct map_range {
@@ -279,8 +279,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) 279 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
280 * so that they can be reused for other purposes. 280 * so that they can be reused for other purposes.
281 * 281 *
282 * On native it just means calling memblock_x86_reserve_range, on Xen it 282 * On native it just means calling memblock_reserve, on Xen it also
283 * also means marking RW the pagetable pages that we allocated before 283 * means marking RW the pagetable pages that we allocated before
284 * but that haven't been used. 284 * but that haven't been used.
285 * 285 *
286 * In fact on xen we mark RO the whole range pgt_buf_start - 286 * In fact on xen we mark RO the whole range pgt_buf_start -
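Note: the conversions in this file show the two generic-memblock conventions that recur through the rest of this patch. The removed x86 wrappers took (start, end, name) tuples; memblock_reserve()/memblock_free() take (base, size) pairs, and search or allocation failure is now reported as 0 rather than MEMBLOCK_ERROR. A self-contained sketch of the converted idiom (the helper name is hypothetical):

	#include <linux/memblock.h>

	static void __init reserve_example(phys_addr_t start, phys_addr_t end)
	{
		phys_addr_t base;

		/* was: memblock_x86_reserve_range(start, end, "NAME") */
		memblock_reserve(start, end - start);

		base = memblock_find_in_range(start, end, PAGE_SIZE, PAGE_SIZE);
		if (!base)	/* was: base == MEMBLOCK_ERROR */
			panic("Cannot find space");
	}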
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 29f7c6d98179..0c1da394a634 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,23 +427,17 @@ static void __init add_one_highpage_init(struct page *page)
427void __init add_highpages_with_active_regions(int nid, 427void __init add_highpages_with_active_regions(int nid,
428 unsigned long start_pfn, unsigned long end_pfn) 428 unsigned long start_pfn, unsigned long end_pfn)
429{ 429{
430 struct range *range; 430 phys_addr_t start, end;
431 int nr_range; 431 u64 i;
432 int i; 432
433 433 for_each_free_mem_range(i, nid, &start, &end, NULL) {
434 nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); 434 unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
435 435 start_pfn, end_pfn);
436 for (i = 0; i < nr_range; i++) { 436 unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
437 struct page *page; 437 start_pfn, end_pfn);
438 int node_pfn; 438 for ( ; pfn < e_pfn; pfn++)
439 439 if (pfn_valid(pfn))
440 for (node_pfn = range[i].start; node_pfn < range[i].end; 440 add_one_highpage_init(pfn_to_page(pfn));
441 node_pfn++) {
442 if (!pfn_valid(node_pfn))
443 continue;
444 page = pfn_to_page(node_pfn);
445 add_one_highpage_init(page);
446 }
447 } 441 }
448} 442}
449#else 443#else
@@ -650,18 +644,18 @@ void __init initmem_init(void)
650 highstart_pfn = highend_pfn = max_pfn; 644 highstart_pfn = highend_pfn = max_pfn;
651 if (max_pfn > max_low_pfn) 645 if (max_pfn > max_low_pfn)
652 highstart_pfn = max_low_pfn; 646 highstart_pfn = max_low_pfn;
653 memblock_x86_register_active_regions(0, 0, highend_pfn);
654 sparse_memory_present_with_active_regions(0);
655 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 647 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
656 pages_to_mb(highend_pfn - highstart_pfn)); 648 pages_to_mb(highend_pfn - highstart_pfn));
657 num_physpages = highend_pfn; 649 num_physpages = highend_pfn;
658 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 650 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
659#else 651#else
660 memblock_x86_register_active_regions(0, 0, max_low_pfn);
661 sparse_memory_present_with_active_regions(0);
662 num_physpages = max_low_pfn; 652 num_physpages = max_low_pfn;
663 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; 653 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
664#endif 654#endif
655
656 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
657 sparse_memory_present_with_active_regions(0);
658
665#ifdef CONFIG_FLATMEM 659#ifdef CONFIG_FLATMEM
666 max_mapnr = num_physpages; 660 max_mapnr = num_physpages;
667#endif 661#endif
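Note: the highmem walk above is the canonical use of the new iterator: for_each_free_mem_range() yields each free (memory minus reserved) range for a node, and clamp_t() clips it to the pfn window of interest, replacing the temporary range-array machinery provided by the x86 memblock.c removed later in this patch. Restated as a self-contained sketch, assuming a caller-supplied per-pfn action:

	#include <linux/memblock.h>
	#include <linux/pfn.h>

	static void __init walk_free_pfns(int nid, unsigned long start_pfn,
					  unsigned long end_pfn)
	{
		phys_addr_t start, end;
		u64 i;

		for_each_free_mem_range(i, nid, &start, &end, NULL) {
			unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
						    start_pfn, end_pfn);
			unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
						      start_pfn, end_pfn);

			for ( ; pfn < e_pfn; pfn++)
				if (pfn_valid(pfn))
					; /* act on pfn_to_page(pfn) here */
		}
	}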
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa005bf0e..a8a56ce3a962 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -608,7 +608,7 @@ kernel_physical_mapping_init(unsigned long start,
608#ifndef CONFIG_NUMA 608#ifndef CONFIG_NUMA
609void __init initmem_init(void) 609void __init initmem_init(void)
610{ 610{
611 memblock_x86_register_active_regions(0, 0, max_pfn); 611 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
612} 612}
613#endif 613#endif
614 614
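Note: memblock_set_node() records node ownership directly in the memblock regions, replacing the x86-private active-region registration. In this era the call takes (base, size, nid), so the (0, ULLONG_MAX, 0) form above simply claims all discovered memory for node 0 on !CONFIG_NUMA kernels; a sketch:

	#include <linux/memblock.h>

	/* Non-NUMA setup: every memblock region belongs to node 0. */
	static void __init claim_all_for_node0(void)
	{
		memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
	}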
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
deleted file mode 100644
index 992da5ec5a64..000000000000
--- a/arch/x86/mm/memblock.c
+++ /dev/null
@@ -1,348 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bitops.h>
5#include <linux/memblock.h>
6#include <linux/bootmem.h>
7#include <linux/mm.h>
8#include <linux/range.h>
9
10/* Check for already reserved areas */
11bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
12{
13 struct memblock_region *r;
14 u64 addr = *addrp, last;
15 u64 size = *sizep;
16 bool changed = false;
17
18again:
19 last = addr + size;
20 for_each_memblock(reserved, r) {
21 if (last > r->base && addr < r->base) {
22 size = r->base - addr;
23 changed = true;
24 goto again;
25 }
26 if (last > (r->base + r->size) && addr < (r->base + r->size)) {
27 addr = round_up(r->base + r->size, align);
28 size = last - addr;
29 changed = true;
30 goto again;
31 }
32 if (last <= (r->base + r->size) && addr >= r->base) {
33 *sizep = 0;
34 return false;
35 }
36 }
37 if (changed) {
38 *addrp = addr;
39 *sizep = size;
40 }
41 return changed;
42}
43
44/*
45 * Find the next free range after start; its size is returned in *sizep
46 */
47u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
48{
49 struct memblock_region *r;
50
51 for_each_memblock(memory, r) {
52 u64 ei_start = r->base;
53 u64 ei_last = ei_start + r->size;
54 u64 addr;
55
56 addr = round_up(ei_start, align);
57 if (addr < start)
58 addr = round_up(start, align);
59 if (addr >= ei_last)
60 continue;
61 *sizep = ei_last - addr;
62 while (memblock_x86_check_reserved_size(&addr, sizep, align))
63 ;
64
65 if (*sizep)
66 return addr;
67 }
68
69 return MEMBLOCK_ERROR;
70}
71
72static __init struct range *find_range_array(int count)
73{
74 u64 end, size, mem;
75 struct range *range;
76
77 size = sizeof(struct range) * count;
78 end = memblock.current_limit;
79
80 mem = memblock_find_in_range(0, end, size, sizeof(struct range));
81 if (mem == MEMBLOCK_ERROR)
82 panic("can not find more space for range array");
83
84 /*
85 * This range is temporary, so don't reserve it; it will not be
86 * overlapped because we will not allocate a new buffer before
87 * we discard this one
88 */
89 range = __va(mem);
90 memset(range, 0, size);
91
92 return range;
93}
94
95static void __init memblock_x86_subtract_reserved(struct range *range, int az)
96{
97 u64 final_start, final_end;
98 struct memblock_region *r;
99
100 /* Take out the region array itself first */
101 memblock_free_reserved_regions();
102
103 memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt);
104
105 for_each_memblock(reserved, r) {
106 memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1);
107 final_start = PFN_DOWN(r->base);
108 final_end = PFN_UP(r->base + r->size);
109 if (final_start >= final_end)
110 continue;
111 subtract_range(range, az, final_start, final_end);
112 }
113
114 /* Put region array back ? */
115 memblock_reserve_reserved_regions();
116}
117
118struct count_data {
119 int nr;
120};
121
122static int __init count_work_fn(unsigned long start_pfn,
123 unsigned long end_pfn, void *datax)
124{
125 struct count_data *data = datax;
126
127 data->nr++;
128
129 return 0;
130}
131
132static int __init count_early_node_map(int nodeid)
133{
134 struct count_data data;
135
136 data.nr = 0;
137 work_with_active_regions(nodeid, count_work_fn, &data);
138
139 return data.nr;
140}
141
142int __init __get_free_all_memory_range(struct range **rangep, int nodeid,
143 unsigned long start_pfn, unsigned long end_pfn)
144{
145 int count;
146 struct range *range;
147 int nr_range;
148
149 count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2;
150
151 range = find_range_array(count);
152 nr_range = 0;
153
154 /*
155 * Use early_node_map[] and memblock.reserved.region to get range array
156 * at first
157 */
158 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
159 subtract_range(range, count, 0, start_pfn);
160 subtract_range(range, count, end_pfn, -1ULL);
161
162 memblock_x86_subtract_reserved(range, count);
163 nr_range = clean_sort_range(range, count);
164
165 *rangep = range;
166 return nr_range;
167}
168
169int __init get_free_all_memory_range(struct range **rangep, int nodeid)
170{
171 unsigned long end_pfn = -1UL;
172
173#ifdef CONFIG_X86_32
174 end_pfn = max_low_pfn;
175#endif
176 return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn);
177}
178
179static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free)
180{
181 int i, count;
182 struct range *range;
183 int nr_range;
184 u64 final_start, final_end;
185 u64 free_size;
186 struct memblock_region *r;
187
188 count = (memblock.reserved.cnt + memblock.memory.cnt) * 2;
189
190 range = find_range_array(count);
191 nr_range = 0;
192
193 addr = PFN_UP(addr);
194 limit = PFN_DOWN(limit);
195
196 for_each_memblock(memory, r) {
197 final_start = PFN_UP(r->base);
198 final_end = PFN_DOWN(r->base + r->size);
199 if (final_start >= final_end)
200 continue;
201 if (final_start >= limit || final_end <= addr)
202 continue;
203
204 nr_range = add_range(range, count, nr_range, final_start, final_end);
205 }
206 subtract_range(range, count, 0, addr);
207 subtract_range(range, count, limit, -1ULL);
208
209 /* Subtract memblock.reserved.region in range ? */
210 if (!get_free)
211 goto sort_and_count_them;
212 for_each_memblock(reserved, r) {
213 final_start = PFN_DOWN(r->base);
214 final_end = PFN_UP(r->base + r->size);
215 if (final_start >= final_end)
216 continue;
217 if (final_start >= limit || final_end <= addr)
218 continue;
219
220 subtract_range(range, count, final_start, final_end);
221 }
222
223sort_and_count_them:
224 nr_range = clean_sort_range(range, count);
225
226 free_size = 0;
227 for (i = 0; i < nr_range; i++)
228 free_size += range[i].end - range[i].start;
229
230 return free_size << PAGE_SHIFT;
231}
232
233u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit)
234{
235 return __memblock_x86_memory_in_range(addr, limit, true);
236}
237
238u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit)
239{
240 return __memblock_x86_memory_in_range(addr, limit, false);
241}
242
243void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
244{
245 if (start == end)
246 return;
247
248 if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end))
249 return;
250
251 memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name);
252
253 memblock_reserve(start, end - start);
254}
255
256void __init memblock_x86_free_range(u64 start, u64 end)
257{
258 if (start == end)
259 return;
260
261 if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end))
262 return;
263
264 memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1);
265
266 memblock_free(start, end - start);
267}
268
269/*
270 * This must be called after memblock_x86_register_active_regions(),
271 * so that early_node_map[] is already filled in.
272 */
273u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align)
274{
275 u64 addr;
276 addr = find_memory_core_early(nid, size, align, start, end);
277 if (addr != MEMBLOCK_ERROR)
278 return addr;
279
280 /* Fallback, should already have start end within node range */
281 return memblock_find_in_range(start, end, size, align);
282}
283
284/*
285 * Finds an active region in the address range from start_pfn to last_pfn and
286 * returns its range in ei_startpfn and ei_endpfn for the memblock entry.
287 */
288static int __init memblock_x86_find_active_region(const struct memblock_region *ei,
289 unsigned long start_pfn,
290 unsigned long last_pfn,
291 unsigned long *ei_startpfn,
292 unsigned long *ei_endpfn)
293{
294 u64 align = PAGE_SIZE;
295
296 *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
297 *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
298
299 /* Skip map entries smaller than a page */
300 if (*ei_startpfn >= *ei_endpfn)
301 return 0;
302
303 /* Skip if map is outside the node */
304 if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
305 return 0;
306
307 /* Check for overlaps */
308 if (*ei_startpfn < start_pfn)
309 *ei_startpfn = start_pfn;
310 if (*ei_endpfn > last_pfn)
311 *ei_endpfn = last_pfn;
312
313 return 1;
314}
315
316/* Walk the memblock.memory map and register active regions within a node */
317void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
318 unsigned long last_pfn)
319{
320 unsigned long ei_startpfn;
321 unsigned long ei_endpfn;
322 struct memblock_region *r;
323
324 for_each_memblock(memory, r)
325 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
326 &ei_startpfn, &ei_endpfn))
327 add_active_range(nid, ei_startpfn, ei_endpfn);
328}
329
330/*
331 * Find the hole size (in bytes) in the memory range.
332 * @start: starting address of the memory range to scan
333 * @end: ending address of the memory range to scan
334 */
335u64 __init memblock_x86_hole_size(u64 start, u64 end)
336{
337 unsigned long start_pfn = start >> PAGE_SHIFT;
338 unsigned long last_pfn = end >> PAGE_SHIFT;
339 unsigned long ei_startpfn, ei_endpfn, ram = 0;
340 struct memblock_region *r;
341
342 for_each_memblock(memory, r)
343 if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
344 &ei_startpfn, &ei_endpfn))
345 ram += ei_endpfn - ei_startpfn;
346
347 return end - start - ((u64)ram << PAGE_SHIFT);
348}
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 92faf3a1c53e..c80b9fb95734 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -34,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
34 (unsigned long long) pattern, 34 (unsigned long long) pattern,
35 (unsigned long long) start_bad, 35 (unsigned long long) start_bad,
36 (unsigned long long) end_bad); 36 (unsigned long long) end_bad);
37 memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); 37 memblock_reserve(start_bad, end_bad - start_bad);
38} 38}
39 39
40static void __init memtest(u64 pattern, u64 start_phys, u64 size) 40static void __init memtest(u64 pattern, u64 start_phys, u64 size)
@@ -70,24 +70,19 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size)
70 70
71static void __init do_one_pass(u64 pattern, u64 start, u64 end) 71static void __init do_one_pass(u64 pattern, u64 start, u64 end)
72{ 72{
73 u64 size = 0; 73 u64 i;
74 74 phys_addr_t this_start, this_end;
75 while (start < end) { 75
76 start = memblock_x86_find_in_range_size(start, &size, 1); 76 for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) {
77 77 this_start = clamp_t(phys_addr_t, this_start, start, end);
78 /* done ? */ 78 this_end = clamp_t(phys_addr_t, this_end, start, end);
79 if (start >= end) 79 if (this_start < this_end) {
80 break; 80 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
81 if (start + size > end) 81 (unsigned long long)this_start,
82 size = end - start; 82 (unsigned long long)this_end,
83 83 (unsigned long long)cpu_to_be64(pattern));
84 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", 84 memtest(pattern, this_start, this_end - this_start);
85 (unsigned long long) start, 85 }
86 (unsigned long long) start + size,
87 (unsigned long long) cpu_to_be64(pattern));
88 memtest(pattern, start, size);
89
90 start += size;
91 } 86 }
92} 87}
93 88
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fbeaaf416610..496f494593bf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,8 +192,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
192/* Initialize NODE_DATA for a node on the local memory */ 192/* Initialize NODE_DATA for a node on the local memory */
193static void __init setup_node_data(int nid, u64 start, u64 end) 193static void __init setup_node_data(int nid, u64 start, u64 end)
194{ 194{
195 const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
196 const u64 nd_high = PFN_PHYS(max_pfn_mapped);
197 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 195 const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
198 bool remapped = false; 196 bool remapped = false;
199 u64 nd_pa; 197 u64 nd_pa;
@@ -224,17 +222,12 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
224 nd_pa = __pa(nd); 222 nd_pa = __pa(nd);
225 remapped = true; 223 remapped = true;
226 } else { 224 } else {
227 nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high, 225 nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
228 nd_size, SMP_CACHE_BYTES); 226 if (!nd_pa) {
229 if (nd_pa == MEMBLOCK_ERROR)
230 nd_pa = memblock_find_in_range(nd_low, nd_high,
231 nd_size, SMP_CACHE_BYTES);
232 if (nd_pa == MEMBLOCK_ERROR) {
233 pr_err("Cannot find %zu bytes in node %d\n", 227 pr_err("Cannot find %zu bytes in node %d\n",
234 nd_size, nid); 228 nd_size, nid);
235 return; 229 return;
236 } 230 }
237 memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
238 nd = __va(nd_pa); 231 nd = __va(nd_pa);
239 } 232 }
240 233
@@ -371,8 +364,7 @@ void __init numa_reset_distance(void)
371 364
372 /* numa_distance could be 1LU marking allocation failure, test cnt */ 365 /* numa_distance could be 1LU marking allocation failure, test cnt */
373 if (numa_distance_cnt) 366 if (numa_distance_cnt)
374 memblock_x86_free_range(__pa(numa_distance), 367 memblock_free(__pa(numa_distance), size);
375 __pa(numa_distance) + size);
376 numa_distance_cnt = 0; 368 numa_distance_cnt = 0;
377 numa_distance = NULL; /* enable table creation */ 369 numa_distance = NULL; /* enable table creation */
378} 370}
@@ -395,13 +387,13 @@ static int __init numa_alloc_distance(void)
395 387
396 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 388 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
397 size, PAGE_SIZE); 389 size, PAGE_SIZE);
398 if (phys == MEMBLOCK_ERROR) { 390 if (!phys) {
399 pr_warning("NUMA: Warning: can't allocate distance table!\n"); 391 pr_warning("NUMA: Warning: can't allocate distance table!\n");
400 /* don't retry until explicitly reset */ 392 /* don't retry until explicitly reset */
401 numa_distance = (void *)1LU; 393 numa_distance = (void *)1LU;
402 return -ENOMEM; 394 return -ENOMEM;
403 } 395 }
404 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); 396 memblock_reserve(phys, size);
405 397
406 numa_distance = __va(phys); 398 numa_distance = __va(phys);
407 numa_distance_cnt = cnt; 399 numa_distance_cnt = cnt;
@@ -482,8 +474,8 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
482 numaram = 0; 474 numaram = 0;
483 } 475 }
484 476
485 e820ram = max_pfn - (memblock_x86_hole_size(0, 477 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
486 PFN_PHYS(max_pfn)) >> PAGE_SHIFT); 478
487 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ 479 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
488 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { 480 if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
489 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", 481 printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
@@ -505,13 +497,10 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
505 if (WARN_ON(nodes_empty(node_possible_map))) 497 if (WARN_ON(nodes_empty(node_possible_map)))
506 return -EINVAL; 498 return -EINVAL;
507 499
508 for (i = 0; i < mi->nr_blks; i++) 500 for (i = 0; i < mi->nr_blks; i++) {
509 memblock_x86_register_active_regions(mi->blk[i].nid, 501 struct numa_memblk *mb = &mi->blk[i];
510 mi->blk[i].start >> PAGE_SHIFT, 502 memblock_set_node(mb->start, mb->end - mb->start, mb->nid);
511 mi->blk[i].end >> PAGE_SHIFT); 503 }
512
513 /* for out of order entries */
514 sort_node_map();
515 504
516 /* 505 /*
517 * If sections array is gonna be used for pfn -> nid mapping, check 506 * If sections array is gonna be used for pfn -> nid mapping, check
@@ -545,6 +534,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
545 setup_node_data(nid, start, end); 534 setup_node_data(nid, start, end);
546 } 535 }
547 536
537 /* Dump memblock with node info and return. */
538 memblock_dump_all();
548 return 0; 539 return 0;
549} 540}
550 541
@@ -582,7 +573,7 @@ static int __init numa_init(int (*init_func)(void))
582 nodes_clear(node_possible_map); 573 nodes_clear(node_possible_map);
583 nodes_clear(node_online_map); 574 nodes_clear(node_online_map);
584 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 575 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
585 remove_all_active_ranges(); 576 WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
586 numa_reset_distance(); 577 numa_reset_distance();
587 578
588 ret = init_func(); 579 ret = init_func();
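Note: the NODE_DATA placement above collapses a three-step dance (node-local search, anywhere fallback, explicit reserve) into one node-affine allocation; memblock_alloc_nid() both finds and reserves the block, and returns 0 on failure. A minimal sketch (helper name hypothetical):

	#include <linux/memblock.h>

	static phys_addr_t __init alloc_node_data(int nid, size_t nd_size)
	{
		/* node-affine: finds and reserves in one step */
		phys_addr_t nd_pa = memblock_alloc_nid(nd_size,
						       SMP_CACHE_BYTES, nid);

		return nd_pa;	/* 0, not MEMBLOCK_ERROR, means failure */
	}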
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 3adebe7e536a..534255a36b6b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -199,23 +199,23 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
199 199
200 /* allocate node memory and the lowmem remap area */ 200 /* allocate node memory and the lowmem remap area */
201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); 201 node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
202 if (node_pa == MEMBLOCK_ERROR) { 202 if (!node_pa) {
203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", 203 pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
204 size, nid); 204 size, nid);
205 return; 205 return;
206 } 206 }
207 memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); 207 memblock_reserve(node_pa, size);
208 208
209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, 209 remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
210 max_low_pfn << PAGE_SHIFT, 210 max_low_pfn << PAGE_SHIFT,
211 size, LARGE_PAGE_BYTES); 211 size, LARGE_PAGE_BYTES);
212 if (remap_pa == MEMBLOCK_ERROR) { 212 if (!remap_pa) {
213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", 213 pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
214 size, nid); 214 size, nid);
215 memblock_x86_free_range(node_pa, node_pa + size); 215 memblock_free(node_pa, size);
216 return; 216 return;
217 } 217 }
218 memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); 218 memblock_reserve(remap_pa, size);
219 remap_va = phys_to_virt(remap_pa); 219 remap_va = phys_to_virt(remap_pa);
220 220
221 /* perform actual remap */ 221 /* perform actual remap */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index dd27f401f0a0..92e27119ee1a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -19,7 +19,7 @@ unsigned long __init numa_free_all_bootmem(void)
19 for_each_online_node(i) 19 for_each_online_node(i)
20 pages += free_all_bootmem_node(NODE_DATA(i)); 20 pages += free_all_bootmem_node(NODE_DATA(i));
21 21
22 pages += free_all_memory_core_early(MAX_NUMNODES); 22 pages += free_low_memory_core_early(MAX_NUMNODES);
23 23
24 return pages; 24 return pages;
25} 25}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index d0ed086b6247..46db56845f18 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,6 +28,16 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
28 return -ENOENT; 28 return -ENOENT;
29} 29}
30 30
31static u64 mem_hole_size(u64 start, u64 end)
32{
33 unsigned long start_pfn = PFN_UP(start);
34 unsigned long end_pfn = PFN_DOWN(end);
35
36 if (start_pfn < end_pfn)
37 return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
38 return 0;
39}
40
31/* 41/*
32 * Sets up nid to range from @start to @end. The return value is -errno if 42 * Sets up nid to range from @start to @end. The return value is -errno if
33 * something went wrong, 0 otherwise. 43 * something went wrong, 0 otherwise.
@@ -89,7 +99,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
89 * Calculate target node size. x86_32 freaks on __udivdi3() so do 99 * Calculate target node size. x86_32 freaks on __udivdi3() so do
90 * the division in ulong number of pages and convert back. 100 * the division in ulong number of pages and convert back.
91 */ 101 */
92 size = max_addr - addr - memblock_x86_hole_size(addr, max_addr); 102 size = max_addr - addr - mem_hole_size(addr, max_addr);
93 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes); 103 size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
94 104
95 /* 105 /*
@@ -135,8 +145,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
135 * Continue to add memory to this fake node if its 145 * Continue to add memory to this fake node if its
136 * non-reserved memory is less than the per-node size. 146 * non-reserved memory is less than the per-node size.
137 */ 147 */
138 while (end - start - 148 while (end - start - mem_hole_size(start, end) < size) {
139 memblock_x86_hole_size(start, end) < size) {
140 end += FAKE_NODE_MIN_SIZE; 149 end += FAKE_NODE_MIN_SIZE;
141 if (end > limit) { 150 if (end > limit) {
142 end = limit; 151 end = limit;
@@ -150,7 +159,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
150 * this one must extend to the boundary. 159 * this one must extend to the boundary.
151 */ 160 */
152 if (end < dma32_end && dma32_end - end - 161 if (end < dma32_end && dma32_end - end -
153 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 162 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
154 end = dma32_end; 163 end = dma32_end;
155 164
156 /* 165 /*
@@ -158,8 +167,7 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
158 * next node, this one must extend to the end of the 167 * next node, this one must extend to the end of the
159 * physical node. 168 * physical node.
160 */ 169 */
161 if (limit - end - 170 if (limit - end - mem_hole_size(end, limit) < size)
162 memblock_x86_hole_size(end, limit) < size)
163 end = limit; 171 end = limit;
164 172
165 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, 173 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
@@ -180,7 +188,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
180{ 188{
181 u64 end = start + size; 189 u64 end = start + size;
182 190
183 while (end - start - memblock_x86_hole_size(start, end) < size) { 191 while (end - start - mem_hole_size(start, end) < size) {
184 end += FAKE_NODE_MIN_SIZE; 192 end += FAKE_NODE_MIN_SIZE;
185 if (end > max_addr) { 193 if (end > max_addr) {
186 end = max_addr; 194 end = max_addr;
@@ -211,8 +219,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
211 * creates a uniform distribution of node sizes across the entire 219 * creates a uniform distribution of node sizes across the entire
212 * machine (but not necessarily over physical nodes). 220 * machine (but not necessarily over physical nodes).
213 */ 221 */
214 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / 222 min_size = (max_addr - addr - mem_hole_size(addr, max_addr)) / MAX_NUMNODES;
215 MAX_NUMNODES;
216 min_size = max(min_size, FAKE_NODE_MIN_SIZE); 223 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
217 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) 224 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
218 min_size = (min_size + FAKE_NODE_MIN_SIZE) & 225 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
@@ -252,7 +259,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
252 * this one must extend to the boundary. 259 * this one must extend to the boundary.
253 */ 260 */
254 if (end < dma32_end && dma32_end - end - 261 if (end < dma32_end && dma32_end - end -
255 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 262 mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
256 end = dma32_end; 263 end = dma32_end;
257 264
258 /* 265 /*
@@ -260,8 +267,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
260 * next node, this one must extend to the end of the 267 * next node, this one must extend to the end of the
261 * physical node. 268 * physical node.
262 */ 269 */
263 if (limit - end - 270 if (limit - end - mem_hole_size(end, limit) < size)
264 memblock_x86_hole_size(end, limit) < size)
265 end = limit; 271 end = limit;
266 272
267 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, 273 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
@@ -351,11 +357,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
351 357
352 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), 358 phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
353 phys_size, PAGE_SIZE); 359 phys_size, PAGE_SIZE);
354 if (phys == MEMBLOCK_ERROR) { 360 if (!phys) {
355 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); 361 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
356 goto no_emu; 362 goto no_emu;
357 } 363 }
358 memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); 364 memblock_reserve(phys, phys_size);
359 phys_dist = __va(phys); 365 phys_dist = __va(phys);
360 366
361 for (i = 0; i < numa_dist_cnt; i++) 367 for (i = 0; i < numa_dist_cnt; i++)
@@ -424,7 +430,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
424 430
425 /* free the copied physical distance table */ 431 /* free the copied physical distance table */
426 if (phys_dist) 432 if (phys_dist)
427 memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); 433 memblock_free(__pa(phys_dist), phys_size);
428 return; 434 return;
429 435
430no_emu: 436no_emu:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index bfab3fa10edc..7b65f752c5f8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -568,8 +568,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
568 break; 568 break;
569 } 569 }
570 if (filter[i].jt != 0) { 570 if (filter[i].jt != 0) {
571 if (filter[i].jf) 571 if (filter[i].jf && f_offset)
572 t_offset += is_near(f_offset) ? 2 : 6; 572 t_offset += is_near(f_offset) ? 2 : 5;
573 EMIT_COND_JMP(t_op, t_offset); 573 EMIT_COND_JMP(t_op, t_offset);
574 if (filter[i].jf) 574 if (filter[i].jf)
575 EMIT_JMP(f_offset); 575 EMIT_JMP(f_offset);
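Note: the JIT fix above corrects the size accounting for the false branch. EMIT_JMP emits an unconditional jump, which encodes as 2 bytes (jmp rel8, 0xEB) or 5 bytes (jmp rel32, 0xE9); the old code added 6, the size of a near conditional jump (0x0F 0x8x rel32), and adjusted t_offset even when f_offset was 0, where EMIT_JMP emits nothing at all. A sketch of the corrected arithmetic (helper name hypothetical):

	/*
	 * Bytes the unconditional false-branch jump will occupy:
	 * 0 for a zero displacement (nothing emitted),
	 * 2 for jmp rel8 (0xEB), 5 for jmp rel32 (0xE9).
	 */
	static int emit_jmp_size(int offset)
	{
		if (offset == 0)
			return 0;
		return (offset >= -128 && offset <= 127) ? 2 : 5;
	}

	/* so the conditional jump to the true target is sized as: */
	/* t_offset += emit_jmp_size(f_offset); */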
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 446902b2a6b6..1599f568f0e2 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \ 4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \ 5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \ 6 oprofilefs.o oprofile_stats.o \
7 timer_int.o ) 7 timer_int.o nmi_timer_int.o )
8 8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o 9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ 10oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
11 op_model_ppro.o op_model_p4.o 11 op_model_ppro.o op_model_p4.o
12oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index f148cf652678..9e138d00ad36 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -16,37 +16,23 @@
16 * with the NMI mode driver. 16 * with the NMI mode driver.
17 */ 17 */
18 18
19#ifdef CONFIG_X86_LOCAL_APIC
19extern int op_nmi_init(struct oprofile_operations *ops); 20extern int op_nmi_init(struct oprofile_operations *ops);
20extern int op_nmi_timer_init(struct oprofile_operations *ops);
21extern void op_nmi_exit(void); 21extern void op_nmi_exit(void);
22extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); 22#else
23static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
24static void op_nmi_exit(void) { }
25#endif
23 26
24static int nmi_timer; 27extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
25 28
26int __init oprofile_arch_init(struct oprofile_operations *ops) 29int __init oprofile_arch_init(struct oprofile_operations *ops)
27{ 30{
28 int ret;
29
30 ret = -ENODEV;
31
32#ifdef CONFIG_X86_LOCAL_APIC
33 ret = op_nmi_init(ops);
34#endif
35 nmi_timer = (ret != 0);
36#ifdef CONFIG_X86_IO_APIC
37 if (nmi_timer)
38 ret = op_nmi_timer_init(ops);
39#endif
40 ops->backtrace = x86_backtrace; 31 ops->backtrace = x86_backtrace;
41 32 return op_nmi_init(ops);
42 return ret;
43} 33}
44 34
45
46void oprofile_arch_exit(void) 35void oprofile_arch_exit(void)
47{ 36{
48#ifdef CONFIG_X86_LOCAL_APIC 37 op_nmi_exit();
49 if (!nmi_timer)
50 op_nmi_exit();
51#endif
52} 38}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 75f9528e0372..26b8a8514ee5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type)
595 return 0; 595 return 0;
596} 596}
597 597
598static int force_arch_perfmon; 598enum __force_cpu_type {
599static int force_cpu_type(const char *str, struct kernel_param *kp) 599 reserved = 0, /* do not force */
600 timer,
601 arch_perfmon,
602};
603
604static int force_cpu_type;
605
606static int set_cpu_type(const char *str, struct kernel_param *kp)
600{ 607{
601 if (!strcmp(str, "arch_perfmon")) { 608 if (!strcmp(str, "timer")) {
602 force_arch_perfmon = 1; 609 force_cpu_type = timer;
610 printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
611 } else if (!strcmp(str, "arch_perfmon")) {
612 force_cpu_type = arch_perfmon;
603 printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); 613 printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
614 } else {
615 force_cpu_type = 0;
604 } 616 }
605 617
606 return 0; 618 return 0;
607} 619}
608module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); 620module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
609 621
610static int __init ppro_init(char **cpu_type) 622static int __init ppro_init(char **cpu_type)
611{ 623{
612 __u8 cpu_model = boot_cpu_data.x86_model; 624 __u8 cpu_model = boot_cpu_data.x86_model;
613 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ 625 struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
614 626
615 if (force_arch_perfmon && cpu_has_arch_perfmon) 627 if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
616 return 0; 628 return 0;
617 629
618 /* 630 /*
@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
679 if (!cpu_has_apic) 691 if (!cpu_has_apic)
680 return -ENODEV; 692 return -ENODEV;
681 693
694 if (force_cpu_type == timer)
695 return -ENODEV;
696
682 switch (vendor) { 697 switch (vendor) {
683 case X86_VENDOR_AMD: 698 case X86_VENDOR_AMD:
684 /* Needs to be at least an Athlon (or hammer in 32bit mode) */ 699 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
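Note: with set_cpu_type() above accepting "timer", NMI hardware profiling can be refused up front (op_nmi_init() returns -ENODEV) so the core falls back to timer mode. module_param_call() is the plumbing that routes the string in; presumably written as oprofile.cpu_type=timer on the kernel command line when oprofile is built in, or cpu_type=timer as a module option. The idiom, as a generic sketch:

	#include <linux/moduleparam.h>

	/* String-valued parameter with a custom setter, using this
	 * era's kernel_param-based signature as seen above. */
	static int set_mode(const char *str, struct kernel_param *kp)
	{
		/* parse str and set a mode variable; 0 on success */
		return 0;
	}
	module_param_call(mode, set_mode, NULL, NULL, 0);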
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
deleted file mode 100644
index 7f8052cd6620..000000000000
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ /dev/null
@@ -1,50 +0,0 @@
1/**
2 * @file nmi_timer_int.c
3 *
4 * @remark Copyright 2003 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author Zwane Mwaikambo <zwane@linuxpower.ca>
8 */
9
10#include <linux/init.h>
11#include <linux/smp.h>
12#include <linux/errno.h>
13#include <linux/oprofile.h>
14#include <linux/rcupdate.h>
15#include <linux/kdebug.h>
16
17#include <asm/nmi.h>
18#include <asm/apic.h>
19#include <asm/ptrace.h>
20
21static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
22{
23 oprofile_add_sample(regs, 0);
24 return NMI_HANDLED;
25}
26
27static int timer_start(void)
28{
29 if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
30 0, "oprofile-timer"))
31 return 1;
32 return 0;
33}
34
35
36static void timer_stop(void)
37{
38 unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
39 synchronize_sched(); /* Allow already-started NMIs to complete. */
40}
41
42
43int __init op_nmi_timer_init(struct oprofile_operations *ops)
44{
45 ops->start = timer_start;
46 ops->stop = timer_stop;
47 ops->cpu_type = "timer";
48 printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
49 return 0;
50}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37718f0f053d..4a01967f02e7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -352,8 +352,7 @@ void __init efi_memblock_x86_reserve_range(void)
352 boot_params.efi_info.efi_memdesc_size; 352 boot_params.efi_info.efi_memdesc_size;
353 memmap.desc_version = boot_params.efi_info.efi_memdesc_version; 353 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
354 memmap.desc_size = boot_params.efi_info.efi_memdesc_size; 354 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
355 memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, 355 memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
356 "EFI memmap");
357} 356}
358 357
359#if EFI_DEBUG 358#if EFI_DEBUG
@@ -397,16 +396,14 @@ void __init efi_reserve_boot_services(void)
397 if ((start+size >= virt_to_phys(_text) 396 if ((start+size >= virt_to_phys(_text)
398 && start <= virt_to_phys(_end)) || 397 && start <= virt_to_phys(_end)) ||
399 !e820_all_mapped(start, start+size, E820_RAM) || 398 !e820_all_mapped(start, start+size, E820_RAM) ||
400 memblock_x86_check_reserved_size(&start, &size, 399 memblock_is_region_reserved(start, size)) {
401 1<<EFI_PAGE_SHIFT)) {
402 /* Could not reserve, skip it */ 400 /* Could not reserve, skip it */
403 md->num_pages = 0; 401 md->num_pages = 0;
404 memblock_dbg(PFX "Could not reserve boot range " 402 memblock_dbg(PFX "Could not reserve boot range "
405 "[0x%010llx-0x%010llx]\n", 403 "[0x%010llx-0x%010llx]\n",
406 start, start+size-1); 404 start, start+size-1);
407 } else 405 } else
408 memblock_x86_reserve_range(start, start+size, 406 memblock_reserve(start, size);
409 "EFI Boot");
410 } 407 }
411} 408}
412 409
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index f82082677337..d511aa97533a 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
18quiet_cmd_posttest = TEST $@ 18quiet_cmd_posttest = TEST $@
19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) 19 cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
20 20
21posttest: $(obj)/test_get_len vmlinux 21quiet_cmd_sanitytest = TEST $@
22 cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
23
24posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
22 $(call cmd,posttest) 25 $(call cmd,posttest)
26 $(call cmd,sanitytest)
23 27
24hostprogs-y := test_get_len 28hostprogs-y += test_get_len insn_sanity
25 29
26# -I needed for generated C source and C source which in the kernel tree. 30# -I needed for generated C source and C source which in the kernel tree.
27HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ 31HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
28 32
33HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
34
29# Dependencies are also needed. 35# Dependencies are also needed.
30$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c 36$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
31 37
38$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index eaf11f52fc0b..5f6a5b6c3a15 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
47 sep_expr = "^\\|$" 47 sep_expr = "^\\|$"
48 group_expr = "^Grp[0-9A-Za-z]+" 48 group_expr = "^Grp[0-9A-Za-z]+"
49 49
50 imm_expr = "^[IJAO][a-z]" 50 imm_expr = "^[IJAOL][a-z]"
51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 51 imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" 52 imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" 53 imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,6 +59,7 @@ BEGIN {
59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" 59 imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
60 imm_flag["Ob"] = "INAT_MOFFSET" 60 imm_flag["Ob"] = "INAT_MOFFSET"
61 imm_flag["Ov"] = "INAT_MOFFSET" 61 imm_flag["Ov"] = "INAT_MOFFSET"
62 imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
62 63
63 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" 64 modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
64 force64_expr = "\\([df]64\\)" 65 force64_expr = "\\([df]64\\)"
@@ -70,8 +71,12 @@ BEGIN {
70 lprefix3_expr = "\\(F2\\)" 71 lprefix3_expr = "\\(F2\\)"
71 max_lprefix = 4 72 max_lprefix = 4
72 73
73 vexok_expr = "\\(VEX\\)" 74 # All opcodes starting with lower-case 'v' or with (v1) superscript
 74 vexonly_expr = "\\(oVEX\\)" 75 # accept a VEX prefix
76 vexok_opcode_expr = "^v.*"
77 vexok_expr = "\\(v1\\)"
 78 # All opcodes with (v) superscript support *only* the VEX prefix
79 vexonly_expr = "\\(v\\)"
75 80
76 prefix_expr = "\\(Prefix\\)" 81 prefix_expr = "\\(Prefix\\)"
77 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" 82 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -85,8 +90,8 @@ BEGIN {
85 prefix_num["SEG=GS"] = "INAT_PFX_GS" 90 prefix_num["SEG=GS"] = "INAT_PFX_GS"
86 prefix_num["SEG=SS"] = "INAT_PFX_SS" 91 prefix_num["SEG=SS"] = "INAT_PFX_SS"
87 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" 92 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
88 prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" 93 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
89 prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" 94 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
90 95
91 clear_vars() 96 clear_vars()
92} 97}
@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod)
310 if (match(opcode, fpu_expr)) 315 if (match(opcode, fpu_expr))
311 flags = add_flags(flags, "INAT_MODRM") 316 flags = add_flags(flags, "INAT_MODRM")
312 317
313 # check VEX only code 318 # check VEX codes
314 if (match(ext, vexonly_expr)) 319 if (match(ext, vexonly_expr))
315 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") 320 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
316 321 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
317 # check VEX only code
318 if (match(ext, vexok_expr))
319 flags = add_flags(flags, "INAT_VEXOK") 322 flags = add_flags(flags, "INAT_VEXOK")
320 323
321 # check prefixes 324 # check prefixes
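For context: the expressions above parse arch/x86/lib/x86-opcode-map.txt, whose AVX notation this patch updates. Abbreviated, illustrative map entries in the new format (the operand lists in the real file are longer):

	c4: VEX+2byte (Prefix)
	c5: VEX+1byte (Prefix)
	58: vaddps Vps,Hps,Wps | vaddss Vss,Hss,Wss (F3),(v1)
	4a: vblendvps Vx,Hx,Wx,Lx (66),(v)

A mnemonic starting with 'v' or a (v1) superscript yields INAT_VEXOK, a (v) superscript yields INAT_VEXOK | INAT_VEXONLY, and the new Lx operand is an immediate byte whose upper bits select a register, which is why it maps to INAT_MAKE_IMM(INAT_IMM_BYTE).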
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
new file mode 100644
index 000000000000..cc2f8c131286
--- /dev/null
+++ b/arch/x86/tools/insn_sanity.c
@@ -0,0 +1,275 @@
1/*
2 * x86 decoder sanity test - based on test_get_insn.c
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 * Copyright (C) Hitachi, Ltd., 2011
20 */
21
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25#include <assert.h>
26#include <unistd.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <fcntl.h>
30
31#define unlikely(cond) (cond)
32#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
33
34#include <asm/insn.h>
35#include <inat.c>
36#include <insn.c>
37
38/*
39 * Test of instruction analysis against tampering.
 40 * Feed random binary to the instruction decoder and ensure that it
 41 * does not access memory outside the instruction buffer.
42 */
43
44#define DEFAULT_MAX_ITER 10000
45#define INSN_NOP 0x90
46
47static const char *prog; /* Program name */
48static int verbose; /* Verbosity */
49static int x86_64; /* x86-64 bit mode flag */
50static unsigned int seed; /* Random seed */
51static unsigned long iter_start; /* Start of iteration number */
52static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */
53static FILE *input_file; /* Input file name */
54
55static void usage(const char *err)
56{
57 if (err)
58 fprintf(stderr, "Error: %s\n\n", err);
59 fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
60 fprintf(stderr, "\t-y 64bit mode\n");
61 fprintf(stderr, "\t-n 32bit mode\n");
 62 fprintf(stderr, "\t-v Verbosity (-vv dumps any decoded result)\n");
63 fprintf(stderr, "\t-s Give a random seed (and iteration number)\n");
64 fprintf(stderr, "\t-m Give a maximum iteration number\n");
65 fprintf(stderr, "\t-i Give an input file with decoded binary\n");
66 exit(1);
67}
68
69static void dump_field(FILE *fp, const char *name, const char *indent,
70 struct insn_field *field)
71{
72 fprintf(fp, "%s.%s = {\n", indent, name);
73 fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
74 indent, field->value, field->bytes[0], field->bytes[1],
75 field->bytes[2], field->bytes[3]);
76 fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
77 field->got, field->nbytes);
78}
79
80static void dump_insn(FILE *fp, struct insn *insn)
81{
82 fprintf(fp, "Instruction = {\n");
83 dump_field(fp, "prefixes", "\t", &insn->prefixes);
84 dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
85 dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
86 dump_field(fp, "opcode", "\t", &insn->opcode);
87 dump_field(fp, "modrm", "\t", &insn->modrm);
88 dump_field(fp, "sib", "\t", &insn->sib);
89 dump_field(fp, "displacement", "\t", &insn->displacement);
90 dump_field(fp, "immediate1", "\t", &insn->immediate1);
91 dump_field(fp, "immediate2", "\t", &insn->immediate2);
92 fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
93 insn->attr, insn->opnd_bytes, insn->addr_bytes);
94 fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
95 insn->length, insn->x86_64, insn->kaddr);
96}
97
98static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
99 unsigned char *insn_buf, struct insn *insn)
100{
101 int i;
102
103 fprintf(fp, "%s:\n", msg);
104
105 dump_insn(fp, insn);
106
 107 fprintf(fp, "You can reproduce this with the command(s) below:\n");
108
109 /* Input a decoded instruction sequence directly */
110 fprintf(fp, " $ echo ");
111 for (i = 0; i < MAX_INSN_SIZE; i++)
112 fprintf(fp, " %02x", insn_buf[i]);
113 fprintf(fp, " | %s -i -\n", prog);
114
115 if (!input_file) {
116 fprintf(fp, "Or \n");
117 /* Give a seed and iteration number */
118 fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
119 }
120}
121
122static void init_random_seed(void)
123{
124 int fd;
125
126 fd = open("/dev/urandom", O_RDONLY);
127 if (fd < 0)
128 goto fail;
129
130 if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
131 goto fail;
132
133 close(fd);
134 return;
135fail:
136 usage("Failed to open /dev/urandom");
137}
138
139/* Read given instruction sequence from the input file */
140static int read_next_insn(unsigned char *insn_buf)
141{
142 char buf[256] = "", *tmp;
143 int i;
144
145 tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
146 if (tmp == NULL || feof(input_file))
147 return 0;
148
149 for (i = 0; i < MAX_INSN_SIZE; i++) {
150 insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
151 if (*tmp != ' ')
152 break;
153 }
154
155 return i;
156}
157
158static int generate_insn(unsigned char *insn_buf)
159{
160 int i;
161
162 if (input_file)
163 return read_next_insn(insn_buf);
164
165 /* Fills buffer with random binary up to MAX_INSN_SIZE */
166 for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
167 *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
168
169 while (i < MAX_INSN_SIZE)
170 insn_buf[i++] = random() & 0xff;
171
172 return i;
173}
174
175static void parse_args(int argc, char **argv)
176{
177 int c;
178 char *tmp = NULL;
179 int set_seed = 0;
180
181 prog = argv[0];
182 while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
183 switch (c) {
184 case 'y':
185 x86_64 = 1;
186 break;
187 case 'n':
188 x86_64 = 0;
189 break;
190 case 'v':
191 verbose++;
192 break;
193 case 'i':
194 if (strcmp("-", optarg) == 0)
195 input_file = stdin;
196 else
197 input_file = fopen(optarg, "r");
198 if (!input_file)
199 usage("Failed to open input file");
200 break;
201 case 's':
202 seed = (unsigned int)strtoul(optarg, &tmp, 0);
203 if (*tmp == ',') {
204 optarg = tmp + 1;
205 iter_start = strtoul(optarg, &tmp, 0);
206 }
207 if (*tmp != '\0' || tmp == optarg)
208 usage("Failed to parse seed");
209 set_seed = 1;
210 break;
211 case 'm':
212 iter_end = strtoul(optarg, &tmp, 0);
213 if (*tmp != '\0' || tmp == optarg)
214 usage("Failed to parse max_iter");
215 break;
216 default:
217 usage(NULL);
218 }
219 }
220
221 /* Check errors */
222 if (iter_end < iter_start)
223 usage("Max iteration number must be bigger than iter-num");
224
225 if (set_seed && input_file)
226 usage("Don't use input file (-i) with random seed (-s)");
227
228 /* Initialize random seed */
229 if (!input_file) {
230 if (!set_seed) /* No seed is given */
231 init_random_seed();
232 srand(seed);
233 }
234}
235
236int main(int argc, char **argv)
237{
238 struct insn insn;
239 int insns = 0;
240 int errors = 0;
241 unsigned long i;
242 unsigned char insn_buf[MAX_INSN_SIZE * 2];
243
244 parse_args(argc, argv);
245
246 /* Prepare stop bytes with NOPs */
247 memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
248
249 for (i = 0; i < iter_end; i++) {
250 if (generate_insn(insn_buf) <= 0)
251 break;
252
253 if (i < iter_start) /* Skip to given iteration number */
254 continue;
255
256 /* Decode an instruction */
257 insn_init(&insn, insn_buf, x86_64);
258 insn_get_length(&insn);
259
260 if (insn.next_byte <= insn.kaddr ||
261 insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
262 /* Access out-of-range memory */
263 dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
264 errors++;
265 } else if (verbose && !insn_complete(&insn))
266 dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
267 else if (verbose >= 2)
268 dump_insn(stdout, &insn);
269 insns++;
270 }
271
272 fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
273
274 return errors ? 1 : 0;
275}
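Together with the Makefile change above, this gives posttest a decoder fuzz pass. The tool also runs standalone; a plausible session using only the options defined in usage() (the seed shown is illustrative):

	$ ./insn_sanity -y -m 1000000
	Success: decoded and checked 1000000 random instructions with 0 errors (seed:0x87a9d4f2)
	$ ./insn_sanity -y -s 0x87a9d4f2,1000
	$ echo 0f ae 30 90 90 90 90 90 90 90 90 90 90 90 90 90 | ./insn_sanity -y -i -

The second form replays a specific iteration of a seeded run, and the third feeds an explicit byte sequence, mirroring the reproducer commands that dump_stream() prints on failure.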
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 1f928659c338..12eb07bfb267 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1215,8 +1215,6 @@ asmlinkage void __init xen_start_kernel(void)
1215 local_irq_disable(); 1215 local_irq_disable();
1216 early_boot_irqs_disabled = true; 1216 early_boot_irqs_disabled = true;
1217 1217
1218 memblock_init();
1219
1220 xen_raw_console_write("mapping kernel into physical memory\n"); 1218 xen_raw_console_write("mapping kernel into physical memory\n");
1221 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 1219 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1222 xen_ident_map_ISA(); 1220 xen_ident_map_ISA();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 87f6673b1207..f4bf8aa574f4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1774,10 +1774,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1774 __xen_write_cr3(true, __pa(pgd)); 1774 __xen_write_cr3(true, __pa(pgd));
1775 xen_mc_issue(PARAVIRT_LAZY_CPU); 1775 xen_mc_issue(PARAVIRT_LAZY_CPU);
1776 1776
1777 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1777 memblock_reserve(__pa(xen_start_info->pt_base),
1778 __pa(xen_start_info->pt_base + 1778 xen_start_info->nr_pt_frames * PAGE_SIZE);
1779 xen_start_info->nr_pt_frames * PAGE_SIZE),
1780 "XEN PAGETABLES");
1781 1779
1782 return pgd; 1780 return pgd;
1783} 1781}
@@ -1853,10 +1851,8 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
1853 PFN_DOWN(__pa(initial_page_table))); 1851 PFN_DOWN(__pa(initial_page_table)));
1854 xen_write_cr3(__pa(initial_page_table)); 1852 xen_write_cr3(__pa(initial_page_table));
1855 1853
1856 memblock_x86_reserve_range(__pa(xen_start_info->pt_base), 1854 memblock_reserve(__pa(xen_start_info->pt_base),
 1857 __pa(xen_start_info->pt_base + 1855 xen_start_info->nr_pt_frames * PAGE_SIZE);
1858 xen_start_info->nr_pt_frames * PAGE_SIZE),
1859 "XEN PAGETABLES");
1860 1856
1861 return initial_page_table; 1857 return initial_page_table;
1862} 1858}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1093f80c162d..e03c63692176 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -75,7 +75,7 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
75 if (i == XEN_EXTRA_MEM_MAX_REGIONS) 75 if (i == XEN_EXTRA_MEM_MAX_REGIONS)
76 printk(KERN_WARNING "Warning: not enough extra memory regions\n"); 76 printk(KERN_WARNING "Warning: not enough extra memory regions\n");
77 77
78 memblock_x86_reserve_range(start, start + size, "XEN EXTRA"); 78 memblock_reserve(start, size);
79 79
80 xen_max_p2m_pfn = PFN_DOWN(start + size); 80 xen_max_p2m_pfn = PFN_DOWN(start + size);
81 81
@@ -173,9 +173,21 @@ static unsigned long __init xen_get_max_pages(void)
173 domid_t domid = DOMID_SELF; 173 domid_t domid = DOMID_SELF;
174 int ret; 174 int ret;
175 175
176 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); 176 /*
177 if (ret > 0) 177 * For the initial domain we use the maximum reservation as
178 max_pages = ret; 178 * the maximum page.
179 *
180 * For guest domains the current maximum reservation reflects
181 * the current maximum rather than the static maximum. In this
182 * case the e820 map provided to us will cover the static
183 * maximum region.
184 */
185 if (xen_initial_domain()) {
186 ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
187 if (ret > 0)
188 max_pages = ret;
189 }
190
179 return min(max_pages, MAX_DOMAIN_PAGES); 191 return min(max_pages, MAX_DOMAIN_PAGES);
180} 192}
181 193
@@ -299,9 +311,8 @@ char * __init xen_memory_setup(void)
299 * - xen_start_info 311 * - xen_start_info
300 * See comment above "struct start_info" in <xen/interface/xen.h> 312 * See comment above "struct start_info" in <xen/interface/xen.h>
301 */ 313 */
302 memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), 314 memblock_reserve(__pa(xen_start_info->mfn_list),
303 __pa(xen_start_info->pt_base), 315 xen_start_info->pt_base - xen_start_info->mfn_list);
304 "XEN START INFO");
305 316
306 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 317 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
307 318
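All of the Xen and EFI conversions above are the same mechanical substitution: the old x86-private helper took a start/end pair plus a debugging label, while the generic memblock API takes a base and a size. A minimal before/after sketch:

	/* old x86-only form: (start, end, name) */
	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");

	/* new generic form: (base, size); the label is simply dropped */
	memblock_reserve(start, size);

Note the xen_memory_setup() case, where the old end argument __pa(pt_base) becomes the size expression pt_base - mfn_list, which works because both virtual addresses sit at the same fixed offset from their physical counterparts.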
diff --git a/block/blk-core.c b/block/blk-core.c
index ea70e6c80cd3..15de223c7f93 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -366,7 +366,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
366 if (drain_all) 366 if (drain_all)
367 blk_throtl_drain(q); 367 blk_throtl_drain(q);
368 368
369 __blk_run_queue(q); 369 /*
370 * This function might be called on a queue which failed
371 * driver init after queue creation. Some drivers
372 * (e.g. fd) get unhappy in such cases. Kick queue iff
373 * dispatch queue has something on it.
374 */
375 if (!list_empty(&q->queue_head))
376 __blk_run_queue(q);
370 377
371 if (drain_all) 378 if (drain_all)
372 nr_rqs = q->rq.count[0] + q->rq.count[1]; 379 nr_rqs = q->rq.count[0] + q->rq.count[1];
@@ -467,6 +474,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
467 q->backing_dev_info.state = 0; 474 q->backing_dev_info.state = 0;
468 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; 475 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
469 q->backing_dev_info.name = "block"; 476 q->backing_dev_info.name = "block";
477 q->node = node_id;
470 478
471 err = bdi_init(&q->backing_dev_info); 479 err = bdi_init(&q->backing_dev_info);
472 if (err) { 480 if (err) {
@@ -551,7 +559,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
551 if (!uninit_q) 559 if (!uninit_q)
552 return NULL; 560 return NULL;
553 561
554 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); 562 q = blk_init_allocated_queue(uninit_q, rfn, lock);
555 if (!q) 563 if (!q)
556 blk_cleanup_queue(uninit_q); 564 blk_cleanup_queue(uninit_q);
557 565
@@ -563,18 +571,9 @@ struct request_queue *
563blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, 571blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
564 spinlock_t *lock) 572 spinlock_t *lock)
565{ 573{
566 return blk_init_allocated_queue_node(q, rfn, lock, -1);
567}
568EXPORT_SYMBOL(blk_init_allocated_queue);
569
570struct request_queue *
571blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
572 spinlock_t *lock, int node_id)
573{
574 if (!q) 574 if (!q)
575 return NULL; 575 return NULL;
576 576
577 q->node = node_id;
578 if (blk_init_free_list(q)) 577 if (blk_init_free_list(q))
579 return NULL; 578 return NULL;
580 579
@@ -604,7 +603,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
604 603
605 return NULL; 604 return NULL;
606} 605}
607EXPORT_SYMBOL(blk_init_allocated_queue_node); 606EXPORT_SYMBOL(blk_init_allocated_queue);
608 607
609int blk_get_queue(struct request_queue *q) 608int blk_get_queue(struct request_queue *q)
610{ 609{
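The blk-core change folds the _node variant into blk_init_allocated_queue(): the NUMA node is now recorded once, at allocation time, by the new q->node assignment in blk_alloc_queue_node(). A sketch of what a caller of the removed function would do instead (GFP_KERNEL and the identifiers rfn, lock and node_id stand in for a driver's own values):

	struct request_queue *q;

	q = blk_alloc_queue_node(GFP_KERNEL, node_id);	/* node captured here */
	if (q && !blk_init_allocated_queue(q, rfn, lock)) {
		blk_cleanup_queue(q);
		q = NULL;
	}

This is the same sequence blk_init_queue_node() itself performs after the patch.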
diff --git a/block/blk-map.c b/block/blk-map.c
index 164cd0059706..623e1cd4cffe 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -311,7 +311,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
311 if (IS_ERR(bio)) 311 if (IS_ERR(bio))
312 return PTR_ERR(bio); 312 return PTR_ERR(bio);
313 313
314 if (rq_data_dir(rq) == WRITE) 314 if (!reading)
315 bio->bi_rw |= REQ_WRITE; 315 bio->bi_rw |= REQ_WRITE;
316 316
317 if (do_copy) 317 if (do_copy)
diff --git a/block/blk-tag.c b/block/blk-tag.c
index e74d6d13838f..4af6f5cc1167 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -282,18 +282,9 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
282void blk_queue_end_tag(struct request_queue *q, struct request *rq) 282void blk_queue_end_tag(struct request_queue *q, struct request *rq)
283{ 283{
284 struct blk_queue_tag *bqt = q->queue_tags; 284 struct blk_queue_tag *bqt = q->queue_tags;
285 int tag = rq->tag; 285 unsigned tag = rq->tag; /* negative tags invalid */
286 286
287 BUG_ON(tag == -1); 287 BUG_ON(tag >= bqt->real_max_depth);
288
289 if (unlikely(tag >= bqt->max_depth)) {
290 /*
291 * This can happen after tag depth has been reduced.
292 * But tag shouldn't be larger than real_max_depth.
293 */
294 WARN_ON(tag >= bqt->real_max_depth);
295 return;
296 }
297 288
298 list_del_init(&rq->queuelist); 289 list_del_init(&rq->queuelist);
299 rq->cmd_flags &= ~REQ_QUEUED; 290 rq->cmd_flags &= ~REQ_QUEUED;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 16ace89613bc..3548705b04e4 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1655,6 +1655,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
1655 struct request *next) 1655 struct request *next)
1656{ 1656{
1657 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1657 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1658 struct cfq_data *cfqd = q->elevator->elevator_data;
1659
1658 /* 1660 /*
1659 * reposition in fifo if next is older than rq 1661 * reposition in fifo if next is older than rq
1660 */ 1662 */
@@ -1669,6 +1671,16 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
1669 cfq_remove_request(next); 1671 cfq_remove_request(next);
1670 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, 1672 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
1671 rq_data_dir(next), rq_is_sync(next)); 1673 rq_data_dir(next), rq_is_sync(next));
1674
1675 cfqq = RQ_CFQQ(next);
1676 /*
1677 * all requests of this queue are merged to other queues, delete it
1678 * from the service tree. If it's the active_queue,
1679 * cfq_dispatch_requests() will choose to expire it or do idle
1680 */
1681 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
1682 cfqq != cfqd->active_queue)
1683 cfq_del_cfqq_rr(cfqd, cfqq);
1672} 1684}
1673 1685
1674static int cfq_allow_merge(struct request_queue *q, struct request *rq, 1686static int cfq_allow_merge(struct request_queue *q, struct request *rq,
@@ -3184,7 +3196,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
3184 } 3196 }
3185 } 3197 }
3186 3198
3187 if (ret) 3199 if (ret && ret != -EEXIST)
3188 printk(KERN_ERR "cfq: cic link failed!\n"); 3200 printk(KERN_ERR "cfq: cic link failed!\n");
3189 3201
3190 return ret; 3202 return ret;
@@ -3200,6 +3212,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3200{ 3212{
3201 struct io_context *ioc = NULL; 3213 struct io_context *ioc = NULL;
3202 struct cfq_io_context *cic; 3214 struct cfq_io_context *cic;
3215 int ret;
3203 3216
3204 might_sleep_if(gfp_mask & __GFP_WAIT); 3217 might_sleep_if(gfp_mask & __GFP_WAIT);
3205 3218
@@ -3207,6 +3220,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3207 if (!ioc) 3220 if (!ioc)
3208 return NULL; 3221 return NULL;
3209 3222
3223retry:
3210 cic = cfq_cic_lookup(cfqd, ioc); 3224 cic = cfq_cic_lookup(cfqd, ioc);
3211 if (cic) 3225 if (cic)
3212 goto out; 3226 goto out;
@@ -3215,7 +3229,12 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3215 if (cic == NULL) 3229 if (cic == NULL)
3216 goto err; 3230 goto err;
3217 3231
3218 if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) 3232 ret = cfq_cic_link(cfqd, ioc, cic, gfp_mask);
3233 if (ret == -EEXIST) {
3234 /* someone has linked cic to ioc already */
3235 cfq_cic_free(cic);
3236 goto retry;
3237 } else if (ret)
3219 goto err_free; 3238 goto err_free;
3220 3239
3221out: 3240out:
@@ -4036,6 +4055,11 @@ static void *cfq_init_queue(struct request_queue *q)
4036 4055
4037 if (blkio_alloc_blkg_stats(&cfqg->blkg)) { 4056 if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
4038 kfree(cfqg); 4057 kfree(cfqg);
4058
4059 spin_lock(&cic_index_lock);
4060 ida_remove(&cic_index_ida, cfqd->cic_index);
4061 spin_unlock(&cic_index_lock);
4062
4039 kfree(cfqd); 4063 kfree(cfqd);
4040 return NULL; 4064 return NULL;
4041 } 4065 }
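The cfq_get_io_context() fix is the usual optimistic-insert idiom: allocate, try to link, and if another task won the race (-EEXIST) discard the local copy and redo the lookup, which now finds the winner. In generic form (lookup, alloc, insert and free are placeholders, not real kernel functions):

	for (;;) {
		obj = lookup(key);
		if (obj)
			break;			/* someone linked one already */
		obj = alloc();
		ret = insert(key, obj);
		if (ret == 0)
			break;			/* we won the race */
		free(obj);
		if (ret != -EEXIST)
			return NULL;		/* real failure */
	}

This also explains the companion change in cfq_cic_link(): -EEXIST no longer deserves a KERN_ERR, since losing the race is an expected outcome.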
diff --git a/block/ioctl.c b/block/ioctl.c
index ca939fc1030f..d510c2a4eff8 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -180,6 +180,26 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
180EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); 180EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
181 181
182/* 182/*
183 * Is it an unrecognized ioctl? The correct returns are either
184 * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
185 * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
186 * code before returning.
187 *
188 * Confused drivers sometimes return EINVAL, which is wrong. It
189 * means "I understood the ioctl command, but the parameters to
190 * it were wrong".
191 *
192 * We should aim to just fix the broken drivers, the EINVAL case
193 * should go away.
194 */
195static inline int is_unrecognized_ioctl(int ret)
196{
197 return ret == -EINVAL ||
198 ret == -ENOTTY ||
199 ret == -ENOIOCTLCMD;
200}
201
202/*
183 * always keep this in sync with compat_blkdev_ioctl() 203 * always keep this in sync with compat_blkdev_ioctl()
184 */ 204 */
185int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, 205int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
@@ -196,8 +216,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
196 return -EACCES; 216 return -EACCES;
197 217
198 ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); 218 ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
199 /* -EINVAL to handle old uncorrected drivers */ 219 if (!is_unrecognized_ioctl(ret))
200 if (ret != -EINVAL && ret != -ENOTTY)
201 return ret; 220 return ret;
202 221
203 fsync_bdev(bdev); 222 fsync_bdev(bdev);
@@ -206,8 +225,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
206 225
207 case BLKROSET: 226 case BLKROSET:
208 ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); 227 ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
209 /* -EINVAL to handle old uncorrected drivers */ 228 if (!is_unrecognized_ioctl(ret))
210 if (ret != -EINVAL && ret != -ENOTTY)
211 return ret; 229 return ret;
212 if (!capable(CAP_SYS_ADMIN)) 230 if (!capable(CAP_SYS_ADMIN))
213 return -EACCES; 231 return -EACCES;
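The new helper encodes the convention the comment describes; a minimal hypothetical driver ioctl that follows it (all MYDRV_* names invented for illustration):

	static int mydrv_ioctl(struct block_device *bdev, fmode_t mode,
			       unsigned cmd, unsigned long arg)
	{
		switch (cmd) {
		case MYDRV_SETPARAM:
			if (arg > MYDRV_PARAM_MAX)
				return -EINVAL;	/* understood cmd, bad argument */
			return 0;
		default:
			return -ENOIOCTLCMD;	/* unknown: let the core fall back */
		}
	}

With drivers behaving like this, the -EINVAL branch of is_unrecognized_ioctl() could eventually be deleted, as the comment anticipates.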
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 6bdedd7cca2c..cf047c406d92 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -820,7 +820,7 @@ config PATA_PLATFORM
820 820
821config PATA_OF_PLATFORM 821config PATA_OF_PLATFORM
822 tristate "OpenFirmware platform device PATA support" 822 tristate "OpenFirmware platform device PATA support"
823 depends on PATA_PLATFORM && OF 823 depends on PATA_PLATFORM && OF && OF_IRQ
824 help 824 help
825 This option enables support for generic directly connected ATA 825 This option enables support for generic directly connected ATA
826 devices commonly found on embedded systems with OpenFirmware 826 devices commonly found on embedded systems with OpenFirmware
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 251acea3d359..3991502b21e5 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -247,6 +247,13 @@ struct sys_device *get_cpu_sysdev(unsigned cpu)
247} 247}
248EXPORT_SYMBOL_GPL(get_cpu_sysdev); 248EXPORT_SYMBOL_GPL(get_cpu_sysdev);
249 249
250bool cpu_is_hotpluggable(unsigned cpu)
251{
252 struct sys_device *dev = get_cpu_sysdev(cpu);
253 return dev && container_of(dev, struct cpu, sysdev)->hotpluggable;
254}
255EXPORT_SYMBOL_GPL(cpu_is_hotpluggable);
256
250int __init cpu_dev_init(void) 257int __init cpu_dev_init(void)
251{ 258{
252 int err; 259 int err;
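cpu_is_hotpluggable() wraps the sysdev lookup that callers previously had to open-code; a hypothetical user:

	/* e.g. when validating a hot-unplug request for a given cpu */
	if (!cpu_is_hotpluggable(cpu))
		return -EINVAL;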
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 8004ac30a7a8..587cce57adae 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2601,6 +2601,8 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
2601 c->Request.Timeout = 0; 2601 c->Request.Timeout = 0;
2602 c->Request.CDB[0] = BMIC_WRITE; 2602 c->Request.CDB[0] = BMIC_WRITE;
2603 c->Request.CDB[6] = BMIC_CACHE_FLUSH; 2603 c->Request.CDB[6] = BMIC_CACHE_FLUSH;
2604 c->Request.CDB[7] = (size >> 8) & 0xFF;
2605 c->Request.CDB[8] = size & 0xFF;
2604 break; 2606 break;
2605 case TEST_UNIT_READY: 2607 case TEST_UNIT_READY:
2606 c->Request.CDBLen = 6; 2608 c->Request.CDBLen = 6;
@@ -4880,7 +4882,7 @@ static int cciss_request_irq(ctlr_info_t *h,
4880{ 4882{
4881 if (h->msix_vector || h->msi_vector) { 4883 if (h->msix_vector || h->msi_vector) {
4882 if (!request_irq(h->intr[h->intr_mode], msixhandler, 4884 if (!request_irq(h->intr[h->intr_mode], msixhandler,
4883 IRQF_DISABLED, h->devname, h)) 4885 0, h->devname, h))
4884 return 0; 4886 return 0;
4885 dev_err(&h->pdev->dev, "Unable to get msi irq %d" 4887 dev_err(&h->pdev->dev, "Unable to get msi irq %d"
4886 " for %s\n", h->intr[h->intr_mode], 4888 " for %s\n", h->intr[h->intr_mode],
@@ -4889,7 +4891,7 @@ static int cciss_request_irq(ctlr_info_t *h,
4889 } 4891 }
4890 4892
4891 if (!request_irq(h->intr[h->intr_mode], intxhandler, 4893 if (!request_irq(h->intr[h->intr_mode], intxhandler,
4892 IRQF_DISABLED, h->devname, h)) 4894 IRQF_SHARED, h->devname, h))
4893 return 0; 4895 return 0;
4894 dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n", 4896 dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
4895 h->intr[h->intr_mode], h->devname); 4897 h->intr[h->intr_mode], h->devname);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 68b205a9338f..1e888c9e85b3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -422,7 +422,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
422 422
423 /* 423 /*
424 * We use punch hole to reclaim the free space used by the 424 * We use punch hole to reclaim the free space used by the
425 * image a.k.a. discard. However we do support discard if 425 * image a.k.a. discard. However we do not support discard if
426 * encryption is enabled, because it may give an attacker 426 * encryption is enabled, because it may give an attacker
427 * useful information. 427 * useful information.
428 */ 428 */
@@ -797,7 +797,7 @@ static void loop_config_discard(struct loop_device *lo)
797 } 797 }
798 798
799 q->limits.discard_granularity = inode->i_sb->s_blocksize; 799 q->limits.discard_granularity = inode->i_sb->s_blocksize;
800 q->limits.discard_alignment = inode->i_sb->s_blocksize; 800 q->limits.discard_alignment = 0;
801 q->limits.max_discard_sectors = UINT_MAX >> 9; 801 q->limits.max_discard_sectors = UINT_MAX >> 9;
802 q->limits.discard_zeroes_data = 1; 802 q->limits.discard_zeroes_data = 1;
803 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 803 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ae3e167e17ad..89ddab127e33 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -16,6 +16,8 @@
16 * handle GCR disks 16 * handle GCR disks
17 */ 17 */
18 18
19#undef DEBUG
20
19#include <linux/stddef.h> 21#include <linux/stddef.h>
20#include <linux/kernel.h> 22#include <linux/kernel.h>
21#include <linux/sched.h> 23#include <linux/sched.h>
@@ -36,13 +38,11 @@
36#include <asm/machdep.h> 38#include <asm/machdep.h>
37#include <asm/pmac_feature.h> 39#include <asm/pmac_feature.h>
38 40
39static DEFINE_MUTEX(swim3_mutex);
40static struct request_queue *swim3_queue;
41static struct gendisk *disks[2];
42static struct request *fd_req;
43
44#define MAX_FLOPPIES 2 41#define MAX_FLOPPIES 2
45 42
43static DEFINE_MUTEX(swim3_mutex);
44static struct gendisk *disks[MAX_FLOPPIES];
45
46enum swim_state { 46enum swim_state {
47 idle, 47 idle,
48 locating, 48 locating,
@@ -177,7 +177,6 @@ struct swim3 {
177 177
178struct floppy_state { 178struct floppy_state {
179 enum swim_state state; 179 enum swim_state state;
180 spinlock_t lock;
181 struct swim3 __iomem *swim3; /* hardware registers */ 180 struct swim3 __iomem *swim3; /* hardware registers */
182 struct dbdma_regs __iomem *dma; /* DMA controller registers */ 181 struct dbdma_regs __iomem *dma; /* DMA controller registers */
183 int swim3_intr; /* interrupt number for SWIM3 */ 182 int swim3_intr; /* interrupt number for SWIM3 */
@@ -204,8 +203,20 @@ struct floppy_state {
204 int wanted; 203 int wanted;
205 struct macio_dev *mdev; 204 struct macio_dev *mdev;
206 char dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)]; 205 char dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)];
206 int index;
207 struct request *cur_req;
207}; 208};
208 209
210#define swim3_err(fmt, arg...) dev_err(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg)
211#define swim3_warn(fmt, arg...) dev_warn(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg)
212#define swim3_info(fmt, arg...) dev_info(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg)
213
214#ifdef DEBUG
215#define swim3_dbg(fmt, arg...) dev_dbg(&fs->mdev->ofdev.dev, "[fd%d] " fmt, fs->index, arg)
216#else
217#define swim3_dbg(fmt, arg...) do { } while(0)
218#endif
219
209static struct floppy_state floppy_states[MAX_FLOPPIES]; 220static struct floppy_state floppy_states[MAX_FLOPPIES];
210static int floppy_count = 0; 221static int floppy_count = 0;
211static DEFINE_SPINLOCK(swim3_lock); 222static DEFINE_SPINLOCK(swim3_lock);
@@ -224,17 +235,8 @@ static unsigned short write_postamble[] = {
224 0, 0, 0, 0, 0, 0 235 0, 0, 0, 0, 0, 0
225}; 236};
226 237
227static void swim3_select(struct floppy_state *fs, int sel);
228static void swim3_action(struct floppy_state *fs, int action);
229static int swim3_readbit(struct floppy_state *fs, int bit);
230static void do_fd_request(struct request_queue * q);
231static void start_request(struct floppy_state *fs);
232static void set_timeout(struct floppy_state *fs, int nticks,
233 void (*proc)(unsigned long));
234static void scan_track(struct floppy_state *fs);
235static void seek_track(struct floppy_state *fs, int n); 238static void seek_track(struct floppy_state *fs, int n);
236static void init_dma(struct dbdma_cmd *cp, int cmd, void *buf, int count); 239static void init_dma(struct dbdma_cmd *cp, int cmd, void *buf, int count);
237static void setup_transfer(struct floppy_state *fs);
238static void act(struct floppy_state *fs); 240static void act(struct floppy_state *fs);
239static void scan_timeout(unsigned long data); 241static void scan_timeout(unsigned long data);
240static void seek_timeout(unsigned long data); 242static void seek_timeout(unsigned long data);
@@ -254,18 +256,21 @@ static unsigned int floppy_check_events(struct gendisk *disk,
254 unsigned int clearing); 256 unsigned int clearing);
255static int floppy_revalidate(struct gendisk *disk); 257static int floppy_revalidate(struct gendisk *disk);
256 258
257static bool swim3_end_request(int err, unsigned int nr_bytes) 259static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
258{ 260{
259 if (__blk_end_request(fd_req, err, nr_bytes)) 261 struct request *req = fs->cur_req;
260 return true; 262 int rc;
261 263
262 fd_req = NULL; 264 swim3_dbg(" end request, err=%d nr_bytes=%d, cur_req=%p\n",
263 return false; 265 err, nr_bytes, req);
264}
265 266
266static bool swim3_end_request_cur(int err) 267 if (err)
267{ 268 nr_bytes = blk_rq_cur_bytes(req);
268 return swim3_end_request(err, blk_rq_cur_bytes(fd_req)); 269 rc = __blk_end_request(req, err, nr_bytes);
270 if (rc)
271 return true;
272 fs->cur_req = NULL;
273 return false;
269} 274}
270 275
271static void swim3_select(struct floppy_state *fs, int sel) 276static void swim3_select(struct floppy_state *fs, int sel)
@@ -303,50 +308,53 @@ static int swim3_readbit(struct floppy_state *fs, int bit)
303 return (stat & DATA) == 0; 308 return (stat & DATA) == 0;
304} 309}
305 310
306static void do_fd_request(struct request_queue * q)
307{
308 int i;
309
310 for(i=0; i<floppy_count; i++) {
311 struct floppy_state *fs = &floppy_states[i];
312 if (fs->mdev->media_bay &&
313 check_media_bay(fs->mdev->media_bay) != MB_FD)
314 continue;
315 start_request(fs);
316 }
317}
318
319static void start_request(struct floppy_state *fs) 311static void start_request(struct floppy_state *fs)
320{ 312{
321 struct request *req; 313 struct request *req;
322 unsigned long x; 314 unsigned long x;
323 315
316 swim3_dbg("start request, initial state=%d\n", fs->state);
317
324 if (fs->state == idle && fs->wanted) { 318 if (fs->state == idle && fs->wanted) {
325 fs->state = available; 319 fs->state = available;
326 wake_up(&fs->wait); 320 wake_up(&fs->wait);
327 return; 321 return;
328 } 322 }
329 while (fs->state == idle) { 323 while (fs->state == idle) {
330 if (!fd_req) { 324 swim3_dbg("start request, idle loop, cur_req=%p\n", fs->cur_req);
331 fd_req = blk_fetch_request(swim3_queue); 325 if (!fs->cur_req) {
332 if (!fd_req) 326 fs->cur_req = blk_fetch_request(disks[fs->index]->queue);
327 swim3_dbg(" fetched request %p\n", fs->cur_req);
328 if (!fs->cur_req)
333 break; 329 break;
334 } 330 }
335 req = fd_req; 331 req = fs->cur_req;
336#if 0 332
337 printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", 333 if (fs->mdev->media_bay &&
338 req->rq_disk->disk_name, req->cmd, 334 check_media_bay(fs->mdev->media_bay) != MB_FD) {
339 (long)blk_rq_pos(req), blk_rq_sectors(req), req->buffer); 335 swim3_dbg("%s", " media bay absent, dropping req\n");
340 printk(" errors=%d current_nr_sectors=%u\n", 336 swim3_end_request(fs, -ENODEV, 0);
341 req->errors, blk_rq_cur_sectors(req)); 337 continue;
338 }
339
340#if 0 /* This is really too verbose */
341 swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
342 req->rq_disk->disk_name, req->cmd,
343 (long)blk_rq_pos(req), blk_rq_sectors(req),
344 req->buffer);
345 swim3_dbg(" errors=%d current_nr_sectors=%u\n",
346 req->errors, blk_rq_cur_sectors(req));
342#endif 347#endif
343 348
344 if (blk_rq_pos(req) >= fs->total_secs) { 349 if (blk_rq_pos(req) >= fs->total_secs) {
345 swim3_end_request_cur(-EIO); 350 swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
351 (long)blk_rq_pos(req), (long)fs->total_secs);
352 swim3_end_request(fs, -EIO, 0);
346 continue; 353 continue;
347 } 354 }
348 if (fs->ejected) { 355 if (fs->ejected) {
349 swim3_end_request_cur(-EIO); 356 swim3_dbg("%s", " disk ejected\n");
357 swim3_end_request(fs, -EIO, 0);
350 continue; 358 continue;
351 } 359 }
352 360
@@ -354,7 +362,8 @@ static void start_request(struct floppy_state *fs)
354 if (fs->write_prot < 0) 362 if (fs->write_prot < 0)
355 fs->write_prot = swim3_readbit(fs, WRITE_PROT); 363 fs->write_prot = swim3_readbit(fs, WRITE_PROT);
356 if (fs->write_prot) { 364 if (fs->write_prot) {
357 swim3_end_request_cur(-EIO); 365 swim3_dbg("%s", " try to write, disk write protected\n");
366 swim3_end_request(fs, -EIO, 0);
358 continue; 367 continue;
359 } 368 }
360 } 369 }
@@ -369,7 +378,6 @@ static void start_request(struct floppy_state *fs)
369 x = ((long)blk_rq_pos(req)) % fs->secpercyl; 378 x = ((long)blk_rq_pos(req)) % fs->secpercyl;
370 fs->head = x / fs->secpertrack; 379 fs->head = x / fs->secpertrack;
371 fs->req_sector = x % fs->secpertrack + 1; 380 fs->req_sector = x % fs->secpertrack + 1;
372 fd_req = req;
373 fs->state = do_transfer; 381 fs->state = do_transfer;
374 fs->retries = 0; 382 fs->retries = 0;
375 383
@@ -377,12 +385,14 @@ static void start_request(struct floppy_state *fs)
377 } 385 }
378} 386}
379 387
388static void do_fd_request(struct request_queue * q)
389{
390 start_request(q->queuedata);
391}
392
380static void set_timeout(struct floppy_state *fs, int nticks, 393static void set_timeout(struct floppy_state *fs, int nticks,
381 void (*proc)(unsigned long)) 394 void (*proc)(unsigned long))
382{ 395{
383 unsigned long flags;
384
385 spin_lock_irqsave(&fs->lock, flags);
386 if (fs->timeout_pending) 396 if (fs->timeout_pending)
387 del_timer(&fs->timeout); 397 del_timer(&fs->timeout);
388 fs->timeout.expires = jiffies + nticks; 398 fs->timeout.expires = jiffies + nticks;
@@ -390,7 +400,6 @@ static void set_timeout(struct floppy_state *fs, int nticks,
390 fs->timeout.data = (unsigned long) fs; 400 fs->timeout.data = (unsigned long) fs;
391 add_timer(&fs->timeout); 401 add_timer(&fs->timeout);
392 fs->timeout_pending = 1; 402 fs->timeout_pending = 1;
393 spin_unlock_irqrestore(&fs->lock, flags);
394} 403}
395 404
396static inline void scan_track(struct floppy_state *fs) 405static inline void scan_track(struct floppy_state *fs)
@@ -442,40 +451,45 @@ static inline void setup_transfer(struct floppy_state *fs)
442 struct swim3 __iomem *sw = fs->swim3; 451 struct swim3 __iomem *sw = fs->swim3;
443 struct dbdma_cmd *cp = fs->dma_cmd; 452 struct dbdma_cmd *cp = fs->dma_cmd;
444 struct dbdma_regs __iomem *dr = fs->dma; 453 struct dbdma_regs __iomem *dr = fs->dma;
454 struct request *req = fs->cur_req;
445 455
446 if (blk_rq_cur_sectors(fd_req) <= 0) { 456 if (blk_rq_cur_sectors(req) <= 0) {
447 printk(KERN_ERR "swim3: transfer 0 sectors?\n"); 457 swim3_warn("%s", "Transfer 0 sectors ?\n");
448 return; 458 return;
449 } 459 }
450 if (rq_data_dir(fd_req) == WRITE) 460 if (rq_data_dir(req) == WRITE)
451 n = 1; 461 n = 1;
452 else { 462 else {
453 n = fs->secpertrack - fs->req_sector + 1; 463 n = fs->secpertrack - fs->req_sector + 1;
454 if (n > blk_rq_cur_sectors(fd_req)) 464 if (n > blk_rq_cur_sectors(req))
455 n = blk_rq_cur_sectors(fd_req); 465 n = blk_rq_cur_sectors(req);
456 } 466 }
467
468 swim3_dbg(" setup xfer at sect %d (of %d) head %d for %d\n",
469 fs->req_sector, fs->secpertrack, fs->head, n);
470
457 fs->scount = n; 471 fs->scount = n;
458 swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0); 472 swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0);
459 out_8(&sw->sector, fs->req_sector); 473 out_8(&sw->sector, fs->req_sector);
460 out_8(&sw->nsect, n); 474 out_8(&sw->nsect, n);
461 out_8(&sw->gap3, 0); 475 out_8(&sw->gap3, 0);
462 out_le32(&dr->cmdptr, virt_to_bus(cp)); 476 out_le32(&dr->cmdptr, virt_to_bus(cp));
463 if (rq_data_dir(fd_req) == WRITE) { 477 if (rq_data_dir(req) == WRITE) {
464 /* Set up 3 dma commands: write preamble, data, postamble */ 478 /* Set up 3 dma commands: write preamble, data, postamble */
465 init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble)); 479 init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble));
466 ++cp; 480 ++cp;
467 init_dma(cp, OUTPUT_MORE, fd_req->buffer, 512); 481 init_dma(cp, OUTPUT_MORE, req->buffer, 512);
468 ++cp; 482 ++cp;
469 init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble)); 483 init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble));
470 } else { 484 } else {
471 init_dma(cp, INPUT_LAST, fd_req->buffer, n * 512); 485 init_dma(cp, INPUT_LAST, req->buffer, n * 512);
472 } 486 }
473 ++cp; 487 ++cp;
474 out_le16(&cp->command, DBDMA_STOP); 488 out_le16(&cp->command, DBDMA_STOP);
475 out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS); 489 out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
476 in_8(&sw->error); 490 in_8(&sw->error);
477 out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS); 491 out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
478 if (rq_data_dir(fd_req) == WRITE) 492 if (rq_data_dir(req) == WRITE)
479 out_8(&sw->control_bis, WRITE_SECTORS); 493 out_8(&sw->control_bis, WRITE_SECTORS);
480 in_8(&sw->intr); 494 in_8(&sw->intr);
481 out_le32(&dr->control, (RUN << 16) | RUN); 495 out_le32(&dr->control, (RUN << 16) | RUN);
@@ -488,12 +502,16 @@ static inline void setup_transfer(struct floppy_state *fs)
488static void act(struct floppy_state *fs) 502static void act(struct floppy_state *fs)
489{ 503{
490 for (;;) { 504 for (;;) {
505 swim3_dbg(" act loop, state=%d, req_cyl=%d, cur_cyl=%d\n",
506 fs->state, fs->req_cyl, fs->cur_cyl);
507
491 switch (fs->state) { 508 switch (fs->state) {
492 case idle: 509 case idle:
493 return; /* XXX shouldn't get here */ 510 return; /* XXX shouldn't get here */
494 511
495 case locating: 512 case locating:
496 if (swim3_readbit(fs, TRACK_ZERO)) { 513 if (swim3_readbit(fs, TRACK_ZERO)) {
514 swim3_dbg("%s", " locate track 0\n");
497 fs->cur_cyl = 0; 515 fs->cur_cyl = 0;
498 if (fs->req_cyl == 0) 516 if (fs->req_cyl == 0)
499 fs->state = do_transfer; 517 fs->state = do_transfer;
@@ -511,7 +529,7 @@ static void act(struct floppy_state *fs)
511 break; 529 break;
512 } 530 }
513 if (fs->req_cyl == fs->cur_cyl) { 531 if (fs->req_cyl == fs->cur_cyl) {
514 printk("whoops, seeking 0\n"); 532 swim3_warn("%s", "Whoops, seeking 0\n");
515 fs->state = do_transfer; 533 fs->state = do_transfer;
516 break; 534 break;
517 } 535 }
@@ -527,7 +545,9 @@ static void act(struct floppy_state *fs)
527 case do_transfer: 545 case do_transfer:
528 if (fs->cur_cyl != fs->req_cyl) { 546 if (fs->cur_cyl != fs->req_cyl) {
529 if (fs->retries > 5) { 547 if (fs->retries > 5) {
530 swim3_end_request_cur(-EIO); 548 swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
549 fs->req_cyl, fs->cur_cyl);
550 swim3_end_request(fs, -EIO, 0);
531 fs->state = idle; 551 fs->state = idle;
532 return; 552 return;
533 } 553 }
@@ -542,7 +562,7 @@ static void act(struct floppy_state *fs)
542 return; 562 return;
543 563
544 default: 564 default:
545 printk(KERN_ERR"swim3: unknown state %d\n", fs->state); 565 swim3_err("Unknown state %d\n", fs->state);
546 return; 566 return;
547 } 567 }
548 } 568 }
@@ -552,59 +572,75 @@ static void scan_timeout(unsigned long data)
552{ 572{
553 struct floppy_state *fs = (struct floppy_state *) data; 573 struct floppy_state *fs = (struct floppy_state *) data;
554 struct swim3 __iomem *sw = fs->swim3; 574 struct swim3 __iomem *sw = fs->swim3;
575 unsigned long flags;
576
577 swim3_dbg("* scan timeout, state=%d\n", fs->state);
555 578
579 spin_lock_irqsave(&swim3_lock, flags);
556 fs->timeout_pending = 0; 580 fs->timeout_pending = 0;
557 out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS); 581 out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
558 out_8(&sw->select, RELAX); 582 out_8(&sw->select, RELAX);
559 out_8(&sw->intr_enable, 0); 583 out_8(&sw->intr_enable, 0);
560 fs->cur_cyl = -1; 584 fs->cur_cyl = -1;
561 if (fs->retries > 5) { 585 if (fs->retries > 5) {
562 swim3_end_request_cur(-EIO); 586 swim3_end_request(fs, -EIO, 0);
563 fs->state = idle; 587 fs->state = idle;
564 start_request(fs); 588 start_request(fs);
565 } else { 589 } else {
566 fs->state = jogging; 590 fs->state = jogging;
567 act(fs); 591 act(fs);
568 } 592 }
593 spin_unlock_irqrestore(&swim3_lock, flags);
569} 594}
570 595
571static void seek_timeout(unsigned long data) 596static void seek_timeout(unsigned long data)
572{ 597{
573 struct floppy_state *fs = (struct floppy_state *) data; 598 struct floppy_state *fs = (struct floppy_state *) data;
574 struct swim3 __iomem *sw = fs->swim3; 599 struct swim3 __iomem *sw = fs->swim3;
600 unsigned long flags;
601
602 swim3_dbg("* seek timeout, state=%d\n", fs->state);
575 603
604 spin_lock_irqsave(&swim3_lock, flags);
576 fs->timeout_pending = 0; 605 fs->timeout_pending = 0;
577 out_8(&sw->control_bic, DO_SEEK); 606 out_8(&sw->control_bic, DO_SEEK);
578 out_8(&sw->select, RELAX); 607 out_8(&sw->select, RELAX);
579 out_8(&sw->intr_enable, 0); 608 out_8(&sw->intr_enable, 0);
580 printk(KERN_ERR "swim3: seek timeout\n"); 609 swim3_err("%s", "Seek timeout\n");
581 swim3_end_request_cur(-EIO); 610 swim3_end_request(fs, -EIO, 0);
582 fs->state = idle; 611 fs->state = idle;
583 start_request(fs); 612 start_request(fs);
613 spin_unlock_irqrestore(&swim3_lock, flags);
584} 614}
585 615
586static void settle_timeout(unsigned long data) 616static void settle_timeout(unsigned long data)
587{ 617{
588 struct floppy_state *fs = (struct floppy_state *) data; 618 struct floppy_state *fs = (struct floppy_state *) data;
589 struct swim3 __iomem *sw = fs->swim3; 619 struct swim3 __iomem *sw = fs->swim3;
620 unsigned long flags;
621
622 swim3_dbg("* settle timeout, state=%d\n", fs->state);
590 623
624 spin_lock_irqsave(&swim3_lock, flags);
591 fs->timeout_pending = 0; 625 fs->timeout_pending = 0;
592 if (swim3_readbit(fs, SEEK_COMPLETE)) { 626 if (swim3_readbit(fs, SEEK_COMPLETE)) {
593 out_8(&sw->select, RELAX); 627 out_8(&sw->select, RELAX);
594 fs->state = locating; 628 fs->state = locating;
595 act(fs); 629 act(fs);
596 return; 630 goto unlock;
597 } 631 }
598 out_8(&sw->select, RELAX); 632 out_8(&sw->select, RELAX);
599 if (fs->settle_time < 2*HZ) { 633 if (fs->settle_time < 2*HZ) {
600 ++fs->settle_time; 634 ++fs->settle_time;
601 set_timeout(fs, 1, settle_timeout); 635 set_timeout(fs, 1, settle_timeout);
602 return; 636 goto unlock;
603 } 637 }
604 printk(KERN_ERR "swim3: seek settle timeout\n"); 638 swim3_err("%s", "Seek settle timeout\n");
605 swim3_end_request_cur(-EIO); 639 swim3_end_request(fs, -EIO, 0);
606 fs->state = idle; 640 fs->state = idle;
607 start_request(fs); 641 start_request(fs);
642 unlock:
643 spin_unlock_irqrestore(&swim3_lock, flags);
608} 644}
609 645
610static void xfer_timeout(unsigned long data) 646static void xfer_timeout(unsigned long data)
@@ -612,8 +648,12 @@ static void xfer_timeout(unsigned long data)
612 struct floppy_state *fs = (struct floppy_state *) data; 648 struct floppy_state *fs = (struct floppy_state *) data;
613 struct swim3 __iomem *sw = fs->swim3; 649 struct swim3 __iomem *sw = fs->swim3;
614 struct dbdma_regs __iomem *dr = fs->dma; 650 struct dbdma_regs __iomem *dr = fs->dma;
651 unsigned long flags;
615 int n; 652 int n;
616 653
654 swim3_dbg("* xfer timeout, state=%d\n", fs->state);
655
656 spin_lock_irqsave(&swim3_lock, flags);
617 fs->timeout_pending = 0; 657 fs->timeout_pending = 0;
618 out_le32(&dr->control, RUN << 16); 658 out_le32(&dr->control, RUN << 16);
619 /* We must wait a bit for dbdma to stop */ 659 /* We must wait a bit for dbdma to stop */
@@ -622,12 +662,13 @@ static void xfer_timeout(unsigned long data)
622 out_8(&sw->intr_enable, 0); 662 out_8(&sw->intr_enable, 0);
623 out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION); 663 out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
624 out_8(&sw->select, RELAX); 664 out_8(&sw->select, RELAX);
625 printk(KERN_ERR "swim3: timeout %sing sector %ld\n", 665 swim3_err("Timeout %sing sector %ld\n",
626 (rq_data_dir(fd_req)==WRITE? "writ": "read"), 666 (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
627 (long)blk_rq_pos(fd_req)); 667 (long)blk_rq_pos(fs->cur_req));
628 swim3_end_request_cur(-EIO); 668 swim3_end_request(fs, -EIO, 0);
629 fs->state = idle; 669 fs->state = idle;
630 start_request(fs); 670 start_request(fs);
671 spin_unlock_irqrestore(&swim3_lock, flags);
631} 672}
632 673
633static irqreturn_t swim3_interrupt(int irq, void *dev_id) 674static irqreturn_t swim3_interrupt(int irq, void *dev_id)
@@ -638,12 +679,17 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
638 int stat, resid; 679 int stat, resid;
639 struct dbdma_regs __iomem *dr; 680 struct dbdma_regs __iomem *dr;
640 struct dbdma_cmd *cp; 681 struct dbdma_cmd *cp;
682 unsigned long flags;
683 struct request *req = fs->cur_req;
684
685 swim3_dbg("* interrupt, state=%d\n", fs->state);
641 686
687 spin_lock_irqsave(&swim3_lock, flags);
642 intr = in_8(&sw->intr); 688 intr = in_8(&sw->intr);
643 err = (intr & ERROR_INTR)? in_8(&sw->error): 0; 689 err = (intr & ERROR_INTR)? in_8(&sw->error): 0;
644 if ((intr & ERROR_INTR) && fs->state != do_transfer) 690 if ((intr & ERROR_INTR) && fs->state != do_transfer)
645 printk(KERN_ERR "swim3_interrupt, state=%d, dir=%x, intr=%x, err=%x\n", 691 swim3_err("Non-transfer error interrupt: state=%d, dir=%x, intr=%x, err=%x\n",
646 fs->state, rq_data_dir(fd_req), intr, err); 692 fs->state, rq_data_dir(req), intr, err);
647 switch (fs->state) { 693 switch (fs->state) {
648 case locating: 694 case locating:
649 if (intr & SEEN_SECTOR) { 695 if (intr & SEEN_SECTOR) {
@@ -653,10 +699,10 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
653 del_timer(&fs->timeout); 699 del_timer(&fs->timeout);
654 fs->timeout_pending = 0; 700 fs->timeout_pending = 0;
655 if (sw->ctrack == 0xff) { 701 if (sw->ctrack == 0xff) {
656 printk(KERN_ERR "swim3: seen sector but cyl=ff?\n"); 702 swim3_err("%s", "Seen sector but cyl=ff?\n");
657 fs->cur_cyl = -1; 703 fs->cur_cyl = -1;
658 if (fs->retries > 5) { 704 if (fs->retries > 5) {
659 swim3_end_request_cur(-EIO); 705 swim3_end_request(fs, -EIO, 0);
660 fs->state = idle; 706 fs->state = idle;
661 start_request(fs); 707 start_request(fs);
662 } else { 708 } else {
@@ -668,8 +714,8 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
668 fs->cur_cyl = sw->ctrack; 714 fs->cur_cyl = sw->ctrack;
669 fs->cur_sector = sw->csect; 715 fs->cur_sector = sw->csect;
670 if (fs->expect_cyl != -1 && fs->expect_cyl != fs->cur_cyl) 716 if (fs->expect_cyl != -1 && fs->expect_cyl != fs->cur_cyl)
671 printk(KERN_ERR "swim3: expected cyl %d, got %d\n", 717 swim3_err("Expected cyl %d, got %d\n",
672 fs->expect_cyl, fs->cur_cyl); 718 fs->expect_cyl, fs->cur_cyl);
673 fs->state = do_transfer; 719 fs->state = do_transfer;
674 act(fs); 720 act(fs);
675 } 721 }
@@ -704,7 +750,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
704 fs->timeout_pending = 0; 750 fs->timeout_pending = 0;
705 dr = fs->dma; 751 dr = fs->dma;
706 cp = fs->dma_cmd; 752 cp = fs->dma_cmd;
707 if (rq_data_dir(fd_req) == WRITE) 753 if (rq_data_dir(req) == WRITE)
708 ++cp; 754 ++cp;
709 /* 755 /*
710 * Check that the main data transfer has finished. 756 * Check that the main data transfer has finished.
@@ -729,31 +775,32 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
729 if (intr & ERROR_INTR) { 775 if (intr & ERROR_INTR) {
730 n = fs->scount - 1 - resid / 512; 776 n = fs->scount - 1 - resid / 512;
731 if (n > 0) { 777 if (n > 0) {
732 blk_update_request(fd_req, 0, n << 9); 778 blk_update_request(req, 0, n << 9);
733 fs->req_sector += n; 779 fs->req_sector += n;
734 } 780 }
735 if (fs->retries < 5) { 781 if (fs->retries < 5) {
736 ++fs->retries; 782 ++fs->retries;
737 act(fs); 783 act(fs);
738 } else { 784 } else {
739 printk("swim3: error %sing block %ld (err=%x)\n", 785 swim3_err("Error %sing block %ld (err=%x)\n",
740 rq_data_dir(fd_req) == WRITE? "writ": "read", 786 rq_data_dir(req) == WRITE? "writ": "read",
741 (long)blk_rq_pos(fd_req), err); 787 (long)blk_rq_pos(req), err);
742 swim3_end_request_cur(-EIO); 788 swim3_end_request(fs, -EIO, 0);
743 fs->state = idle; 789 fs->state = idle;
744 } 790 }
745 } else { 791 } else {
746 if ((stat & ACTIVE) == 0 || resid != 0) { 792 if ((stat & ACTIVE) == 0 || resid != 0) {
747 /* musta been an error */ 793 /* musta been an error */
748 printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid); 794 swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
749 printk(KERN_ERR " state=%d, dir=%x, intr=%x, err=%x\n", 795 swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n",
750 fs->state, rq_data_dir(fd_req), intr, err); 796 fs->state, rq_data_dir(req), intr, err);
751 swim3_end_request_cur(-EIO); 797 swim3_end_request(fs, -EIO, 0);
752 fs->state = idle; 798 fs->state = idle;
753 start_request(fs); 799 start_request(fs);
754 break; 800 break;
755 } 801 }
756 if (swim3_end_request(0, fs->scount << 9)) { 802 fs->retries = 0;
803 if (swim3_end_request(fs, 0, fs->scount << 9)) {
757 fs->req_sector += fs->scount; 804 fs->req_sector += fs->scount;
758 if (fs->req_sector > fs->secpertrack) { 805 if (fs->req_sector > fs->secpertrack) {
759 fs->req_sector -= fs->secpertrack; 806 fs->req_sector -= fs->secpertrack;
@@ -770,8 +817,9 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
770 start_request(fs); 817 start_request(fs);
771 break; 818 break;
772 default: 819 default:
773 printk(KERN_ERR "swim3: don't know what to do in state %d\n", fs->state); 820 swim3_err("Don't know what to do in state %d\n", fs->state);
774 } 821 }
822 spin_unlock_irqrestore(&swim3_lock, flags);
775 return IRQ_HANDLED; 823 return IRQ_HANDLED;
776} 824}
777 825
@@ -781,26 +829,31 @@ static void fd_dma_interrupt(int irq, void *dev_id)
781} 829}
782*/ 830*/
783 831
832/* Called under the mutex to grab exclusive access to a drive */
784static int grab_drive(struct floppy_state *fs, enum swim_state state, 833static int grab_drive(struct floppy_state *fs, enum swim_state state,
785 int interruptible) 834 int interruptible)
786{ 835{
787 unsigned long flags; 836 unsigned long flags;
788 837
789 spin_lock_irqsave(&fs->lock, flags); 838 swim3_dbg("%s", "-> grab drive\n");
790 if (fs->state != idle) { 839
840 spin_lock_irqsave(&swim3_lock, flags);
841 if (fs->state != idle && fs->state != available) {
791 ++fs->wanted; 842 ++fs->wanted;
792 while (fs->state != available) { 843 while (fs->state != available) {
844 spin_unlock_irqrestore(&swim3_lock, flags);
793 if (interruptible && signal_pending(current)) { 845 if (interruptible && signal_pending(current)) {
794 --fs->wanted; 846 --fs->wanted;
795 spin_unlock_irqrestore(&fs->lock, flags);
796 return -EINTR; 847 return -EINTR;
797 } 848 }
798 interruptible_sleep_on(&fs->wait); 849 interruptible_sleep_on(&fs->wait);
850 spin_lock_irqsave(&swim3_lock, flags);
799 } 851 }
800 --fs->wanted; 852 --fs->wanted;
801 } 853 }
802 fs->state = state; 854 fs->state = state;
803 spin_unlock_irqrestore(&fs->lock, flags); 855 spin_unlock_irqrestore(&swim3_lock, flags);
856
804 return 0; 857 return 0;
805} 858}
806 859
@@ -808,10 +861,12 @@ static void release_drive(struct floppy_state *fs)
808{ 861{
809 unsigned long flags; 862 unsigned long flags;
810 863
811 spin_lock_irqsave(&fs->lock, flags); 864 swim3_dbg("%s", "-> release drive\n");
865
866 spin_lock_irqsave(&swim3_lock, flags);
812 fs->state = idle; 867 fs->state = idle;
813 start_request(fs); 868 start_request(fs);
814 spin_unlock_irqrestore(&fs->lock, flags); 869 spin_unlock_irqrestore(&swim3_lock, flags);
815} 870}
816 871
817static int fd_eject(struct floppy_state *fs) 872static int fd_eject(struct floppy_state *fs)
@@ -966,6 +1021,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)
966{ 1021{
967 struct floppy_state *fs = disk->private_data; 1022 struct floppy_state *fs = disk->private_data;
968 struct swim3 __iomem *sw = fs->swim3; 1023 struct swim3 __iomem *sw = fs->swim3;
1024
969 mutex_lock(&swim3_mutex); 1025 mutex_lock(&swim3_mutex);
970 if (fs->ref_count > 0 && --fs->ref_count == 0) { 1026 if (fs->ref_count > 0 && --fs->ref_count == 0) {
971 swim3_action(fs, MOTOR_OFF); 1027 swim3_action(fs, MOTOR_OFF);
@@ -1031,30 +1087,48 @@ static const struct block_device_operations floppy_fops = {
1031 .revalidate_disk= floppy_revalidate, 1087 .revalidate_disk= floppy_revalidate,
1032}; 1088};
1033 1089
1090static void swim3_mb_event(struct macio_dev* mdev, int mb_state)
1091{
1092 struct floppy_state *fs = macio_get_drvdata(mdev);
1093 struct swim3 __iomem *sw = fs->swim3;
1094
1095 if (!fs)
1096 return;
1097 if (mb_state != MB_FD)
1098 return;
1099
1100 /* Clear state */
1101 out_8(&sw->intr_enable, 0);
1102 in_8(&sw->intr);
1103 in_8(&sw->error);
1104}
1105
1034static int swim3_add_device(struct macio_dev *mdev, int index) 1106static int swim3_add_device(struct macio_dev *mdev, int index)
1035{ 1107{
1036 struct device_node *swim = mdev->ofdev.dev.of_node; 1108 struct device_node *swim = mdev->ofdev.dev.of_node;
1037 struct floppy_state *fs = &floppy_states[index]; 1109 struct floppy_state *fs = &floppy_states[index];
1038 int rc = -EBUSY; 1110 int rc = -EBUSY;
1039 1111
1112 /* Do this first for message macros */
1113 memset(fs, 0, sizeof(*fs));
1114 fs->mdev = mdev;
1115 fs->index = index;
1116
1040 /* Check & Request resources */ 1117 /* Check & Request resources */
1041 if (macio_resource_count(mdev) < 2) { 1118 if (macio_resource_count(mdev) < 2) {
1042 printk(KERN_WARNING "ifd%d: no address for %s\n", 1119 swim3_err("%s", "No address in device-tree\n");
1043 index, swim->full_name);
1044 return -ENXIO; 1120 return -ENXIO;
1045 } 1121 }
1046 if (macio_irq_count(mdev) < 2) { 1122 if (macio_irq_count(mdev) < 1) {
1047 printk(KERN_WARNING "fd%d: no intrs for device %s\n", 1123 swim3_err("%s", "No interrupt in device-tree\n");
1048 index, swim->full_name); 1124 return -ENXIO;
1049 } 1125 }
1050 if (macio_request_resource(mdev, 0, "swim3 (mmio)")) { 1126 if (macio_request_resource(mdev, 0, "swim3 (mmio)")) {
1051 printk(KERN_ERR "fd%d: can't request mmio resource for %s\n", 1127 swim3_err("%s", "Can't request mmio resource\n");
1052 index, swim->full_name);
1053 return -EBUSY; 1128 return -EBUSY;
1054 } 1129 }
1055 if (macio_request_resource(mdev, 1, "swim3 (dma)")) { 1130 if (macio_request_resource(mdev, 1, "swim3 (dma)")) {
1056 printk(KERN_ERR "fd%d: can't request dma resource for %s\n", 1131 swim3_err("%s", "Can't request dma resource\n");
1057 index, swim->full_name);
1058 macio_release_resource(mdev, 0); 1132 macio_release_resource(mdev, 0);
1059 return -EBUSY; 1133 return -EBUSY;
1060 } 1134 }
@@ -1063,22 +1137,18 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
1063 if (mdev->media_bay == NULL) 1137 if (mdev->media_bay == NULL)
1064 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1); 1138 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1);
1065 1139
1066 memset(fs, 0, sizeof(*fs));
1067 spin_lock_init(&fs->lock);
1068 fs->state = idle; 1140 fs->state = idle;
1069 fs->swim3 = (struct swim3 __iomem *) 1141 fs->swim3 = (struct swim3 __iomem *)
1070 ioremap(macio_resource_start(mdev, 0), 0x200); 1142 ioremap(macio_resource_start(mdev, 0), 0x200);
1071 if (fs->swim3 == NULL) { 1143 if (fs->swim3 == NULL) {
1072 printk("fd%d: couldn't map registers for %s\n", 1144 swim3_err("%s", "Couldn't map mmio registers\n");
1073 index, swim->full_name);
1074 rc = -ENOMEM; 1145 rc = -ENOMEM;
1075 goto out_release; 1146 goto out_release;
1076 } 1147 }
1077 fs->dma = (struct dbdma_regs __iomem *) 1148 fs->dma = (struct dbdma_regs __iomem *)
1078 ioremap(macio_resource_start(mdev, 1), 0x200); 1149 ioremap(macio_resource_start(mdev, 1), 0x200);
1079 if (fs->dma == NULL) { 1150 if (fs->dma == NULL) {
1080 printk("fd%d: couldn't map DMA for %s\n", 1151 swim3_err("%s", "Couldn't map dma registers\n");
1081 index, swim->full_name);
1082 iounmap(fs->swim3); 1152 iounmap(fs->swim3);
1083 rc = -ENOMEM; 1153 rc = -ENOMEM;
1084 goto out_release; 1154 goto out_release;
@@ -1090,31 +1160,25 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
1090 fs->secpercyl = 36; 1160 fs->secpercyl = 36;
1091 fs->secpertrack = 18; 1161 fs->secpertrack = 18;
1092 fs->total_secs = 2880; 1162 fs->total_secs = 2880;
1093 fs->mdev = mdev;
1094 init_waitqueue_head(&fs->wait); 1163 init_waitqueue_head(&fs->wait);
1095 1164
1096 fs->dma_cmd = (struct dbdma_cmd *) DBDMA_ALIGN(fs->dbdma_cmd_space); 1165 fs->dma_cmd = (struct dbdma_cmd *) DBDMA_ALIGN(fs->dbdma_cmd_space);
1097 memset(fs->dma_cmd, 0, 2 * sizeof(struct dbdma_cmd)); 1166 memset(fs->dma_cmd, 0, 2 * sizeof(struct dbdma_cmd));
1098 st_le16(&fs->dma_cmd[1].command, DBDMA_STOP); 1167 st_le16(&fs->dma_cmd[1].command, DBDMA_STOP);
1099 1168
1169 if (mdev->media_bay == NULL || check_media_bay(mdev->media_bay) == MB_FD)
1170 swim3_mb_event(mdev, MB_FD);
1171
1100 if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) { 1172 if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) {
1101 printk(KERN_ERR "fd%d: couldn't request irq %d for %s\n", 1173 swim3_err("%s", "Couldn't request interrupt\n");
1102 index, fs->swim3_intr, swim->full_name);
1103 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); 1174 pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
1104 goto out_unmap; 1175 goto out_unmap;
1105 return -EBUSY; 1176 return -EBUSY;
1106 } 1177 }
1107/*
1108 if (request_irq(fs->dma_intr, fd_dma_interrupt, 0, "SWIM3-dma", fs)) {
1109 printk(KERN_ERR "Couldn't get irq %d for SWIM3 DMA",
1110 fs->dma_intr);
1111 return -EBUSY;
1112 }
1113*/
1114 1178
1115 init_timer(&fs->timeout); 1179 init_timer(&fs->timeout);
1116 1180
1117 printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count, 1181 swim3_info("SWIM3 floppy controller %s\n",
1118 mdev->media_bay ? "in media bay" : ""); 1182 mdev->media_bay ? "in media bay" : "");
1119 1183
1120 return 0; 1184 return 0;
@@ -1132,41 +1196,42 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
1132 1196
1133static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device_id *match) 1197static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device_id *match)
1134{ 1198{
1135 int i, rc;
1136 struct gendisk *disk; 1199 struct gendisk *disk;
1200 int index, rc;
1201
1202 index = floppy_count++;
1203 if (index >= MAX_FLOPPIES)
1204 return -ENXIO;
1137 1205
1138 /* Add the drive */ 1206 /* Add the drive */
1139 rc = swim3_add_device(mdev, floppy_count); 1207 rc = swim3_add_device(mdev, index);
1140 if (rc) 1208 if (rc)
1141 return rc; 1209 return rc;
1210 /* Now register that disk. Same comment about failure handling */
1211 disk = disks[index] = alloc_disk(1);
1212 if (disk == NULL)
1213 return -ENOMEM;
1214 disk->queue = blk_init_queue(do_fd_request, &swim3_lock);
1215 if (disk->queue == NULL) {
1216 put_disk(disk);
1217 return -ENOMEM;
1218 }
1219 disk->queue->queuedata = &floppy_states[index];
1142 1220
1143 /* Now create the queue if not there yet */ 1221 if (index == 0) {
1144 if (swim3_queue == NULL) {
1145 /* If we failed, there isn't much we can do as the driver is still 1222 /* If we failed, there isn't much we can do as the driver is still
1146 * too dumb to remove the device, just bail out 1223 * too dumb to remove the device, just bail out
1147 */ 1224 */
1148 if (register_blkdev(FLOPPY_MAJOR, "fd")) 1225 if (register_blkdev(FLOPPY_MAJOR, "fd"))
1149 return 0; 1226 return 0;
1150 swim3_queue = blk_init_queue(do_fd_request, &swim3_lock);
1151 if (swim3_queue == NULL) {
1152 unregister_blkdev(FLOPPY_MAJOR, "fd");
1153 return 0;
1154 }
1155 } 1227 }
1156 1228
1157 /* Now register that disk. Same comment about failure handling */
1158 i = floppy_count++;
1159 disk = disks[i] = alloc_disk(1);
1160 if (disk == NULL)
1161 return 0;
1162
1163 disk->major = FLOPPY_MAJOR; 1229 disk->major = FLOPPY_MAJOR;
1164 disk->first_minor = i; 1230 disk->first_minor = index;
1165 disk->fops = &floppy_fops; 1231 disk->fops = &floppy_fops;
1166 disk->private_data = &floppy_states[i]; 1232 disk->private_data = &floppy_states[index];
1167 disk->queue = swim3_queue;
1168 disk->flags |= GENHD_FL_REMOVABLE; 1233 disk->flags |= GENHD_FL_REMOVABLE;
1169 sprintf(disk->disk_name, "fd%d", i); 1234 sprintf(disk->disk_name, "fd%d", index);
1170 set_capacity(disk, 2880); 1235 set_capacity(disk, 2880);
1171 add_disk(disk); 1236 add_disk(disk);
1172 1237
@@ -1194,6 +1259,9 @@ static struct macio_driver swim3_driver =
1194 .of_match_table = swim3_match, 1259 .of_match_table = swim3_match,
1195 }, 1260 },
1196 .probe = swim3_attach, 1261 .probe = swim3_attach,
1262#ifdef CONFIG_PMAC_MEDIABAY
1263 .mediabay_event = swim3_mb_event,
1264#endif
1197#if 0 1265#if 0
1198 .suspend = swim3_suspend, 1266 .suspend = swim3_suspend,
1199 .resume = swim3_resume, 1267 .resume = swim3_resume,
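
The grab_drive()/release_drive() rework near the top of this diff is the heart of the swim3 locking change: the per-drive fs->lock is replaced by one global swim3_lock, and a waiter must drop that lock before sleeping and retake it before re-testing the condition. A minimal sketch of that shape, using the driver's types as declared earlier in the file and keeping the (historically racy) interruptible_sleep_on() that the patch itself still uses:

static int wait_until_available(struct floppy_state *fs, int interruptible)
{
	unsigned long flags;

	spin_lock_irqsave(&swim3_lock, flags);
	++fs->wanted;
	while (fs->state != available) {
		/* Drop the lock across the sleep so the interrupt
		 * handler can change fs->state. */
		spin_unlock_irqrestore(&swim3_lock, flags);
		if (interruptible && signal_pending(current)) {
			--fs->wanted;	/* done unlocked here, as in the patch */
			return -EINTR;
		}
		interruptible_sleep_on(&fs->wait);
		spin_lock_irqsave(&swim3_lock, flags);
	}
	--fs->wanted;
	spin_unlock_irqrestore(&swim3_lock, flags);
	return 0;
}
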
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 11b41fd40c27..5ccf142ef0b8 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -188,7 +188,7 @@ config BT_MRVL
188 The core driver to support Marvell Bluetooth devices. 188 The core driver to support Marvell Bluetooth devices.
189 189
190 This driver is required if you want to support 190 This driver is required if you want to support
191 Marvell Bluetooth devices, such as 8688/8787. 191 Marvell Bluetooth devices, such as 8688/8787/8797.
192 192
193 Say Y here to compile Marvell Bluetooth driver 193 Say Y here to compile Marvell Bluetooth driver
194 into the kernel or say M to compile it as module. 194 into the kernel or say M to compile it as module.
@@ -201,8 +201,8 @@ config BT_MRVL_SDIO
201 The driver for Marvell Bluetooth chipsets with SDIO interface. 201 The driver for Marvell Bluetooth chipsets with SDIO interface.
202 202
203 This driver is required if you want to use Marvell Bluetooth 203 This driver is required if you want to use Marvell Bluetooth
204 devices with SDIO interface. Currently SD8688/SD8787 chipsets are 204 devices with SDIO interface. Currently SD8688/SD8787/SD8797
205 supported. 205 chipsets are supported.
206 206
207 Say Y here to compile support for Marvell BT-over-SDIO driver 207 Say Y here to compile support for Marvell BT-over-SDIO driver
208 into the kernel or say M to compile it as module. 208 into the kernel or say M to compile it as module.
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 9ef48167e2cf..27b74b0d547b 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -65,7 +65,7 @@ static const struct btmrvl_sdio_card_reg btmrvl_reg_8688 = {
65 .io_port_1 = 0x01, 65 .io_port_1 = 0x01,
66 .io_port_2 = 0x02, 66 .io_port_2 = 0x02,
67}; 67};
68static const struct btmrvl_sdio_card_reg btmrvl_reg_8787 = { 68static const struct btmrvl_sdio_card_reg btmrvl_reg_87xx = {
69 .cfg = 0x00, 69 .cfg = 0x00,
70 .host_int_mask = 0x02, 70 .host_int_mask = 0x02,
71 .host_intstatus = 0x03, 71 .host_intstatus = 0x03,
@@ -92,7 +92,14 @@ static const struct btmrvl_sdio_device btmrvl_sdio_sd8688 = {
92static const struct btmrvl_sdio_device btmrvl_sdio_sd8787 = { 92static const struct btmrvl_sdio_device btmrvl_sdio_sd8787 = {
93 .helper = NULL, 93 .helper = NULL,
94 .firmware = "mrvl/sd8787_uapsta.bin", 94 .firmware = "mrvl/sd8787_uapsta.bin",
95 .reg = &btmrvl_reg_8787, 95 .reg = &btmrvl_reg_87xx,
96 .sd_blksz_fw_dl = 256,
97};
98
99static const struct btmrvl_sdio_device btmrvl_sdio_sd8797 = {
100 .helper = NULL,
101 .firmware = "mrvl/sd8797_uapsta.bin",
102 .reg = &btmrvl_reg_87xx,
96 .sd_blksz_fw_dl = 256, 103 .sd_blksz_fw_dl = 256,
97}; 104};
98 105
@@ -103,6 +110,9 @@ static const struct sdio_device_id btmrvl_sdio_ids[] = {
103 /* Marvell SD8787 Bluetooth device */ 110 /* Marvell SD8787 Bluetooth device */
104 { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x911A), 111 { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x911A),
105 .driver_data = (unsigned long) &btmrvl_sdio_sd8787 }, 112 .driver_data = (unsigned long) &btmrvl_sdio_sd8787 },
113 /* Marvell SD8797 Bluetooth device */
114 { SDIO_DEVICE(SDIO_VENDOR_ID_MARVELL, 0x912A),
115 .driver_data = (unsigned long) &btmrvl_sdio_sd8797 },
106 116
107 { } /* Terminating entry */ 117 { } /* Terminating entry */
108}; 118};
@@ -1076,3 +1086,4 @@ MODULE_LICENSE("GPL v2");
1076MODULE_FIRMWARE("sd8688_helper.bin"); 1086MODULE_FIRMWARE("sd8688_helper.bin");
1077MODULE_FIRMWARE("sd8688.bin"); 1087MODULE_FIRMWARE("sd8688.bin");
1078MODULE_FIRMWARE("mrvl/sd8787_uapsta.bin"); 1088MODULE_FIRMWARE("mrvl/sd8787_uapsta.bin");
1089MODULE_FIRMWARE("mrvl/sd8797_uapsta.bin");
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index fe4ebc375b3d..eabc437ce500 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -777,9 +777,8 @@ skip_waking:
777 usb_mark_last_busy(data->udev); 777 usb_mark_last_busy(data->udev);
778 } 778 }
779 779
780 usb_free_urb(urb);
781
782done: 780done:
781 usb_free_urb(urb);
783 return err; 782 return err;
784} 783}
785 784
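
The btusb hunk is a two-line leak fix: usb_free_urb() moves below the done: label, so the early-exit paths that jump to done free the URB as well. The idiom in isolation (do_work() and do_more_work() are stand-ins, not btusb functions):

static int submit_example(struct usb_device *udev)
{
	struct urb *urb = usb_alloc_urb(0, GFP_KERNEL);
	int err;

	if (!urb)
		return -ENOMEM;

	err = do_work(urb);		/* hypothetical helper */
	if (err < 0)
		goto done;

	err = do_more_work(urb);	/* hypothetical helper */
done:
	usb_free_urb(urb);		/* one free point covers every path */
	return err;
}
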
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index c2917ffad2c2..34767a6d7f42 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -139,6 +139,8 @@
139#define IPMI_WDOG_SET_TIMER 0x24 139#define IPMI_WDOG_SET_TIMER 0x24
140#define IPMI_WDOG_GET_TIMER 0x25 140#define IPMI_WDOG_GET_TIMER 0x25
141 141
142#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80
143
142/* These are here until the real ones get into the watchdog.h interface. */ 144/* These are here until the real ones get into the watchdog.h interface. */
143#ifndef WDIOC_GETTIMEOUT 145#ifndef WDIOC_GETTIMEOUT
144#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int) 146#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
@@ -596,6 +598,7 @@ static int ipmi_heartbeat(void)
596 struct kernel_ipmi_msg msg; 598 struct kernel_ipmi_msg msg;
597 int rv; 599 int rv;
598 struct ipmi_system_interface_addr addr; 600 struct ipmi_system_interface_addr addr;
601 int timeout_retries = 0;
599 602
600 if (ipmi_ignore_heartbeat) 603 if (ipmi_ignore_heartbeat)
601 return 0; 604 return 0;
@@ -616,6 +619,7 @@ static int ipmi_heartbeat(void)
616 619
617 mutex_lock(&heartbeat_lock); 620 mutex_lock(&heartbeat_lock);
618 621
622restart:
619 atomic_set(&heartbeat_tofree, 2); 623 atomic_set(&heartbeat_tofree, 2);
620 624
621 /* 625 /*
@@ -653,7 +657,33 @@ static int ipmi_heartbeat(void)
653 /* Wait for the heartbeat to be sent. */ 657 /* Wait for the heartbeat to be sent. */
654 wait_for_completion(&heartbeat_wait); 658 wait_for_completion(&heartbeat_wait);
655 659
656 if (heartbeat_recv_msg.msg.data[0] != 0) { 660 if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
661 timeout_retries++;
662 if (timeout_retries > 3) {
663 printk(KERN_ERR PFX ": Unable to restore the IPMI"
664 " watchdog's settings, giving up.\n");
665 rv = -EIO;
666 goto out_unlock;
667 }
668
669 /*
 670 * The timer was not initialized; that means the BMC was
 671 * probably reset and lost its watchdog information. Attempt
 672 * to restore the timer's settings. Note that we still hold
 673 * the heartbeat lock, to keep a heartbeat from happening in
 674 * this process, so we must pass "no heartbeat" to avoid
 675 * deadlocking on this mutex.
676 */
677 rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
678 if (rv) {
679 printk(KERN_ERR PFX ": Unable to send the command to"
680 " set the watchdog's settings, giving up.\n");
681 goto out_unlock;
682 }
683
684 /* We might need a new heartbeat, so do it now */
685 goto restart;
686 } else if (heartbeat_recv_msg.msg.data[0] != 0) {
657 /* 687 /*
658 * Got an error in the heartbeat response. It was already 688 * Got an error in the heartbeat response. It was already
659 * reported in ipmi_wdog_msg_handler, but we should return 689 * reported in ipmi_wdog_msg_handler, but we should return
@@ -662,6 +692,7 @@ static int ipmi_heartbeat(void)
662 rv = -EINVAL; 692 rv = -EINVAL;
663 } 693 }
664 694
695out_unlock:
665 mutex_unlock(&heartbeat_lock); 696 mutex_unlock(&heartbeat_lock);
666 697
667 return rv; 698 return rv;
@@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = {
922static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, 953static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
923 void *handler_data) 954 void *handler_data)
924{ 955{
925 if (msg->msg.data[0] != 0) { 956 if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
957 msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
958 printk(KERN_INFO PFX "response: The IPMI controller appears"
959 " to have been reset, will attempt to reinitialize"
960 " the watchdog timer\n");
961 else if (msg->msg.data[0] != 0)
926 printk(KERN_ERR PFX "response: Error %x on cmd %x\n", 962 printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
927 msg->msg.data[0], 963 msg->msg.data[0],
928 msg->msg.cmd); 964 msg->msg.cmd);
929 }
930 965
931 ipmi_free_recv_msg(msg); 966 ipmi_free_recv_msg(msg);
932} 967}
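
The ipmi_watchdog change teaches the heartbeat about a BMC that was reset behind the driver's back: completion code 0x80 (IPMI_WDOG_TIMER_NOT_INIT_RESP) means the timer lost its configuration, so the driver pushes the saved settings back down and retries, capped at three attempts. The control flow reduced to a skeleton (send_heartbeat() is a stand-in for the locked send-and-wait the real code does):

static int heartbeat_skeleton(void)
{
	int timeout_retries = 0;
	int rv;

restart:
	rv = send_heartbeat();	/* hypothetical: returns the completion code */
	if (rv == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
		if (++timeout_retries > 3)
			return -EIO;
		/* NO_HB: we hold the heartbeat mutex, so the settings
		 * restore must not trigger another heartbeat. */
		rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
		if (rv)
			return rv;
		goto restart;
	}
	return rv ? -EINVAL : 0;
}
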
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index ab8f469f5cf8..5a99bb3f255a 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -124,7 +124,7 @@ config MV_XOR
124 124
125config MX3_IPU 125config MX3_IPU
126 bool "MX3x Image Processing Unit support" 126 bool "MX3x Image Processing Unit support"
127 depends on ARCH_MX3 127 depends on SOC_IMX31 || SOC_IMX35
128 select DMA_ENGINE 128 select DMA_ENGINE
129 default y 129 default y
130 help 130 help
@@ -216,7 +216,7 @@ config PCH_DMA
216 216
217config IMX_SDMA 217config IMX_SDMA
218 tristate "i.MX SDMA support" 218 tristate "i.MX SDMA support"
219 depends on ARCH_MX25 || ARCH_MX3 || ARCH_MX5 219 depends on ARCH_MX25 || SOC_IMX31 || SOC_IMX35 || ARCH_MX5
220 select DMA_ENGINE 220 select DMA_ENGINE
221 help 221 help
222 Support the i.MX SDMA engine. This engine is integrated into 222 Support the i.MX SDMA engine. This engine is integrated into
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index c811cb107904..2cce44a1d7d0 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -746,6 +746,37 @@ static void __exit ibft_exit(void)
746 ibft_cleanup(); 746 ibft_cleanup();
747} 747}
748 748
749#ifdef CONFIG_ACPI
750static const struct {
751 char *sign;
752} ibft_signs[] = {
753 /*
754 * One spec says "IBFT", the other says "iBFT". We have to check
755 * for both.
756 */
757 { ACPI_SIG_IBFT },
758 { "iBFT" },
759};
760
761static void __init acpi_find_ibft_region(void)
762{
763 int i;
764 struct acpi_table_header *table = NULL;
765
766 if (acpi_disabled)
767 return;
768
769 for (i = 0; i < ARRAY_SIZE(ibft_signs) && !ibft_addr; i++) {
770 acpi_get_table(ibft_signs[i].sign, 0, &table);
771 ibft_addr = (struct acpi_table_ibft *)table;
772 }
773}
774#else
775static void __init acpi_find_ibft_region(void)
776{
777}
778#endif
779
749/* 780/*
750 * ibft_init() - creates sysfs tree entries for the iBFT data. 781 * ibft_init() - creates sysfs tree entries for the iBFT data.
751 */ 782 */
@@ -753,9 +784,16 @@ static int __init ibft_init(void)
753{ 784{
754 int rc = 0; 785 int rc = 0;
755 786
 787 /*
 788 * On UEFI systems setup_arch()/find_ibft_region() is called
 789 * before the ACPI tables are parsed, so it only does the
 790 * legacy low-memory scan.
 791 */
792 if (!ibft_addr)
793 acpi_find_ibft_region();
794
756 if (ibft_addr) { 795 if (ibft_addr) {
757 printk(KERN_INFO "iBFT detected at 0x%llx.\n", 796 pr_info("iBFT detected.\n");
758 (u64)isa_virt_to_bus(ibft_addr));
759 797
760 rc = ibft_check_device(); 798 rc = ibft_check_device();
761 if (rc) 799 if (rc)
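
Read together with the iscsi_ibft_find.c hunks below, this moves ACPI discovery out of the early finder and into ibft_init(): setup_arch() runs before the ACPI tables are parsed, so on UEFI machines only an initcall-time lookup can see the table, and both signatures ("IBFT" and "iBFT") have to be tried because the specs disagree. The resulting order, sketched (not literal driver code):

static int __init ibft_discovery_order(void)
{
	/* Early boot already ran the legacy low-memory scan, and only
	 * when !efi_enabled; ACPI is consulted here, afterwards. */
	if (!ibft_addr)
		acpi_find_ibft_region();

	return ibft_addr ? 0 : -ENODEV;
}
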
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index bfe723266fd8..4da4eb9ae926 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -45,13 +45,6 @@ EXPORT_SYMBOL_GPL(ibft_addr);
45static const struct { 45static const struct {
46 char *sign; 46 char *sign;
47} ibft_signs[] = { 47} ibft_signs[] = {
48#ifdef CONFIG_ACPI
49 /*
50 * One spec says "IBFT", the other says "iBFT". We have to check
51 * for both.
52 */
53 { ACPI_SIG_IBFT },
54#endif
55 { "iBFT" }, 48 { "iBFT" },
56 { "BIFT" }, /* Broadcom iSCSI Offload */ 49 { "BIFT" }, /* Broadcom iSCSI Offload */
57}; 50};
@@ -62,14 +55,6 @@ static const struct {
62#define VGA_MEM 0xA0000 /* VGA buffer */ 55#define VGA_MEM 0xA0000 /* VGA buffer */
63#define VGA_SIZE 0x20000 /* 128kB */ 56#define VGA_SIZE 0x20000 /* 128kB */
64 57
65#ifdef CONFIG_ACPI
66static int __init acpi_find_ibft(struct acpi_table_header *header)
67{
68 ibft_addr = (struct acpi_table_ibft *)header;
69 return 0;
70}
71#endif /* CONFIG_ACPI */
72
73static int __init find_ibft_in_mem(void) 58static int __init find_ibft_in_mem(void)
74{ 59{
75 unsigned long pos; 60 unsigned long pos;
@@ -94,6 +79,7 @@ static int __init find_ibft_in_mem(void)
94 * the table cannot be valid. */ 79 * the table cannot be valid. */
95 if (pos + len <= (IBFT_END-1)) { 80 if (pos + len <= (IBFT_END-1)) {
96 ibft_addr = (struct acpi_table_ibft *)virt; 81 ibft_addr = (struct acpi_table_ibft *)virt;
82 pr_info("iBFT found at 0x%lx.\n", pos);
97 goto done; 83 goto done;
98 } 84 }
99 } 85 }
@@ -108,20 +94,12 @@ done:
108 */ 94 */
109unsigned long __init find_ibft_region(unsigned long *sizep) 95unsigned long __init find_ibft_region(unsigned long *sizep)
110{ 96{
111#ifdef CONFIG_ACPI
112 int i;
113#endif
114 ibft_addr = NULL; 97 ibft_addr = NULL;
115 98
116#ifdef CONFIG_ACPI
117 for (i = 0; i < ARRAY_SIZE(ibft_signs) && !ibft_addr; i++)
118 acpi_table_parse(ibft_signs[i].sign, acpi_find_ibft);
119#endif /* CONFIG_ACPI */
120
121 /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will 99 /* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
122 * only use ACPI for this */ 100 * only use ACPI for this */
123 101
124 if (!ibft_addr && !efi_enabled) 102 if (!efi_enabled)
125 find_ibft_in_mem(); 103 find_ibft_in_mem();
126 104
127 if (ibft_addr) { 105 if (ibft_addr) {
diff --git a/drivers/gpio/gpio-da9052.c b/drivers/gpio/gpio-da9052.c
index 038f5eb8b13d..f8ce29ef9f88 100644
--- a/drivers/gpio/gpio-da9052.c
+++ b/drivers/gpio/gpio-da9052.c
@@ -22,7 +22,6 @@
22#include <linux/mfd/da9052/da9052.h> 22#include <linux/mfd/da9052/da9052.h>
23#include <linux/mfd/da9052/reg.h> 23#include <linux/mfd/da9052/reg.h>
24#include <linux/mfd/da9052/pdata.h> 24#include <linux/mfd/da9052/pdata.h>
25#include <linux/mfd/da9052/gpio.h>
26 25
27#define DA9052_INPUT 1 26#define DA9052_INPUT 1
28#define DA9052_OUTPUT_OPENDRAIN 2 27#define DA9052_OUTPUT_OPENDRAIN 2
@@ -43,6 +42,9 @@
43#define DA9052_GPIO_MASK_UPPER_NIBBLE 0xF0 42#define DA9052_GPIO_MASK_UPPER_NIBBLE 0xF0
44#define DA9052_GPIO_MASK_LOWER_NIBBLE 0x0F 43#define DA9052_GPIO_MASK_LOWER_NIBBLE 0x0F
45#define DA9052_GPIO_NIBBLE_SHIFT 4 44#define DA9052_GPIO_NIBBLE_SHIFT 4
45#define DA9052_IRQ_GPI0 16
46#define DA9052_GPIO_ODD_SHIFT 7
47#define DA9052_GPIO_EVEN_SHIFT 3
46 48
47struct da9052_gpio { 49struct da9052_gpio {
48 struct da9052 *da9052; 50 struct da9052 *da9052;
@@ -104,33 +106,26 @@ static int da9052_gpio_get(struct gpio_chip *gc, unsigned offset)
104static void da9052_gpio_set(struct gpio_chip *gc, unsigned offset, int value) 106static void da9052_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
105{ 107{
106 struct da9052_gpio *gpio = to_da9052_gpio(gc); 108 struct da9052_gpio *gpio = to_da9052_gpio(gc);
107 unsigned char register_value = 0;
108 int ret; 109 int ret;
109 110
110 if (da9052_gpio_port_odd(offset)) { 111 if (da9052_gpio_port_odd(offset)) {
111 if (value) {
112 register_value = DA9052_GPIO_ODD_PORT_MODE;
113 ret = da9052_reg_update(gpio->da9052, (offset >> 1) + 112 ret = da9052_reg_update(gpio->da9052, (offset >> 1) +
114 DA9052_GPIO_0_1_REG, 113 DA9052_GPIO_0_1_REG,
115 DA9052_GPIO_ODD_PORT_MODE, 114 DA9052_GPIO_ODD_PORT_MODE,
116 register_value); 115 value << DA9052_GPIO_ODD_SHIFT);
117 if (ret != 0) 116 if (ret != 0)
118 dev_err(gpio->da9052->dev, 117 dev_err(gpio->da9052->dev,
119 "Failed to updated gpio odd reg,%d", 118 "Failed to updated gpio odd reg,%d",
120 ret); 119 ret);
121 }
122 } else { 120 } else {
123 if (value) {
124 register_value = DA9052_GPIO_EVEN_PORT_MODE;
125 ret = da9052_reg_update(gpio->da9052, (offset >> 1) + 121 ret = da9052_reg_update(gpio->da9052, (offset >> 1) +
126 DA9052_GPIO_0_1_REG, 122 DA9052_GPIO_0_1_REG,
127 DA9052_GPIO_EVEN_PORT_MODE, 123 DA9052_GPIO_EVEN_PORT_MODE,
128 register_value); 124 value << DA9052_GPIO_EVEN_SHIFT);
129 if (ret != 0) 125 if (ret != 0)
130 dev_err(gpio->da9052->dev, 126 dev_err(gpio->da9052->dev,
131 "Failed to updated gpio even reg,%d", 127 "Failed to updated gpio even reg,%d",
132 ret); 128 ret);
133 }
134 } 129 }
135} 130}
136 131
@@ -201,9 +196,9 @@ static struct gpio_chip reference_gp __devinitdata = {
201 .direction_input = da9052_gpio_direction_input, 196 .direction_input = da9052_gpio_direction_input,
202 .direction_output = da9052_gpio_direction_output, 197 .direction_output = da9052_gpio_direction_output,
203 .to_irq = da9052_gpio_to_irq, 198 .to_irq = da9052_gpio_to_irq,
204 .can_sleep = 1; 199 .can_sleep = 1,
205 .ngpio = 16; 200 .ngpio = 16,
206 .base = -1; 201 .base = -1,
207}; 202};
208 203
209static int __devinit da9052_gpio_probe(struct platform_device *pdev) 204static int __devinit da9052_gpio_probe(struct platform_device *pdev)
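
The da9052 set() fix hinges on da9052_reg_update(da9052, reg, mask, val) being a read-modify-write of roughly (old & ~mask) | (val & mask) (semantics assumed here from its use in the driver): shifting the 0/1 value into the mask's bit position lets a single call both set and clear the port bit, where the old code only ever wrote when value was nonzero. Schematically:

static int da9052_gpio_write_level(struct da9052 *da9052, u8 reg,
				   u8 mode_mask, u8 shift, int value)
{
	/* value << shift lands inside mode_mask, so value == 0
	 * clears the bit instead of being silently ignored. */
	return da9052_reg_update(da9052, reg, mode_mask,
				 (value ? 1 : 0) << shift);
}
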
diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c
index ea8e73869250..461958fc2264 100644
--- a/drivers/gpio/gpio-ml-ioh.c
+++ b/drivers/gpio/gpio-ml-ioh.c
@@ -332,6 +332,34 @@ static void ioh_irq_mask(struct irq_data *d)
332 &chip->reg->regs[chip->ch].imask); 332 &chip->reg->regs[chip->ch].imask);
333} 333}
334 334
335static void ioh_irq_disable(struct irq_data *d)
336{
337 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
338 struct ioh_gpio *chip = gc->private;
339 unsigned long flags;
340 u32 ien;
341
342 spin_lock_irqsave(&chip->spinlock, flags);
343 ien = ioread32(&chip->reg->regs[chip->ch].ien);
344 ien &= ~(1 << (d->irq - chip->irq_base));
345 iowrite32(ien, &chip->reg->regs[chip->ch].ien);
346 spin_unlock_irqrestore(&chip->spinlock, flags);
347}
348
349static void ioh_irq_enable(struct irq_data *d)
350{
351 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
352 struct ioh_gpio *chip = gc->private;
353 unsigned long flags;
354 u32 ien;
355
356 spin_lock_irqsave(&chip->spinlock, flags);
357 ien = ioread32(&chip->reg->regs[chip->ch].ien);
358 ien |= 1 << (d->irq - chip->irq_base);
359 iowrite32(ien, &chip->reg->regs[chip->ch].ien);
360 spin_unlock_irqrestore(&chip->spinlock, flags);
361}
362
335static irqreturn_t ioh_gpio_handler(int irq, void *dev_id) 363static irqreturn_t ioh_gpio_handler(int irq, void *dev_id)
336{ 364{
337 struct ioh_gpio *chip = dev_id; 365 struct ioh_gpio *chip = dev_id;
@@ -339,7 +367,7 @@ static irqreturn_t ioh_gpio_handler(int irq, void *dev_id)
339 int i, j; 367 int i, j;
340 int ret = IRQ_NONE; 368 int ret = IRQ_NONE;
341 369
342 for (i = 0; i < 8; i++) { 370 for (i = 0; i < 8; i++, chip++) {
343 reg_val = ioread32(&chip->reg->regs[i].istatus); 371 reg_val = ioread32(&chip->reg->regs[i].istatus);
344 for (j = 0; j < num_ports[i]; j++) { 372 for (j = 0; j < num_ports[i]; j++) {
345 if (reg_val & BIT(j)) { 373 if (reg_val & BIT(j)) {
@@ -370,6 +398,8 @@ static __devinit void ioh_gpio_alloc_generic_chip(struct ioh_gpio *chip,
370 ct->chip.irq_mask = ioh_irq_mask; 398 ct->chip.irq_mask = ioh_irq_mask;
371 ct->chip.irq_unmask = ioh_irq_unmask; 399 ct->chip.irq_unmask = ioh_irq_unmask;
372 ct->chip.irq_set_type = ioh_irq_type; 400 ct->chip.irq_set_type = ioh_irq_type;
401 ct->chip.irq_disable = ioh_irq_disable;
402 ct->chip.irq_enable = ioh_irq_enable;
373 403
374 irq_setup_generic_chip(gc, IRQ_MSK(num), IRQ_GC_INIT_MASK_CACHE, 404 irq_setup_generic_chip(gc, IRQ_MSK(num), IRQ_GC_INIT_MASK_CACHE,
375 IRQ_NOREQUEST | IRQ_NOPROBE, 0); 405 IRQ_NOREQUEST | IRQ_NOPROBE, 0);
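
Two fixes ride together in the ml_ioh hunks: irq_enable/irq_disable callbacks now toggle the IEN hardware bit under the spinlock (mask/unmask alone only gate delivery), and the handler gains chip++ so the per-bank state advances with the register index; previously all eight banks were decoded against bank 0. The corrected loop, trimmed to the dispatch:

static irqreturn_t ioh_handler_sketch(int irq, void *dev_id)
{
	struct ioh_gpio *chip = dev_id;	/* first of eight banks */
	irqreturn_t ret = IRQ_NONE;
	int i, j;

	for (i = 0; i < 8; i++, chip++) {	/* advance the bank too */
		u32 status = ioread32(&chip->reg->regs[i].istatus);

		for (j = 0; j < num_ports[i]; j++) {
			if (!(status & BIT(j)))
				continue;
			generic_handle_irq(chip->irq_base + j);
			ret = IRQ_HANDLED;
		}
	}
	return ret;
}
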
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index ec3fcf0a7e12..5cd04b65c556 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -132,6 +132,15 @@ static int mpc8xxx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val
132 return 0; 132 return 0;
133} 133}
134 134
135static int mpc5121_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
136{
137 /* GPIO 28..31 are input only on MPC5121 */
138 if (gpio >= 28)
139 return -EINVAL;
140
141 return mpc8xxx_gpio_dir_out(gc, gpio, val);
142}
143
135static int mpc8xxx_gpio_to_irq(struct gpio_chip *gc, unsigned offset) 144static int mpc8xxx_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
136{ 145{
137 struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc); 146 struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc);
@@ -340,11 +349,10 @@ static void __init mpc8xxx_add_controller(struct device_node *np)
340 mm_gc->save_regs = mpc8xxx_gpio_save_regs; 349 mm_gc->save_regs = mpc8xxx_gpio_save_regs;
341 gc->ngpio = MPC8XXX_GPIO_PINS; 350 gc->ngpio = MPC8XXX_GPIO_PINS;
342 gc->direction_input = mpc8xxx_gpio_dir_in; 351 gc->direction_input = mpc8xxx_gpio_dir_in;
343 gc->direction_output = mpc8xxx_gpio_dir_out; 352 gc->direction_output = of_device_is_compatible(np, "fsl,mpc5121-gpio") ?
344 if (of_device_is_compatible(np, "fsl,mpc8572-gpio")) 353 mpc5121_gpio_dir_out : mpc8xxx_gpio_dir_out;
345 gc->get = mpc8572_gpio_get; 354 gc->get = of_device_is_compatible(np, "fsl,mpc8572-gpio") ?
346 else 355 mpc8572_gpio_get : mpc8xxx_gpio_get;
347 gc->get = mpc8xxx_gpio_get;
348 gc->set = mpc8xxx_gpio_set; 356 gc->set = mpc8xxx_gpio_set;
349 gc->to_irq = mpc8xxx_gpio_to_irq; 357 gc->to_irq = mpc8xxx_gpio_to_irq;
350 358
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 093c90bd3c1d..4102f63230fd 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -238,10 +238,6 @@ static int pl061_probe(struct amba_device *dev, const struct amba_id *id)
238 int ret, irq, i; 238 int ret, irq, i;
239 static DECLARE_BITMAP(init_irq, NR_IRQS); 239 static DECLARE_BITMAP(init_irq, NR_IRQS);
240 240
241 pdata = dev->dev.platform_data;
242 if (pdata == NULL)
243 return -ENODEV;
244
245 chip = kzalloc(sizeof(*chip), GFP_KERNEL); 241 chip = kzalloc(sizeof(*chip), GFP_KERNEL);
246 if (chip == NULL) 242 if (chip == NULL)
247 return -ENOMEM; 243 return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d09a6e02dc95..004b048c5192 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -62,6 +62,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
62 const struct intel_device_info *info = INTEL_INFO(dev); 62 const struct intel_device_info *info = INTEL_INFO(dev);
63 63
64 seq_printf(m, "gen: %d\n", info->gen); 64 seq_printf(m, "gen: %d\n", info->gen);
65 seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev));
65#define B(x) seq_printf(m, #x ": %s\n", yesno(info->x)) 66#define B(x) seq_printf(m, #x ": %s\n", yesno(info->x))
66 B(is_mobile); 67 B(is_mobile);
67 B(is_i85x); 68 B(is_i85x);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a9533c54c93c..a9ae374861e7 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1454,6 +1454,14 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
1454 1454
1455 diff1 = now - dev_priv->last_time1; 1455 diff1 = now - dev_priv->last_time1;
1456 1456
1457 /* Prevent division-by-zero if we are asking too fast.
1458 * Also, we don't get interesting results if we are polling
1459 * faster than once in 10ms, so just return the saved value
1460 * in such cases.
1461 */
1462 if (diff1 <= 10)
1463 return dev_priv->chipset_power;
1464
1457 count1 = I915_READ(DMIEC); 1465 count1 = I915_READ(DMIEC);
1458 count2 = I915_READ(DDREC); 1466 count2 = I915_READ(DDREC);
1459 count3 = I915_READ(CSIEC); 1467 count3 = I915_READ(CSIEC);
@@ -1484,6 +1492,8 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
1484 dev_priv->last_count1 = total_count; 1492 dev_priv->last_count1 = total_count;
1485 dev_priv->last_time1 = now; 1493 dev_priv->last_time1 = now;
1486 1494
1495 dev_priv->chipset_power = ret;
1496
1487 return ret; 1497 return ret;
1488} 1498}
1489 1499
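
The i915_chipset_val() guard is rate-limit-plus-cache: the value is derived from counter deltas divided by elapsed milliseconds, so polling inside the 10 ms granularity would divide by (nearly) zero; short intervals now return the value cached in the new dev_priv->chipset_power field. Shape of the fix (recompute_power() stands in for the elided counter math):

unsigned long chipset_val_shape(struct drm_i915_private *dev_priv)
{
	unsigned long now = jiffies_to_msecs(jiffies);
	unsigned long diff1 = now - dev_priv->last_time1;

	if (diff1 <= 10)	/* too soon: reuse the cached value */
		return dev_priv->chipset_power;

	dev_priv->chipset_power = recompute_power(dev_priv, diff1); /* hypothetical */
	dev_priv->last_time1 = now;
	return dev_priv->chipset_power;
}
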
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 15bfa9145d2b..a1103fc6597d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -58,15 +58,15 @@ module_param_named(powersave, i915_powersave, int, 0600);
58MODULE_PARM_DESC(powersave, 58MODULE_PARM_DESC(powersave,
59 "Enable powersavings, fbc, downclocking, etc. (default: true)"); 59 "Enable powersavings, fbc, downclocking, etc. (default: true)");
60 60
61unsigned int i915_semaphores __read_mostly = 0; 61int i915_semaphores __read_mostly = -1;
62module_param_named(semaphores, i915_semaphores, int, 0600); 62module_param_named(semaphores, i915_semaphores, int, 0600);
63MODULE_PARM_DESC(semaphores, 63MODULE_PARM_DESC(semaphores,
64 "Use semaphores for inter-ring sync (default: false)"); 64 "Use semaphores for inter-ring sync (default: -1 (use per-chip defaults))");
65 65
66unsigned int i915_enable_rc6 __read_mostly = 0; 66int i915_enable_rc6 __read_mostly = -1;
67module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); 67module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
68MODULE_PARM_DESC(i915_enable_rc6, 68MODULE_PARM_DESC(i915_enable_rc6,
69 "Enable power-saving render C-state 6 (default: true)"); 69 "Enable power-saving render C-state 6 (default: -1 (use per-chip default)");
70 70
71int i915_enable_fbc __read_mostly = -1; 71int i915_enable_fbc __read_mostly = -1;
72module_param_named(i915_enable_fbc, i915_enable_fbc, int, 0600); 72module_param_named(i915_enable_fbc, i915_enable_fbc, int, 0600);
@@ -328,7 +328,7 @@ void intel_detect_pch(struct drm_device *dev)
328 } 328 }
329} 329}
330 330
331static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) 331void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
332{ 332{
333 int count; 333 int count;
334 334
@@ -344,6 +344,22 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
344 udelay(10); 344 udelay(10);
345} 345}
346 346
347void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
348{
349 int count;
350
351 count = 0;
352 while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_MT_ACK) & 1))
353 udelay(10);
354
355 I915_WRITE_NOTRACE(FORCEWAKE_MT, (1<<16) | 1);
356 POSTING_READ(FORCEWAKE_MT);
357
358 count = 0;
359 while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_MT_ACK) & 1) == 0)
360 udelay(10);
361}
362
347/* 363/*
348 * Generally this is called implicitly by the register read function. However, 364 * Generally this is called implicitly by the register read function. However,
349 * if some sequence requires the GT to not power down then this function should 365 * if some sequence requires the GT to not power down then this function should
@@ -356,15 +372,21 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
356 372
357 /* Forcewake is atomic in case we get in here without the lock */ 373 /* Forcewake is atomic in case we get in here without the lock */
358 if (atomic_add_return(1, &dev_priv->forcewake_count) == 1) 374 if (atomic_add_return(1, &dev_priv->forcewake_count) == 1)
359 __gen6_gt_force_wake_get(dev_priv); 375 dev_priv->display.force_wake_get(dev_priv);
360} 376}
361 377
362static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) 378void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
363{ 379{
364 I915_WRITE_NOTRACE(FORCEWAKE, 0); 380 I915_WRITE_NOTRACE(FORCEWAKE, 0);
365 POSTING_READ(FORCEWAKE); 381 POSTING_READ(FORCEWAKE);
366} 382}
367 383
384void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
385{
386 I915_WRITE_NOTRACE(FORCEWAKE_MT, (1<<16) | 0);
387 POSTING_READ(FORCEWAKE_MT);
388}
389
368/* 390/*
369 * see gen6_gt_force_wake_get() 391 * see gen6_gt_force_wake_get()
370 */ 392 */
@@ -373,7 +395,7 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
373 WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); 395 WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
374 396
375 if (atomic_dec_and_test(&dev_priv->forcewake_count)) 397 if (atomic_dec_and_test(&dev_priv->forcewake_count))
376 __gen6_gt_force_wake_put(dev_priv); 398 dev_priv->display.force_wake_put(dev_priv);
377} 399}
378 400
379void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) 401void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
@@ -903,8 +925,9 @@ MODULE_LICENSE("GPL and additional rights");
903/* We give fast paths for the really cool registers */ 925/* We give fast paths for the really cool registers */
904#define NEEDS_FORCE_WAKE(dev_priv, reg) \ 926#define NEEDS_FORCE_WAKE(dev_priv, reg) \
905 (((dev_priv)->info->gen >= 6) && \ 927 (((dev_priv)->info->gen >= 6) && \
906 ((reg) < 0x40000) && \ 928 ((reg) < 0x40000) && \
907 ((reg) != FORCEWAKE)) 929 ((reg) != FORCEWAKE) && \
930 ((reg) != ECOBUS))
908 931
909#define __i915_read(x, y) \ 932#define __i915_read(x, y) \
910u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ 933u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
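
FORCEWAKE_MT brings in the gen6+ "masked write" register convention: the upper 16 bits of a write are a per-bit write-enable for the lower 16, so (1 << 16) | 1 asserts bit 0 and (1 << 16) | 0 clears it with no read-modify-write, which is what makes the register safe to share between threads. A helper expressing the encoding (the name is illustrative, not the driver's):

static inline u32 mt_masked_bit(u32 bit, bool enable)
{
	/* high half: which bits this write may change;
	 * low half: their new values */
	return (bit << 16) | (enable ? bit : 0);
}

/* usage: I915_WRITE_NOTRACE(FORCEWAKE_MT, mt_masked_bit(1, true)); */
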
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a9c1b979804..554bef7a3b9c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -107,6 +107,7 @@ struct opregion_header;
107struct opregion_acpi; 107struct opregion_acpi;
108struct opregion_swsci; 108struct opregion_swsci;
109struct opregion_asle; 109struct opregion_asle;
110struct drm_i915_private;
110 111
111struct intel_opregion { 112struct intel_opregion {
112 struct opregion_header *header; 113 struct opregion_header *header;
@@ -221,6 +222,8 @@ struct drm_i915_display_funcs {
221 struct drm_i915_gem_object *obj); 222 struct drm_i915_gem_object *obj);
222 int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb, 223 int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
223 int x, int y); 224 int x, int y);
225 void (*force_wake_get)(struct drm_i915_private *dev_priv);
226 void (*force_wake_put)(struct drm_i915_private *dev_priv);
224 /* clock updates for mode set */ 227 /* clock updates for mode set */
225 /* cursor updates */ 228 /* cursor updates */
226 /* render clock increase/decrease */ 229 /* render clock increase/decrease */
@@ -710,6 +713,7 @@ typedef struct drm_i915_private {
710 713
711 u64 last_count1; 714 u64 last_count1;
712 unsigned long last_time1; 715 unsigned long last_time1;
716 unsigned long chipset_power;
713 u64 last_count2; 717 u64 last_count2;
714 struct timespec last_time2; 718 struct timespec last_time2;
715 unsigned long gfx_power; 719 unsigned long gfx_power;
@@ -998,11 +1002,11 @@ extern int i915_max_ioctl;
998extern unsigned int i915_fbpercrtc __always_unused; 1002extern unsigned int i915_fbpercrtc __always_unused;
999extern int i915_panel_ignore_lid __read_mostly; 1003extern int i915_panel_ignore_lid __read_mostly;
1000extern unsigned int i915_powersave __read_mostly; 1004extern unsigned int i915_powersave __read_mostly;
1001extern unsigned int i915_semaphores __read_mostly; 1005extern int i915_semaphores __read_mostly;
1002extern unsigned int i915_lvds_downclock __read_mostly; 1006extern unsigned int i915_lvds_downclock __read_mostly;
1003extern int i915_panel_use_ssc __read_mostly; 1007extern int i915_panel_use_ssc __read_mostly;
1004extern int i915_vbt_sdvo_panel_type __read_mostly; 1008extern int i915_vbt_sdvo_panel_type __read_mostly;
1005extern unsigned int i915_enable_rc6 __read_mostly; 1009extern int i915_enable_rc6 __read_mostly;
1006extern int i915_enable_fbc __read_mostly; 1010extern int i915_enable_fbc __read_mostly;
1007extern bool i915_enable_hangcheck __read_mostly; 1011extern bool i915_enable_hangcheck __read_mostly;
1008 1012
@@ -1308,6 +1312,11 @@ extern void gen6_set_rps(struct drm_device *dev, u8 val);
1308extern void intel_detect_pch(struct drm_device *dev); 1312extern void intel_detect_pch(struct drm_device *dev);
1309extern int intel_trans_dp_port_sel(struct drm_crtc *crtc); 1313extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
1310 1314
1315extern void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
1316extern void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv);
1317extern void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
1318extern void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv);
1319
1311/* overlay */ 1320/* overlay */
1312#ifdef CONFIG_DEBUG_FS 1321#ifdef CONFIG_DEBUG_FS
1313extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev); 1322extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
@@ -1352,8 +1361,9 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
1352/* We give fast paths for the really cool registers */ 1361/* We give fast paths for the really cool registers */
1353#define NEEDS_FORCE_WAKE(dev_priv, reg) \ 1362#define NEEDS_FORCE_WAKE(dev_priv, reg) \
1354 (((dev_priv)->info->gen >= 6) && \ 1363 (((dev_priv)->info->gen >= 6) && \
1355 ((reg) < 0x40000) && \ 1364 ((reg) < 0x40000) && \
1356 ((reg) != FORCEWAKE)) 1365 ((reg) != FORCEWAKE) && \
1366 ((reg) != ECOBUS))
1357 1367
1358#define __i915_read(x, y) \ 1368#define __i915_read(x, y) \
1359 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg); 1369 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 60ff1b63b568..8359dc777041 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2026,13 +2026,8 @@ i915_wait_request(struct intel_ring_buffer *ring,
2026 * to handle this, the waiter on a request often wants an associated 2026 * to handle this, the waiter on a request often wants an associated
2027 * buffer to have made it to the inactive list, and we would need 2027 * buffer to have made it to the inactive list, and we would need
2028 * a separate wait queue to handle that. 2028 * a separate wait queue to handle that.
2029 *
2030 * To avoid a recursion with the ilk VT-d workaround (that calls
2031 * gpu_idle when unbinding objects with interruptible==false) don't
2032 * retire requests in that case (because it might call unbind if the
2033 * active list holds the last reference to the object).
2034 */ 2029 */
2035 if (ret == 0 && dev_priv->mm.interruptible) 2030 if (ret == 0)
2036 i915_gem_retire_requests_ring(ring); 2031 i915_gem_retire_requests_ring(ring);
2037 2032
2038 return ret; 2033 return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3693e83a97f3..b9da8900ae4e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
32#include "i915_drv.h" 32#include "i915_drv.h"
33#include "i915_trace.h" 33#include "i915_trace.h"
34#include "intel_drv.h" 34#include "intel_drv.h"
35#include <linux/dma_remapping.h>
35 36
36struct change_domains { 37struct change_domains {
37 uint32_t invalidate_domains; 38 uint32_t invalidate_domains;
@@ -746,6 +747,22 @@ i915_gem_execbuffer_flush(struct drm_device *dev,
746 return 0; 747 return 0;
747} 748}
748 749
750static bool
751intel_enable_semaphores(struct drm_device *dev)
752{
753 if (INTEL_INFO(dev)->gen < 6)
754 return 0;
755
756 if (i915_semaphores >= 0)
757 return i915_semaphores;
758
759 /* Disable semaphores on SNB */
760 if (INTEL_INFO(dev)->gen == 6)
761 return 0;
762
763 return 1;
764}
765
749static int 766static int
750i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, 767i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
751 struct intel_ring_buffer *to) 768 struct intel_ring_buffer *to)
@@ -758,7 +775,7 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
758 return 0; 775 return 0;
759 776
760 /* XXX gpu semaphores are implicated in various hard hangs on SNB */ 777 /* XXX gpu semaphores are implicated in various hard hangs on SNB */
761 if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores) 778 if (!intel_enable_semaphores(obj->base.dev))
762 return i915_gem_object_wait_rendering(obj); 779 return i915_gem_object_wait_rendering(obj);
763 780
764 idx = intel_ring_sync_index(from, to); 781 idx = intel_ring_sync_index(from, to);
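
intel_enable_semaphores() is the tri-state module-parameter idiom this series applies to both semaphores and RC6: a default of -1 means "per-chip policy decides", while an explicit 0 or 1 from the user always wins. The general form:

/* Resolve a tri-state (-1/0/1) parameter against a per-chip default. */
static bool resolve_tristate(int param, bool chip_default)
{
	if (param >= 0)
		return param != 0;	/* user override */
	return chip_default;		/* e.g. false on SNB for semaphores */
}
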
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b080cc824001..a26d5b0a3690 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3303,10 +3303,10 @@
3303/* or SDVOB */ 3303/* or SDVOB */
3304#define HDMIB 0xe1140 3304#define HDMIB 0xe1140
3305#define PORT_ENABLE (1 << 31) 3305#define PORT_ENABLE (1 << 31)
3306#define TRANSCODER_A (0) 3306#define TRANSCODER(pipe) ((pipe) << 30)
3307#define TRANSCODER_B (1 << 30) 3307#define TRANSCODER_CPT(pipe) ((pipe) << 29)
3308#define TRANSCODER(pipe) ((pipe) << 30) 3308#define TRANSCODER_MASK (1 << 30)
3309#define TRANSCODER_MASK (1 << 30) 3309#define TRANSCODER_MASK_CPT (3 << 29)
3310#define COLOR_FORMAT_8bpc (0) 3310#define COLOR_FORMAT_8bpc (0)
3311#define COLOR_FORMAT_12bpc (3 << 26) 3311#define COLOR_FORMAT_12bpc (3 << 26)
3312#define SDVOB_HOTPLUG_ENABLE (1 << 23) 3312#define SDVOB_HOTPLUG_ENABLE (1 << 23)
@@ -3447,8 +3447,30 @@
3447#define EDP_LINK_TRAIN_800_1200MV_0DB_SNB_B (0x38<<22) 3447#define EDP_LINK_TRAIN_800_1200MV_0DB_SNB_B (0x38<<22)
3448#define EDP_LINK_TRAIN_VOL_EMP_MASK_SNB (0x3f<<22) 3448#define EDP_LINK_TRAIN_VOL_EMP_MASK_SNB (0x3f<<22)
3449 3449
3450/* IVB */
3451#define EDP_LINK_TRAIN_400MV_0DB_IVB (0x24 <<22)
3452#define EDP_LINK_TRAIN_400MV_3_5DB_IVB (0x2a <<22)
3453#define EDP_LINK_TRAIN_400MV_6DB_IVB (0x2f <<22)
3454#define EDP_LINK_TRAIN_600MV_0DB_IVB (0x30 <<22)
3455#define EDP_LINK_TRAIN_600MV_3_5DB_IVB (0x36 <<22)
3456#define EDP_LINK_TRAIN_800MV_0DB_IVB (0x38 <<22)
3457#define EDP_LINK_TRAIN_800MV_3_5DB_IVB (0x33 <<22)
3458
3459/* legacy values */
3460#define EDP_LINK_TRAIN_500MV_0DB_IVB (0x00 <<22)
3461#define EDP_LINK_TRAIN_1000MV_0DB_IVB (0x20 <<22)
3462#define EDP_LINK_TRAIN_500MV_3_5DB_IVB (0x02 <<22)
3463#define EDP_LINK_TRAIN_1000MV_3_5DB_IVB (0x22 <<22)
3464#define EDP_LINK_TRAIN_1000MV_6DB_IVB (0x23 <<22)
3465
3466#define EDP_LINK_TRAIN_VOL_EMP_MASK_IVB (0x3f<<22)
3467
3450#define FORCEWAKE 0xA18C 3468#define FORCEWAKE 0xA18C
3451#define FORCEWAKE_ACK 0x130090 3469#define FORCEWAKE_ACK 0x130090
3470#define FORCEWAKE_MT 0xa188 /* multi-threaded */
3471#define FORCEWAKE_MT_ACK 0x130040
3472#define ECOBUS 0xa180
3473#define FORCEWAKE_MT_ENABLE (1<<5)
3452 3474
3453#define GT_FIFO_FREE_ENTRIES 0x120008 3475#define GT_FIFO_FREE_ENTRIES 0x120008
3454#define GT_FIFO_NUM_RESERVED_ENTRIES 20 3476#define GT_FIFO_NUM_RESERVED_ENTRIES 20
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e77a863a3833..daa5743ccbd6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -38,8 +38,8 @@
38#include "i915_drv.h" 38#include "i915_drv.h"
39#include "i915_trace.h" 39#include "i915_trace.h"
40#include "drm_dp_helper.h" 40#include "drm_dp_helper.h"
41
42#include "drm_crtc_helper.h" 41#include "drm_crtc_helper.h"
42#include <linux/dma_remapping.h>
43 43
44#define HAS_eDP (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) 44#define HAS_eDP (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))
45 45
@@ -4670,6 +4670,7 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
4670/** 4670/**
4671 * intel_choose_pipe_bpp_dither - figure out what color depth the pipe should send 4671 * intel_choose_pipe_bpp_dither - figure out what color depth the pipe should send
4672 * @crtc: CRTC structure 4672 * @crtc: CRTC structure
4673 * @mode: requested mode
4673 * 4674 *
4674 * A pipe may be connected to one or more outputs. Based on the depth of the 4675 * A pipe may be connected to one or more outputs. Based on the depth of the
4675 * attached framebuffer, choose a good color depth to use on the pipe. 4676 * attached framebuffer, choose a good color depth to use on the pipe.
@@ -4681,13 +4682,15 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
4681 * HDMI supports only 8bpc or 12bpc, so clamp to 8bpc with dither for 10bpc 4682 * HDMI supports only 8bpc or 12bpc, so clamp to 8bpc with dither for 10bpc
4682 * Displays may support a restricted set as well, check EDID and clamp as 4683 * Displays may support a restricted set as well, check EDID and clamp as
4683 * appropriate. 4684 * appropriate.
4685 * DP may want to dither down to 6bpc to fit larger modes
4684 * 4686 *
4685 * RETURNS: 4687 * RETURNS:
4686 * Dithering requirement (i.e. false if display bpc and pipe bpc match, 4688 * Dithering requirement (i.e. false if display bpc and pipe bpc match,
4687 * true if they don't match). 4689 * true if they don't match).
4688 */ 4690 */
4689static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc, 4691static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc,
4690 unsigned int *pipe_bpp) 4692 unsigned int *pipe_bpp,
4693 struct drm_display_mode *mode)
4691{ 4694{
4692 struct drm_device *dev = crtc->dev; 4695 struct drm_device *dev = crtc->dev;
4693 struct drm_i915_private *dev_priv = dev->dev_private; 4696 struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4758,6 +4761,11 @@ static bool intel_choose_pipe_bpp_dither(struct drm_crtc *crtc,
4758 } 4761 }
4759 } 4762 }
4760 4763
4764 if (mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) {
4765 DRM_DEBUG_KMS("Dithering DP to 6bpc\n");
4766 display_bpc = 6;
4767 }
4768
4761 /* 4769 /*
4762 * We could just drive the pipe at the highest bpc all the time and 4770 * We could just drive the pipe at the highest bpc all the time and
4763 * enable dithering as needed, but that costs bandwidth. So choose 4771 * enable dithering as needed, but that costs bandwidth. So choose
@@ -5019,6 +5027,16 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
5019 pipeconf &= ~PIPECONF_DOUBLE_WIDE; 5027 pipeconf &= ~PIPECONF_DOUBLE_WIDE;
5020 } 5028 }
5021 5029
5030 /* default to 8bpc */
5031 pipeconf &= ~(PIPECONF_BPP_MASK | PIPECONF_DITHER_EN);
5032 if (is_dp) {
5033 if (mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) {
5034 pipeconf |= PIPECONF_BPP_6 |
5035 PIPECONF_DITHER_EN |
5036 PIPECONF_DITHER_TYPE_SP;
5037 }
5038 }
5039
5022 dpll |= DPLL_VCO_ENABLE; 5040 dpll |= DPLL_VCO_ENABLE;
5023 5041
5024 DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B'); 5042 DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
@@ -5480,7 +5498,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
5480 /* determine panel color depth */ 5498 /* determine panel color depth */
5481 temp = I915_READ(PIPECONF(pipe)); 5499 temp = I915_READ(PIPECONF(pipe));
5482 temp &= ~PIPE_BPC_MASK; 5500 temp &= ~PIPE_BPC_MASK;
5483 dither = intel_choose_pipe_bpp_dither(crtc, &pipe_bpp); 5501 dither = intel_choose_pipe_bpp_dither(crtc, &pipe_bpp, mode);
5484 switch (pipe_bpp) { 5502 switch (pipe_bpp) {
5485 case 18: 5503 case 18:
5486 temp |= PIPE_6BPC; 5504 temp |= PIPE_6BPC;
@@ -7189,11 +7207,16 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
7189 work->old_fb_obj = intel_fb->obj; 7207 work->old_fb_obj = intel_fb->obj;
7190 INIT_WORK(&work->work, intel_unpin_work_fn); 7208 INIT_WORK(&work->work, intel_unpin_work_fn);
7191 7209
7210 ret = drm_vblank_get(dev, intel_crtc->pipe);
7211 if (ret)
7212 goto free_work;
7213
7192 /* We borrow the event spin lock for protecting unpin_work */ 7214 /* We borrow the event spin lock for protecting unpin_work */
7193 spin_lock_irqsave(&dev->event_lock, flags); 7215 spin_lock_irqsave(&dev->event_lock, flags);
7194 if (intel_crtc->unpin_work) { 7216 if (intel_crtc->unpin_work) {
7195 spin_unlock_irqrestore(&dev->event_lock, flags); 7217 spin_unlock_irqrestore(&dev->event_lock, flags);
7196 kfree(work); 7218 kfree(work);
7219 drm_vblank_put(dev, intel_crtc->pipe);
7197 7220
7198 DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); 7221 DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
7199 return -EBUSY; 7222 return -EBUSY;
@@ -7212,10 +7235,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
7212 7235
7213 crtc->fb = fb; 7236 crtc->fb = fb;
7214 7237
7215 ret = drm_vblank_get(dev, intel_crtc->pipe);
7216 if (ret)
7217 goto cleanup_objs;
7218
7219 work->pending_flip_obj = obj; 7238 work->pending_flip_obj = obj;
7220 7239
7221 work->enable_stall_check = true; 7240 work->enable_stall_check = true;
@@ -7238,7 +7257,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
7238 7257
7239cleanup_pending: 7258cleanup_pending:
7240 atomic_sub(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip); 7259 atomic_sub(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
7241cleanup_objs:
7242 drm_gem_object_unreference(&work->old_fb_obj->base); 7260 drm_gem_object_unreference(&work->old_fb_obj->base);
7243 drm_gem_object_unreference(&obj->base); 7261 drm_gem_object_unreference(&obj->base);
7244 mutex_unlock(&dev->struct_mutex); 7262 mutex_unlock(&dev->struct_mutex);
@@ -7247,6 +7265,8 @@ cleanup_objs:
7247 intel_crtc->unpin_work = NULL; 7265 intel_crtc->unpin_work = NULL;
7248 spin_unlock_irqrestore(&dev->event_lock, flags); 7266 spin_unlock_irqrestore(&dev->event_lock, flags);
7249 7267
7268 drm_vblank_put(dev, intel_crtc->pipe);
7269free_work:
7250 kfree(work); 7270 kfree(work);
7251 7271
7252 return ret; 7272 return ret;
@@ -7887,6 +7907,31 @@ void intel_init_emon(struct drm_device *dev)
7887 dev_priv->corr = (lcfuse & LCFUSE_HIV_MASK); 7907 dev_priv->corr = (lcfuse & LCFUSE_HIV_MASK);
7888} 7908}
7889 7909
7910static bool intel_enable_rc6(struct drm_device *dev)
7911{
7912 /*
7913 * Respect the kernel parameter if it is set
7914 */
7915 if (i915_enable_rc6 >= 0)
7916 return i915_enable_rc6;
7917
7918 /*
7919 * Disable RC6 on Ironlake
7920 */
7921 if (INTEL_INFO(dev)->gen == 5)
7922 return 0;
7923
7924 /*
7925 * Disable rc6 on Sandybridge
7926 */
7927 if (INTEL_INFO(dev)->gen == 6) {
7928 DRM_DEBUG_DRIVER("Sandybridge: RC6 disabled\n");
7929 return 0;
7930 }
7931 DRM_DEBUG_DRIVER("RC6 enabled\n");
7932 return 1;
7933}
7934
7890void gen6_enable_rps(struct drm_i915_private *dev_priv) 7935void gen6_enable_rps(struct drm_i915_private *dev_priv)
7891{ 7936{
7892 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 7937 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
@@ -7923,7 +7968,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
7923 I915_WRITE(GEN6_RC6p_THRESHOLD, 100000); 7968 I915_WRITE(GEN6_RC6p_THRESHOLD, 100000);
7924 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 7969 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7925 7970
7926 if (i915_enable_rc6) 7971 if (intel_enable_rc6(dev_priv->dev))
7927 rc6_mask = GEN6_RC_CTL_RC6p_ENABLE | 7972 rc6_mask = GEN6_RC_CTL_RC6p_ENABLE |
7928 GEN6_RC_CTL_RC6_ENABLE; 7973 GEN6_RC_CTL_RC6_ENABLE;
7929 7974
@@ -8372,7 +8417,7 @@ void ironlake_enable_rc6(struct drm_device *dev)
8372 /* rc6 disabled by default due to repeated reports of hanging during 8417 /* rc6 disabled by default due to repeated reports of hanging during
8373 * boot and resume. 8418 * boot and resume.
8374 */ 8419 */
8375 if (!i915_enable_rc6) 8420 if (!intel_enable_rc6(dev))
8376 return; 8421 return;
8377 8422
8378 mutex_lock(&dev->struct_mutex); 8423 mutex_lock(&dev->struct_mutex);
@@ -8491,6 +8536,28 @@ static void intel_init_display(struct drm_device *dev)
8491 8536
8492 /* For FIFO watermark updates */ 8537 /* For FIFO watermark updates */
8493 if (HAS_PCH_SPLIT(dev)) { 8538 if (HAS_PCH_SPLIT(dev)) {
8539 dev_priv->display.force_wake_get = __gen6_gt_force_wake_get;
8540 dev_priv->display.force_wake_put = __gen6_gt_force_wake_put;
8541
8542 /* IVB configs may use multi-threaded forcewake */
8543 if (IS_IVYBRIDGE(dev)) {
8544 u32 ecobus;
8545
8546 mutex_lock(&dev->struct_mutex);
8547 __gen6_gt_force_wake_mt_get(dev_priv);
8548 ecobus = I915_READ(ECOBUS);
8549 __gen6_gt_force_wake_mt_put(dev_priv);
8550 mutex_unlock(&dev->struct_mutex);
8551
8552 if (ecobus & FORCEWAKE_MT_ENABLE) {
8553 DRM_DEBUG_KMS("Using MT version of forcewake\n");
8554 dev_priv->display.force_wake_get =
8555 __gen6_gt_force_wake_mt_get;
8556 dev_priv->display.force_wake_put =
8557 __gen6_gt_force_wake_mt_put;
8558 }
8559 }
8560
8494 if (HAS_PCH_IBX(dev)) 8561 if (HAS_PCH_IBX(dev))
8495 dev_priv->display.init_pch_clock_gating = ibx_init_clock_gating; 8562 dev_priv->display.init_pch_clock_gating = ibx_init_clock_gating;
8496 else if (HAS_PCH_CPT(dev)) 8563 else if (HAS_PCH_CPT(dev))
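
The page-flip hunks in intel_display.c are an unwind-ordering fix: drm_vblank_get() moves ahead of publishing unpin_work, and each failure path gains a matching drm_vblank_put(), closing the reference leak on the -EBUSY path. Skeleton of the corrected flow (flip_already_pending() is a stand-in for the unpin_work check):

static int page_flip_skeleton(struct drm_device *dev, int pipe,
			      struct intel_unpin_work *work)
{
	unsigned long flags;
	int ret;

	ret = drm_vblank_get(dev, pipe);
	if (ret)
		goto free_work;

	spin_lock_irqsave(&dev->event_lock, flags);
	if (flip_already_pending(dev, pipe)) {	/* hypothetical check */
		spin_unlock_irqrestore(&dev->event_lock, flags);
		drm_vblank_put(dev, pipe);	/* unwind the reference */
		ret = -EBUSY;
		goto free_work;
	}
	/* publish work and queue the flip here */
	spin_unlock_irqrestore(&dev->event_lock, flags);
	return 0;

free_work:
	kfree(work);
	return ret;
}
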
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 4d0358fad937..92b041b66e49 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -208,13 +208,15 @@ intel_dp_link_clock(uint8_t link_bw)
208 */ 208 */
209 209
210static int 210static int
211intel_dp_link_required(struct intel_dp *intel_dp, int pixel_clock) 211intel_dp_link_required(struct intel_dp *intel_dp, int pixel_clock, int check_bpp)
212{ 212{
213 struct drm_crtc *crtc = intel_dp->base.base.crtc; 213 struct drm_crtc *crtc = intel_dp->base.base.crtc;
214 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 214 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
215 int bpp = 24; 215 int bpp = 24;
216 216
217 if (intel_crtc) 217 if (check_bpp)
218 bpp = check_bpp;
219 else if (intel_crtc)
218 bpp = intel_crtc->bpp; 220 bpp = intel_crtc->bpp;
219 221
220 return (pixel_clock * bpp + 9) / 10; 222 return (pixel_clock * bpp + 9) / 10;
@@ -233,6 +235,7 @@ intel_dp_mode_valid(struct drm_connector *connector,
233 struct intel_dp *intel_dp = intel_attached_dp(connector); 235 struct intel_dp *intel_dp = intel_attached_dp(connector);
234 int max_link_clock = intel_dp_link_clock(intel_dp_max_link_bw(intel_dp)); 236 int max_link_clock = intel_dp_link_clock(intel_dp_max_link_bw(intel_dp));
235 int max_lanes = intel_dp_max_lane_count(intel_dp); 237 int max_lanes = intel_dp_max_lane_count(intel_dp);
238 int max_rate, mode_rate;
236 239
237 if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) { 240 if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) {
238 if (mode->hdisplay > intel_dp->panel_fixed_mode->hdisplay) 241 if (mode->hdisplay > intel_dp->panel_fixed_mode->hdisplay)
@@ -242,9 +245,17 @@ intel_dp_mode_valid(struct drm_connector *connector,
242 return MODE_PANEL; 245 return MODE_PANEL;
243 } 246 }
244 247
245 if (intel_dp_link_required(intel_dp, mode->clock) 248 mode_rate = intel_dp_link_required(intel_dp, mode->clock, 0);
246 > intel_dp_max_data_rate(max_link_clock, max_lanes)) 249 max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
247 return MODE_CLOCK_HIGH; 250
251 if (mode_rate > max_rate) {
252 mode_rate = intel_dp_link_required(intel_dp,
253 mode->clock, 18);
254 if (mode_rate > max_rate)
255 return MODE_CLOCK_HIGH;
256 else
257 mode->private_flags |= INTEL_MODE_DP_FORCE_6BPC;
258 }
248 259
249 if (mode->clock < 10000) 260 if (mode->clock < 10000)
250 return MODE_CLOCK_LOW; 261 return MODE_CLOCK_LOW;
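
The retry above keeps otherwise-too-wide modes alive by dropping to 18 bpp (6 bits per channel) and tagging the mode so mode_fixup later uses the same bpp. Worked numbers, assuming the usual 8b/10b payload rate (link_clock * lanes * 8 / 10) for max_rate:

	/*   2 lanes at 270000 (2.7 GHz): max_rate = 270000 * 2 * 8 / 10 = 432000
	 *   mode at 193250 kHz, 24 bpp:  mode_rate = 463800  -> exceeds max_rate
	 *   retry at 18 bpp:             mode_rate = 347850  -> fits
	 * so the mode passes and INTEL_MODE_DP_FORCE_6BPC is set instead of
	 * returning MODE_CLOCK_HIGH. */
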
@@ -362,8 +373,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
362 * clock divider. 373 * clock divider.
363 */ 374 */
364 if (is_cpu_edp(intel_dp)) { 375 if (is_cpu_edp(intel_dp)) {
365 if (IS_GEN6(dev)) 376 if (IS_GEN6(dev) || IS_GEN7(dev))
366 aux_clock_divider = 200; /* SNB eDP input clock at 400Mhz */ 377 aux_clock_divider = 200; /* SNB & IVB eDP input clock at 400Mhz */
367 else 378 else
368 aux_clock_divider = 225; /* eDP input clock at 450Mhz */ 379 aux_clock_divider = 225; /* eDP input clock at 450Mhz */
369 } else if (HAS_PCH_SPLIT(dev)) 380 } else if (HAS_PCH_SPLIT(dev))
@@ -672,6 +683,7 @@ intel_dp_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
672 int lane_count, clock; 683 int lane_count, clock;
673 int max_lane_count = intel_dp_max_lane_count(intel_dp); 684 int max_lane_count = intel_dp_max_lane_count(intel_dp);
674 int max_clock = intel_dp_max_link_bw(intel_dp) == DP_LINK_BW_2_7 ? 1 : 0; 685 int max_clock = intel_dp_max_link_bw(intel_dp) == DP_LINK_BW_2_7 ? 1 : 0;
686 int bpp = mode->private_flags & INTEL_MODE_DP_FORCE_6BPC ? 18 : 0;
675 static int bws[2] = { DP_LINK_BW_1_62, DP_LINK_BW_2_7 }; 687 static int bws[2] = { DP_LINK_BW_1_62, DP_LINK_BW_2_7 };
676 688
677 if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) { 689 if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) {
@@ -689,7 +701,7 @@ intel_dp_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
689 for (clock = 0; clock <= max_clock; clock++) { 701 for (clock = 0; clock <= max_clock; clock++) {
690 int link_avail = intel_dp_max_data_rate(intel_dp_link_clock(bws[clock]), lane_count); 702 int link_avail = intel_dp_max_data_rate(intel_dp_link_clock(bws[clock]), lane_count);
691 703
692 if (intel_dp_link_required(intel_dp, mode->clock) 704 if (intel_dp_link_required(intel_dp, mode->clock, bpp)
693 <= link_avail) { 705 <= link_avail) {
694 intel_dp->link_bw = bws[clock]; 706 intel_dp->link_bw = bws[clock];
695 intel_dp->lane_count = lane_count; 707 intel_dp->lane_count = lane_count;
@@ -817,10 +829,11 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
817 } 829 }
818 830
819 /* 831 /*
820 * There are three kinds of DP registers: 832 * There are four kinds of DP registers:
821 * 833 *
822 * IBX PCH 834 * IBX PCH
823 * CPU 835 * SNB CPU
836 * IVB CPU
824 * CPT PCH 837 * CPT PCH
825 * 838 *
826 * IBX PCH and CPU are the same for almost everything, 839 * IBX PCH and CPU are the same for almost everything,
@@ -873,7 +886,25 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
873 886
874 /* Split out the IBX/CPU vs CPT settings */ 887 /* Split out the IBX/CPU vs CPT settings */
875 888
876 if (!HAS_PCH_CPT(dev) || is_cpu_edp(intel_dp)) { 889 if (is_cpu_edp(intel_dp) && IS_GEN7(dev)) {
890 if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
891 intel_dp->DP |= DP_SYNC_HS_HIGH;
892 if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
893 intel_dp->DP |= DP_SYNC_VS_HIGH;
894 intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
895
896 if (intel_dp->link_configuration[1] & DP_LANE_COUNT_ENHANCED_FRAME_EN)
897 intel_dp->DP |= DP_ENHANCED_FRAMING;
898
899 intel_dp->DP |= intel_crtc->pipe << 29;
900
 901 /* don't miss out on the required settings for eDP */
902 intel_dp->DP |= DP_PLL_ENABLE;
903 if (adjusted_mode->clock < 200000)
904 intel_dp->DP |= DP_PLL_FREQ_160MHZ;
905 else
906 intel_dp->DP |= DP_PLL_FREQ_270MHZ;
907 } else if (!HAS_PCH_CPT(dev) || is_cpu_edp(intel_dp)) {
877 intel_dp->DP |= intel_dp->color_range; 908 intel_dp->DP |= intel_dp->color_range;
878 909
879 if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) 910 if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
@@ -1375,34 +1406,59 @@ static char *link_train_names[] = {
1375 * These are source-specific values; current Intel hardware supports 1406 * These are source-specific values; current Intel hardware supports
1376 * a maximum voltage of 800mV and a maximum pre-emphasis of 6dB 1407 * a maximum voltage of 800mV and a maximum pre-emphasis of 6dB
1377 */ 1408 */
1378#define I830_DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_800
1379#define I830_DP_VOLTAGE_MAX_CPT DP_TRAIN_VOLTAGE_SWING_1200
1380 1409
1381static uint8_t 1410static uint8_t
1382intel_dp_pre_emphasis_max(uint8_t voltage_swing) 1411intel_dp_voltage_max(struct intel_dp *intel_dp)
1383{ 1412{
1384 switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { 1413 struct drm_device *dev = intel_dp->base.base.dev;
1385 case DP_TRAIN_VOLTAGE_SWING_400: 1414
1386 return DP_TRAIN_PRE_EMPHASIS_6; 1415 if (IS_GEN7(dev) && is_cpu_edp(intel_dp))
1387 case DP_TRAIN_VOLTAGE_SWING_600: 1416 return DP_TRAIN_VOLTAGE_SWING_800;
1388 return DP_TRAIN_PRE_EMPHASIS_6; 1417 else if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp))
1389 case DP_TRAIN_VOLTAGE_SWING_800: 1418 return DP_TRAIN_VOLTAGE_SWING_1200;
1390 return DP_TRAIN_PRE_EMPHASIS_3_5; 1419 else
1391 case DP_TRAIN_VOLTAGE_SWING_1200: 1420 return DP_TRAIN_VOLTAGE_SWING_800;
1392 default: 1421}
1393 return DP_TRAIN_PRE_EMPHASIS_0; 1422
1423static uint8_t
1424intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing)
1425{
1426 struct drm_device *dev = intel_dp->base.base.dev;
1427
1428 if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) {
1429 switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) {
1430 case DP_TRAIN_VOLTAGE_SWING_400:
1431 return DP_TRAIN_PRE_EMPHASIS_6;
1432 case DP_TRAIN_VOLTAGE_SWING_600:
1433 case DP_TRAIN_VOLTAGE_SWING_800:
1434 return DP_TRAIN_PRE_EMPHASIS_3_5;
1435 default:
1436 return DP_TRAIN_PRE_EMPHASIS_0;
1437 }
1438 } else {
1439 switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) {
1440 case DP_TRAIN_VOLTAGE_SWING_400:
1441 return DP_TRAIN_PRE_EMPHASIS_6;
1442 case DP_TRAIN_VOLTAGE_SWING_600:
1443 return DP_TRAIN_PRE_EMPHASIS_6;
1444 case DP_TRAIN_VOLTAGE_SWING_800:
1445 return DP_TRAIN_PRE_EMPHASIS_3_5;
1446 case DP_TRAIN_VOLTAGE_SWING_1200:
1447 default:
1448 return DP_TRAIN_PRE_EMPHASIS_0;
1449 }
1394 } 1450 }
1395} 1451}
1396 1452
1397static void 1453static void
1398intel_get_adjust_train(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]) 1454intel_get_adjust_train(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE])
1399{ 1455{
1400 struct drm_device *dev = intel_dp->base.base.dev;
1401 uint8_t v = 0; 1456 uint8_t v = 0;
1402 uint8_t p = 0; 1457 uint8_t p = 0;
1403 int lane; 1458 int lane;
1404 uint8_t *adjust_request = link_status + (DP_ADJUST_REQUEST_LANE0_1 - DP_LANE0_1_STATUS); 1459 uint8_t *adjust_request = link_status + (DP_ADJUST_REQUEST_LANE0_1 - DP_LANE0_1_STATUS);
1405 int voltage_max; 1460 uint8_t voltage_max;
1461 uint8_t preemph_max;
1406 1462
1407 for (lane = 0; lane < intel_dp->lane_count; lane++) { 1463 for (lane = 0; lane < intel_dp->lane_count; lane++) {
1408 uint8_t this_v = intel_get_adjust_request_voltage(adjust_request, lane); 1464 uint8_t this_v = intel_get_adjust_request_voltage(adjust_request, lane);
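
The two new helpers replace the I830_DP_VOLTAGE_MAX* constants with per-platform limits. Summarized from the code above:

	/* Limits encoded by intel_dp_voltage_max()/intel_dp_pre_emphasis_max():
	 *   IVB CPU eDP:        swing up to 800 mV; 6 dB pre-emphasis only
	 *                       at 400 mV, 3.5 dB at 600/800 mV
	 *   CPT PCH (not eDP):  swing up to 1200 mV
	 *   everything else:    swing up to 800 mV, legacy pre-emphasis table */
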
@@ -1414,15 +1470,13 @@ intel_get_adjust_train(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_ST
1414 p = this_p; 1470 p = this_p;
1415 } 1471 }
1416 1472
1417 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) 1473 voltage_max = intel_dp_voltage_max(intel_dp);
1418 voltage_max = I830_DP_VOLTAGE_MAX_CPT;
1419 else
1420 voltage_max = I830_DP_VOLTAGE_MAX;
1421 if (v >= voltage_max) 1474 if (v >= voltage_max)
1422 v = voltage_max | DP_TRAIN_MAX_SWING_REACHED; 1475 v = voltage_max | DP_TRAIN_MAX_SWING_REACHED;
1423 1476
1424 if (p >= intel_dp_pre_emphasis_max(v)) 1477 preemph_max = intel_dp_pre_emphasis_max(intel_dp, v);
1425 p = intel_dp_pre_emphasis_max(v) | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; 1478 if (p >= preemph_max)
1479 p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
1426 1480
1427 for (lane = 0; lane < 4; lane++) 1481 for (lane = 0; lane < 4; lane++)
1428 intel_dp->train_set[lane] = v | p; 1482 intel_dp->train_set[lane] = v | p;
@@ -1494,6 +1548,37 @@ intel_gen6_edp_signal_levels(uint8_t train_set)
1494 } 1548 }
1495} 1549}
1496 1550
1551/* Gen7's DP voltage swing and pre-emphasis control */
1552static uint32_t
1553intel_gen7_edp_signal_levels(uint8_t train_set)
1554{
1555 int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
1556 DP_TRAIN_PRE_EMPHASIS_MASK);
1557 switch (signal_levels) {
1558 case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_0:
1559 return EDP_LINK_TRAIN_400MV_0DB_IVB;
1560 case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_3_5:
1561 return EDP_LINK_TRAIN_400MV_3_5DB_IVB;
1562 case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_6:
1563 return EDP_LINK_TRAIN_400MV_6DB_IVB;
1564
1565 case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_0:
1566 return EDP_LINK_TRAIN_600MV_0DB_IVB;
1567 case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_3_5:
1568 return EDP_LINK_TRAIN_600MV_3_5DB_IVB;
1569
1570 case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_0:
1571 return EDP_LINK_TRAIN_800MV_0DB_IVB;
1572 case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_3_5:
1573 return EDP_LINK_TRAIN_800MV_3_5DB_IVB;
1574
1575 default:
1576 DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level:"
1577 "0x%x\n", signal_levels);
1578 return EDP_LINK_TRAIN_500MV_0DB_IVB;
1579 }
1580}
1581
1497static uint8_t 1582static uint8_t
1498intel_get_lane_status(uint8_t link_status[DP_LINK_STATUS_SIZE], 1583intel_get_lane_status(uint8_t link_status[DP_LINK_STATUS_SIZE],
1499 int lane) 1584 int lane)
@@ -1599,7 +1684,8 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
1599 DP_LINK_CONFIGURATION_SIZE); 1684 DP_LINK_CONFIGURATION_SIZE);
1600 1685
1601 DP |= DP_PORT_EN; 1686 DP |= DP_PORT_EN;
1602 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) 1687
1688 if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp)))
1603 DP &= ~DP_LINK_TRAIN_MASK_CPT; 1689 DP &= ~DP_LINK_TRAIN_MASK_CPT;
1604 else 1690 else
1605 DP &= ~DP_LINK_TRAIN_MASK; 1691 DP &= ~DP_LINK_TRAIN_MASK;
@@ -1613,7 +1699,11 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
1613 uint8_t link_status[DP_LINK_STATUS_SIZE]; 1699 uint8_t link_status[DP_LINK_STATUS_SIZE];
1614 uint32_t signal_levels; 1700 uint32_t signal_levels;
1615 1701
1616 if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) { 1702
1703 if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) {
1704 signal_levels = intel_gen7_edp_signal_levels(intel_dp->train_set[0]);
1705 DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB) | signal_levels;
1706 } else if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) {
1617 signal_levels = intel_gen6_edp_signal_levels(intel_dp->train_set[0]); 1707 signal_levels = intel_gen6_edp_signal_levels(intel_dp->train_set[0]);
1618 DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB) | signal_levels; 1708 DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB) | signal_levels;
1619 } else { 1709 } else {
@@ -1622,7 +1712,7 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
1622 DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels; 1712 DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels;
1623 } 1713 }
1624 1714
1625 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) 1715 if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp)))
1626 reg = DP | DP_LINK_TRAIN_PAT_1_CPT; 1716 reg = DP | DP_LINK_TRAIN_PAT_1_CPT;
1627 else 1717 else
1628 reg = DP | DP_LINK_TRAIN_PAT_1; 1718 reg = DP | DP_LINK_TRAIN_PAT_1;
@@ -1703,7 +1793,10 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
1703 break; 1793 break;
1704 } 1794 }
1705 1795
1706 if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) { 1796 if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) {
1797 signal_levels = intel_gen7_edp_signal_levels(intel_dp->train_set[0]);
1798 DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB) | signal_levels;
1799 } else if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) {
1707 signal_levels = intel_gen6_edp_signal_levels(intel_dp->train_set[0]); 1800 signal_levels = intel_gen6_edp_signal_levels(intel_dp->train_set[0]);
1708 DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB) | signal_levels; 1801 DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_SNB) | signal_levels;
1709 } else { 1802 } else {
@@ -1711,7 +1804,7 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
1711 DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels; 1804 DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels;
1712 } 1805 }
1713 1806
1714 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) 1807 if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp)))
1715 reg = DP | DP_LINK_TRAIN_PAT_2_CPT; 1808 reg = DP | DP_LINK_TRAIN_PAT_2_CPT;
1716 else 1809 else
1717 reg = DP | DP_LINK_TRAIN_PAT_2; 1810 reg = DP | DP_LINK_TRAIN_PAT_2;
@@ -1752,7 +1845,7 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
1752 ++tries; 1845 ++tries;
1753 } 1846 }
1754 1847
1755 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) 1848 if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp)))
1756 reg = DP | DP_LINK_TRAIN_OFF_CPT; 1849 reg = DP | DP_LINK_TRAIN_OFF_CPT;
1757 else 1850 else
1758 reg = DP | DP_LINK_TRAIN_OFF; 1851 reg = DP | DP_LINK_TRAIN_OFF;
@@ -1782,7 +1875,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
1782 udelay(100); 1875 udelay(100);
1783 } 1876 }
1784 1877
1785 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) { 1878 if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp))) {
1786 DP &= ~DP_LINK_TRAIN_MASK_CPT; 1879 DP &= ~DP_LINK_TRAIN_MASK_CPT;
1787 I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT); 1880 I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT);
1788 } else { 1881 } else {
@@ -1794,7 +1887,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
1794 msleep(17); 1887 msleep(17);
1795 1888
1796 if (is_edp(intel_dp)) { 1889 if (is_edp(intel_dp)) {
1797 if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) 1890 if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || !is_cpu_edp(intel_dp)))
1798 DP |= DP_LINK_TRAIN_OFF_CPT; 1891 DP |= DP_LINK_TRAIN_OFF_CPT;
1799 else 1892 else
1800 DP |= DP_LINK_TRAIN_OFF; 1893 DP |= DP_LINK_TRAIN_OFF;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index bd9a604b73da..a1b4343814e8 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -110,6 +110,7 @@
110/* drm_display_mode->private_flags */ 110/* drm_display_mode->private_flags */
111#define INTEL_MODE_PIXEL_MULTIPLIER_SHIFT (0x0) 111#define INTEL_MODE_PIXEL_MULTIPLIER_SHIFT (0x0)
112#define INTEL_MODE_PIXEL_MULTIPLIER_MASK (0xf << INTEL_MODE_PIXEL_MULTIPLIER_SHIFT) 112#define INTEL_MODE_PIXEL_MULTIPLIER_MASK (0xf << INTEL_MODE_PIXEL_MULTIPLIER_SHIFT)
113#define INTEL_MODE_DP_FORCE_6BPC (0x10)
113 114
114static inline void 115static inline void
115intel_mode_set_pixel_multiplier(struct drm_display_mode *mode, 116intel_mode_set_pixel_multiplier(struct drm_display_mode *mode,
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 42f165a520de..e44191132ac4 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -715,6 +715,14 @@ static const struct dmi_system_id intel_no_lvds[] = {
715 DMI_MATCH(DMI_PRODUCT_NAME, "EB1007"), 715 DMI_MATCH(DMI_PRODUCT_NAME, "EB1007"),
716 }, 716 },
717 }, 717 },
718 {
719 .callback = intel_no_lvds_dmi_callback,
720 .ident = "Asus AT5NM10T-I",
721 .matches = {
722 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
723 DMI_MATCH(DMI_BOARD_NAME, "AT5NM10T-I"),
724 },
725 },
718 726
719 { } /* terminating entry */ 727 { } /* terminating entry */
720}; 728};
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 21f60b7d69a3..04d79fd1dc9d 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -178,13 +178,10 @@ u32 intel_panel_get_max_backlight(struct drm_device *dev)
178 if (HAS_PCH_SPLIT(dev)) { 178 if (HAS_PCH_SPLIT(dev)) {
179 max >>= 16; 179 max >>= 16;
180 } else { 180 } else {
181 if (IS_PINEVIEW(dev)) { 181 if (INTEL_INFO(dev)->gen < 4)
182 max >>= 17; 182 max >>= 17;
183 } else { 183 else
184 max >>= 16; 184 max >>= 16;
185 if (INTEL_INFO(dev)->gen < 4)
186 max &= ~1;
187 }
188 185
189 if (is_backlight_combination_mode(dev)) 186 if (is_backlight_combination_mode(dev))
190 max *= 0xff; 187 max *= 0xff;
@@ -203,13 +200,12 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
203 val = I915_READ(BLC_PWM_CPU_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; 200 val = I915_READ(BLC_PWM_CPU_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
204 } else { 201 } else {
205 val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; 202 val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
206 if (IS_PINEVIEW(dev)) 203 if (INTEL_INFO(dev)->gen < 4)
207 val >>= 1; 204 val >>= 1;
208 205
209 if (is_backlight_combination_mode(dev)) { 206 if (is_backlight_combination_mode(dev)) {
210 u8 lbpc; 207 u8 lbpc;
211 208
212 val &= ~1;
213 pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc); 209 pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc);
214 val *= lbpc; 210 val *= lbpc;
215 } 211 }
@@ -246,11 +242,9 @@ static void intel_panel_actually_set_backlight(struct drm_device *dev, u32 level
246 } 242 }
247 243
248 tmp = I915_READ(BLC_PWM_CTL); 244 tmp = I915_READ(BLC_PWM_CTL);
249 if (IS_PINEVIEW(dev)) { 245 if (INTEL_INFO(dev)->gen < 4)
250 tmp &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
251 level <<= 1; 246 level <<= 1;
252 } else 247 tmp &= ~BACKLIGHT_DUTY_CYCLE_MASK;
253 tmp &= ~BACKLIGHT_DUTY_CYCLE_MASK;
254 I915_WRITE(BLC_PWM_CTL, tmp | level); 248 I915_WRITE(BLC_PWM_CTL, tmp | level);
255} 249}
256 250
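
The three hunks above converge on one gen < 4 rule: bit 0 of each BLC_PWM_CTL field is not part of the value. Reading the packing back out of the code (a summary, not a verbatim excerpt):

	/* Duty-cycle packing implied by the hunks (gen < 4):
	 *   max = BLC_PWM_CTL >> 17                 (upper field, bit 0 unused)
	 *   val = (BLC_PWM_CTL & DUTY_MASK) >> 1    (lower field, bit 0 unused)
	 *   set: tmp = ctl & ~BACKLIGHT_DUTY_CYCLE_MASK;
	 *        I915_WRITE(BLC_PWM_CTL, tmp | (level << 1));
	 * gen >= 4 uses the full 16-bit fields, hence the plain >> 16 paths
	 * and no shift on read/write. */
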
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 3003fb25aefd..f7b9268df266 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -50,6 +50,7 @@
50#define IS_TMDS(c) (c->output_flag & SDVO_TMDS_MASK) 50#define IS_TMDS(c) (c->output_flag & SDVO_TMDS_MASK)
51#define IS_LVDS(c) (c->output_flag & SDVO_LVDS_MASK) 51#define IS_LVDS(c) (c->output_flag & SDVO_LVDS_MASK)
52#define IS_TV_OR_LVDS(c) (c->output_flag & (SDVO_TV_MASK | SDVO_LVDS_MASK)) 52#define IS_TV_OR_LVDS(c) (c->output_flag & (SDVO_TV_MASK | SDVO_LVDS_MASK))
53#define IS_DIGITAL(c) (c->output_flag & (SDVO_TMDS_MASK | SDVO_LVDS_MASK))
53 54
54 55
55static const char *tv_format_names[] = { 56static const char *tv_format_names[] = {
@@ -1086,8 +1087,12 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder,
1086 } 1087 }
1087 sdvox |= (9 << 19) | SDVO_BORDER_ENABLE; 1088 sdvox |= (9 << 19) | SDVO_BORDER_ENABLE;
1088 } 1089 }
1089 if (intel_crtc->pipe == 1) 1090
1090 sdvox |= SDVO_PIPE_B_SELECT; 1091 if (INTEL_PCH_TYPE(dev) >= PCH_CPT)
1092 sdvox |= TRANSCODER_CPT(intel_crtc->pipe);
1093 else
1094 sdvox |= TRANSCODER(intel_crtc->pipe);
1095
1091 if (intel_sdvo->has_hdmi_audio) 1096 if (intel_sdvo->has_hdmi_audio)
1092 sdvox |= SDVO_AUDIO_ENABLE; 1097 sdvox |= SDVO_AUDIO_ENABLE;
1093 1098
@@ -1314,6 +1319,18 @@ intel_sdvo_tmds_sink_detect(struct drm_connector *connector)
1314 return status; 1319 return status;
1315} 1320}
1316 1321
1322static bool
1323intel_sdvo_connector_matches_edid(struct intel_sdvo_connector *sdvo,
1324 struct edid *edid)
1325{
1326 bool monitor_is_digital = !!(edid->input & DRM_EDID_INPUT_DIGITAL);
1327 bool connector_is_digital = !!IS_DIGITAL(sdvo);
1328
1329 DRM_DEBUG_KMS("connector_is_digital? %d, monitor_is_digital? %d\n",
1330 connector_is_digital, monitor_is_digital);
1331 return connector_is_digital == monitor_is_digital;
1332}
1333
1317static enum drm_connector_status 1334static enum drm_connector_status
1318intel_sdvo_detect(struct drm_connector *connector, bool force) 1335intel_sdvo_detect(struct drm_connector *connector, bool force)
1319{ 1336{
@@ -1358,10 +1375,12 @@ intel_sdvo_detect(struct drm_connector *connector, bool force)
1358 if (edid == NULL) 1375 if (edid == NULL)
1359 edid = intel_sdvo_get_analog_edid(connector); 1376 edid = intel_sdvo_get_analog_edid(connector);
1360 if (edid != NULL) { 1377 if (edid != NULL) {
1361 if (edid->input & DRM_EDID_INPUT_DIGITAL) 1378 if (intel_sdvo_connector_matches_edid(intel_sdvo_connector,
1362 ret = connector_status_disconnected; 1379 edid))
1363 else
1364 ret = connector_status_connected; 1380 ret = connector_status_connected;
1381 else
1382 ret = connector_status_disconnected;
1383
1365 connector->display_info.raw_edid = NULL; 1384 connector->display_info.raw_edid = NULL;
1366 kfree(edid); 1385 kfree(edid);
1367 } else 1386 } else
@@ -1402,11 +1421,8 @@ static void intel_sdvo_get_ddc_modes(struct drm_connector *connector)
1402 edid = intel_sdvo_get_analog_edid(connector); 1421 edid = intel_sdvo_get_analog_edid(connector);
1403 1422
1404 if (edid != NULL) { 1423 if (edid != NULL) {
1405 struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); 1424 if (intel_sdvo_connector_matches_edid(to_intel_sdvo_connector(connector),
1406 bool monitor_is_digital = !!(edid->input & DRM_EDID_INPUT_DIGITAL); 1425 edid)) {
1407 bool connector_is_digital = !!IS_TMDS(intel_sdvo_connector);
1408
1409 if (connector_is_digital == monitor_is_digital) {
1410 drm_mode_connector_update_edid_property(connector, edid); 1426 drm_mode_connector_update_edid_property(connector, edid);
1411 drm_add_edid_modes(connector, edid); 1427 drm_add_edid_modes(connector, edid);
1412 } 1428 }
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c

index 5e00d1670aa9..92c9628c572d 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3276,6 +3276,18 @@ int evergreen_init(struct radeon_device *rdev)
3276 rdev->accel_working = false; 3276 rdev->accel_working = false;
3277 } 3277 }
3278 } 3278 }
3279
3280 /* Don't start up if the MC ucode is missing on BTC parts.
3281 * The default clocks and voltages before the MC ucode
 3282 * is loaded are not sufficient for advanced operations.
3283 */
3284 if (ASIC_IS_DCE5(rdev)) {
3285 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
3286 DRM_ERROR("radeon: MC ucode required for NI+.\n");
3287 return -EINVAL;
3288 }
3289 }
3290
3279 return 0; 3291 return 0;
3280} 3292}
3281 3293
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index d24baf30efcb..5082d17d14dc 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2560,7 +2560,11 @@ void radeon_atombios_get_power_modes(struct radeon_device *rdev)
2560 2560
2561 rdev->pm.current_power_state_index = rdev->pm.default_power_state_index; 2561 rdev->pm.current_power_state_index = rdev->pm.default_power_state_index;
2562 rdev->pm.current_clock_mode_index = 0; 2562 rdev->pm.current_clock_mode_index = 0;
2563 rdev->pm.current_vddc = rdev->pm.power_state[rdev->pm.default_power_state_index].clock_info[0].voltage.voltage; 2563 if (rdev->pm.default_power_state_index >= 0)
2564 rdev->pm.current_vddc =
2565 rdev->pm.power_state[rdev->pm.default_power_state_index].clock_info[0].voltage.voltage;
2566 else
2567 rdev->pm.current_vddc = 0;
2564} 2568}
2565 2569
2566void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable) 2570void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 8cca91a93bde..dc279706ca70 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -390,6 +390,11 @@ extern int vmw_context_check(struct vmw_private *dev_priv,
390 struct ttm_object_file *tfile, 390 struct ttm_object_file *tfile,
391 int id, 391 int id,
392 struct vmw_resource **p_res); 392 struct vmw_resource **p_res);
393extern int vmw_user_lookup_handle(struct vmw_private *dev_priv,
394 struct ttm_object_file *tfile,
395 uint32_t handle,
396 struct vmw_surface **out_surf,
397 struct vmw_dma_buffer **out_buf);
393extern void vmw_surface_res_free(struct vmw_resource *res); 398extern void vmw_surface_res_free(struct vmw_resource *res);
394extern int vmw_surface_init(struct vmw_private *dev_priv, 399extern int vmw_surface_init(struct vmw_private *dev_priv,
395 struct vmw_surface *srf, 400 struct vmw_surface *srf,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 03bbc2a6f9a7..a0c2f12b1e1b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -33,6 +33,7 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
33{ 33{
34 __le32 __iomem *fifo_mem = dev_priv->mmio_virt; 34 __le32 __iomem *fifo_mem = dev_priv->mmio_virt;
35 uint32_t fifo_min, hwversion; 35 uint32_t fifo_min, hwversion;
36 const struct vmw_fifo_state *fifo = &dev_priv->fifo;
36 37
37 if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO)) 38 if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO))
38 return false; 39 return false;
@@ -41,7 +42,12 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
41 if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int)) 42 if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int))
42 return false; 43 return false;
43 44
44 hwversion = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION); 45 hwversion = ioread32(fifo_mem +
46 ((fifo->capabilities &
47 SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
48 SVGA_FIFO_3D_HWVERSION_REVISED :
49 SVGA_FIFO_3D_HWVERSION));
50
45 if (hwversion == 0) 51 if (hwversion == 0)
46 return false; 52 return false;
47 53
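
vmw_fifo_have_3d() above and the FIFO_HW_VERSION ioctl below now pick the 3D hwversion register the same way; a sketch of hoisting that choice into one helper (the helper name is hypothetical, the constants are the ones used in both hunks):

	static inline uint32_t
	vmw_fifo_3d_hwversion_reg(const struct vmw_fifo_state *fifo)
	{
		return (fifo->capabilities & SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
			SVGA_FIFO_3D_HWVERSION_REVISED : SVGA_FIFO_3D_HWVERSION;
	}

	/* both call sites would then reduce to:
	 *   hwversion = ioread32(fifo_mem + vmw_fifo_3d_hwversion_reg(fifo)); */
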
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 5ff561d4e0b4..66917c6c3813 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -58,8 +58,14 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
58 case DRM_VMW_PARAM_FIFO_HW_VERSION: 58 case DRM_VMW_PARAM_FIFO_HW_VERSION:
59 { 59 {
60 __le32 __iomem *fifo_mem = dev_priv->mmio_virt; 60 __le32 __iomem *fifo_mem = dev_priv->mmio_virt;
61 61 const struct vmw_fifo_state *fifo = &dev_priv->fifo;
62 param->value = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION); 62
63 param->value =
64 ioread32(fifo_mem +
65 ((fifo->capabilities &
66 SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
67 SVGA_FIFO_3D_HWVERSION_REVISED :
68 SVGA_FIFO_3D_HWVERSION));
63 break; 69 break;
64 } 70 }
65 default: 71 default:
@@ -166,13 +172,7 @@ int vmw_present_ioctl(struct drm_device *dev, void *data,
166 ret = -EINVAL; 172 ret = -EINVAL;
167 goto out_no_fb; 173 goto out_no_fb;
168 } 174 }
169
170 vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj)); 175 vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
171 if (!vfb->dmabuf) {
172 DRM_ERROR("Framebuffer not dmabuf backed.\n");
173 ret = -EINVAL;
174 goto out_no_fb;
175 }
176 176
177 ret = ttm_read_lock(&vmaster->lock, true); 177 ret = ttm_read_lock(&vmaster->lock, true);
178 if (unlikely(ret != 0)) 178 if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 37d40545ed77..f94b33ae2215 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -31,6 +31,44 @@
31/* Might need a hrtimer here? */ 31/* Might need a hrtimer here? */
32#define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1) 32#define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
33 33
34
35struct vmw_clip_rect {
36 int x1, x2, y1, y2;
37};
38
39/**
40 * Clip @num_rects number of @rects against @clip storing the
41 * results in @out_rects and the number of passed rects in @out_num.
42 */
43void vmw_clip_cliprects(struct drm_clip_rect *rects,
44 int num_rects,
45 struct vmw_clip_rect clip,
46 SVGASignedRect *out_rects,
47 int *out_num)
48{
49 int i, k;
50
51 for (i = 0, k = 0; i < num_rects; i++) {
52 int x1 = max_t(int, clip.x1, rects[i].x1);
53 int y1 = max_t(int, clip.y1, rects[i].y1);
54 int x2 = min_t(int, clip.x2, rects[i].x2);
55 int y2 = min_t(int, clip.y2, rects[i].y2);
56
57 if (x1 >= x2)
58 continue;
59 if (y1 >= y2)
60 continue;
61
62 out_rects[k].left = x1;
63 out_rects[k].top = y1;
64 out_rects[k].right = x2;
65 out_rects[k].bottom = y2;
66 k++;
67 }
68
69 *out_num = k;
70}
71
34void vmw_display_unit_cleanup(struct vmw_display_unit *du) 72void vmw_display_unit_cleanup(struct vmw_display_unit *du)
35{ 73{
36 if (du->cursor_surface) 74 if (du->cursor_surface)
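
A usage sketch for the new helper (values illustrative): rects are in bounding-box coordinates, and the clip, as in the callers below, may extend negative:

	struct drm_clip_rect rects[2] = {
		{ .x1 = 0,   .y1 = 0,  .x2 = 64,  .y2 = 32 },
		{ .x1 = 700, .y1 = 10, .x2 = 720, .y2 = 20 },
	};
	struct vmw_clip_rect clip = { .x1 = -16, .x2 = 624, .y1 = 0, .y2 = 464 };
	SVGASignedRect out[2];
	int num;

	vmw_clip_cliprects(rects, 2, clip, out, &num);
	/* num == 1: the second rect lies entirely right of clip.x2 and is
	 * dropped; out[0] = { .left = 0, .top = 0, .right = 64, .bottom = 32 } */
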
@@ -82,6 +120,43 @@ int vmw_cursor_update_image(struct vmw_private *dev_priv,
82 return 0; 120 return 0;
83} 121}
84 122
123int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
124 struct vmw_dma_buffer *dmabuf,
125 u32 width, u32 height,
126 u32 hotspotX, u32 hotspotY)
127{
128 struct ttm_bo_kmap_obj map;
129 unsigned long kmap_offset;
130 unsigned long kmap_num;
131 void *virtual;
132 bool dummy;
133 int ret;
134
135 kmap_offset = 0;
136 kmap_num = (width*height*4 + PAGE_SIZE - 1) >> PAGE_SHIFT;
137
138 ret = ttm_bo_reserve(&dmabuf->base, true, false, false, 0);
139 if (unlikely(ret != 0)) {
140 DRM_ERROR("reserve failed\n");
141 return -EINVAL;
142 }
143
144 ret = ttm_bo_kmap(&dmabuf->base, kmap_offset, kmap_num, &map);
145 if (unlikely(ret != 0))
146 goto err_unreserve;
147
148 virtual = ttm_kmap_obj_virtual(&map, &dummy);
149 ret = vmw_cursor_update_image(dev_priv, virtual, width, height,
150 hotspotX, hotspotY);
151
152 ttm_bo_kunmap(&map);
153err_unreserve:
154 ttm_bo_unreserve(&dmabuf->base);
155
156 return ret;
157}
158
159
85void vmw_cursor_update_position(struct vmw_private *dev_priv, 160void vmw_cursor_update_position(struct vmw_private *dev_priv,
86 bool show, int x, int y) 161 bool show, int x, int y)
87{ 162{
@@ -110,24 +185,21 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
110 return -EINVAL; 185 return -EINVAL;
111 186
112 if (handle) { 187 if (handle) {
113 ret = vmw_user_surface_lookup_handle(dev_priv, tfile, 188 ret = vmw_user_lookup_handle(dev_priv, tfile,
114 handle, &surface); 189 handle, &surface, &dmabuf);
115 if (!ret) { 190 if (ret) {
116 if (!surface->snooper.image) { 191 DRM_ERROR("failed to find surface or dmabuf: %i\n", ret);
117 DRM_ERROR("surface not suitable for cursor\n"); 192 return -EINVAL;
118 vmw_surface_unreference(&surface);
119 return -EINVAL;
120 }
121 } else {
122 ret = vmw_user_dmabuf_lookup(tfile,
123 handle, &dmabuf);
124 if (ret) {
125 DRM_ERROR("failed to find surface or dmabuf: %i\n", ret);
126 return -EINVAL;
127 }
128 } 193 }
129 } 194 }
130 195
196 /* need to do this before taking down old image */
197 if (surface && !surface->snooper.image) {
198 DRM_ERROR("surface not suitable for cursor\n");
199 vmw_surface_unreference(&surface);
200 return -EINVAL;
201 }
202
131 /* takedown old cursor */ 203 /* takedown old cursor */
132 if (du->cursor_surface) { 204 if (du->cursor_surface) {
133 du->cursor_surface->snooper.crtc = NULL; 205 du->cursor_surface->snooper.crtc = NULL;
@@ -146,36 +218,11 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
146 vmw_cursor_update_image(dev_priv, surface->snooper.image, 218 vmw_cursor_update_image(dev_priv, surface->snooper.image,
147 64, 64, du->hotspot_x, du->hotspot_y); 219 64, 64, du->hotspot_x, du->hotspot_y);
148 } else if (dmabuf) { 220 } else if (dmabuf) {
149 struct ttm_bo_kmap_obj map;
150 unsigned long kmap_offset;
151 unsigned long kmap_num;
152 void *virtual;
153 bool dummy;
154
155 /* vmw_user_surface_lookup takes one reference */ 221 /* vmw_user_surface_lookup takes one reference */
156 du->cursor_dmabuf = dmabuf; 222 du->cursor_dmabuf = dmabuf;
157 223
158 kmap_offset = 0; 224 ret = vmw_cursor_update_dmabuf(dev_priv, dmabuf, width, height,
159 kmap_num = (64*64*4) >> PAGE_SHIFT; 225 du->hotspot_x, du->hotspot_y);
160
161 ret = ttm_bo_reserve(&dmabuf->base, true, false, false, 0);
162 if (unlikely(ret != 0)) {
163 DRM_ERROR("reserve failed\n");
164 return -EINVAL;
165 }
166
167 ret = ttm_bo_kmap(&dmabuf->base, kmap_offset, kmap_num, &map);
168 if (unlikely(ret != 0))
169 goto err_unreserve;
170
171 virtual = ttm_kmap_obj_virtual(&map, &dummy);
172 vmw_cursor_update_image(dev_priv, virtual, 64, 64,
173 du->hotspot_x, du->hotspot_y);
174
175 ttm_bo_kunmap(&map);
176err_unreserve:
177 ttm_bo_unreserve(&dmabuf->base);
178
179 } else { 226 } else {
180 vmw_cursor_update_position(dev_priv, false, 0, 0); 227 vmw_cursor_update_position(dev_priv, false, 0, 0);
181 return 0; 228 return 0;
@@ -377,8 +424,9 @@ static int do_surface_dirty_sou(struct vmw_private *dev_priv,
377 struct drm_clip_rect *clips, 424 struct drm_clip_rect *clips,
378 unsigned num_clips, int inc) 425 unsigned num_clips, int inc)
379{ 426{
380 struct drm_clip_rect *clips_ptr;
381 struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS]; 427 struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
428 struct drm_clip_rect *clips_ptr;
429 struct drm_clip_rect *tmp;
382 struct drm_crtc *crtc; 430 struct drm_crtc *crtc;
383 size_t fifo_size; 431 size_t fifo_size;
384 int i, num_units; 432 int i, num_units;
@@ -391,7 +439,6 @@ static int do_surface_dirty_sou(struct vmw_private *dev_priv,
391 } *cmd; 439 } *cmd;
392 SVGASignedRect *blits; 440 SVGASignedRect *blits;
393 441
394
395 num_units = 0; 442 num_units = 0;
396 list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, 443 list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list,
397 head) { 444 head) {
@@ -402,13 +449,24 @@ static int do_surface_dirty_sou(struct vmw_private *dev_priv,
402 449
403 BUG_ON(!clips || !num_clips); 450 BUG_ON(!clips || !num_clips);
404 451
452 tmp = kzalloc(sizeof(*tmp) * num_clips, GFP_KERNEL);
453 if (unlikely(tmp == NULL)) {
454 DRM_ERROR("Temporary cliprect memory alloc failed.\n");
455 return -ENOMEM;
456 }
457
405 fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips; 458 fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
406 cmd = kzalloc(fifo_size, GFP_KERNEL); 459 cmd = kzalloc(fifo_size, GFP_KERNEL);
407 if (unlikely(cmd == NULL)) { 460 if (unlikely(cmd == NULL)) {
408 DRM_ERROR("Temporary fifo memory alloc failed.\n"); 461 DRM_ERROR("Temporary fifo memory alloc failed.\n");
409 return -ENOMEM; 462 ret = -ENOMEM;
463 goto out_free_tmp;
410 } 464 }
411 465
466 /* setup blits pointer */
467 blits = (SVGASignedRect *)&cmd[1];
468
469 /* initial clip region */
412 left = clips->x1; 470 left = clips->x1;
413 right = clips->x2; 471 right = clips->x2;
414 top = clips->y1; 472 top = clips->y1;
@@ -434,45 +492,60 @@ static int do_surface_dirty_sou(struct vmw_private *dev_priv,
434 cmd->body.srcRect.bottom = bottom; 492 cmd->body.srcRect.bottom = bottom;
435 493
436 clips_ptr = clips; 494 clips_ptr = clips;
437 blits = (SVGASignedRect *)&cmd[1];
438 for (i = 0; i < num_clips; i++, clips_ptr += inc) { 495 for (i = 0; i < num_clips; i++, clips_ptr += inc) {
439 blits[i].left = clips_ptr->x1 - left; 496 tmp[i].x1 = clips_ptr->x1 - left;
440 blits[i].right = clips_ptr->x2 - left; 497 tmp[i].x2 = clips_ptr->x2 - left;
441 blits[i].top = clips_ptr->y1 - top; 498 tmp[i].y1 = clips_ptr->y1 - top;
442 blits[i].bottom = clips_ptr->y2 - top; 499 tmp[i].y2 = clips_ptr->y2 - top;
443 } 500 }
444 501
445 /* do per unit writing, reuse fifo for each */ 502 /* do per unit writing, reuse fifo for each */
446 for (i = 0; i < num_units; i++) { 503 for (i = 0; i < num_units; i++) {
447 struct vmw_display_unit *unit = units[i]; 504 struct vmw_display_unit *unit = units[i];
448 int clip_x1 = left - unit->crtc.x; 505 struct vmw_clip_rect clip;
449 int clip_y1 = top - unit->crtc.y; 506 int num;
450 int clip_x2 = right - unit->crtc.x; 507
451 int clip_y2 = bottom - unit->crtc.y; 508 clip.x1 = left - unit->crtc.x;
509 clip.y1 = top - unit->crtc.y;
510 clip.x2 = right - unit->crtc.x;
511 clip.y2 = bottom - unit->crtc.y;
452 512
453 /* skip any crtcs that misses the clip region */ 513 /* skip any crtcs that misses the clip region */
454 if (clip_x1 >= unit->crtc.mode.hdisplay || 514 if (clip.x1 >= unit->crtc.mode.hdisplay ||
455 clip_y1 >= unit->crtc.mode.vdisplay || 515 clip.y1 >= unit->crtc.mode.vdisplay ||
456 clip_x2 <= 0 || clip_y2 <= 0) 516 clip.x2 <= 0 || clip.y2 <= 0)
457 continue; 517 continue;
458 518
519 /*
520 * In order for the clip rects to be correctly scaled
 521 * the src and dest rects need to be the same size.
522 */
523 cmd->body.destRect.left = clip.x1;
524 cmd->body.destRect.right = clip.x2;
525 cmd->body.destRect.top = clip.y1;
526 cmd->body.destRect.bottom = clip.y2;
527
528 /* create a clip rect of the crtc in dest coords */
529 clip.x2 = unit->crtc.mode.hdisplay - clip.x1;
530 clip.y2 = unit->crtc.mode.vdisplay - clip.y1;
531 clip.x1 = 0 - clip.x1;
532 clip.y1 = 0 - clip.y1;
533
459 /* need to reset sid as it is changed by execbuf */ 534 /* need to reset sid as it is changed by execbuf */
460 cmd->body.srcImage.sid = cpu_to_le32(framebuffer->user_handle); 535 cmd->body.srcImage.sid = cpu_to_le32(framebuffer->user_handle);
461
462 cmd->body.destScreenId = unit->unit; 536 cmd->body.destScreenId = unit->unit;
463 537
464 /* 538 /* clip and write blits to cmd stream */
465 * The blit command is a lot more resilient then the 539 vmw_clip_cliprects(tmp, num_clips, clip, blits, &num);
466 * readback command when it comes to clip rects. So its
467 * okay to go out of bounds.
468 */
469 540
470 cmd->body.destRect.left = clip_x1; 541 /* if no cliprects hit skip this */
471 cmd->body.destRect.right = clip_x2; 542 if (num == 0)
472 cmd->body.destRect.top = clip_y1; 543 continue;
473 cmd->body.destRect.bottom = clip_y2;
474 544
475 545
546 /* recalculate package length */
547 fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num;
548 cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
476 ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd, 549 ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
477 fifo_size, 0, NULL); 550 fifo_size, 0, NULL);
478 551
@@ -480,7 +553,10 @@ static int do_surface_dirty_sou(struct vmw_private *dev_priv,
480 break; 553 break;
481 } 554 }
482 555
556
483 kfree(cmd); 557 kfree(cmd);
558out_free_tmp:
559 kfree(tmp);
484 560
485 return ret; 561 return ret;
486} 562}
@@ -556,6 +632,10 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
556 * Sanity checks. 632 * Sanity checks.
557 */ 633 */
558 634
635 /* Surface must be marked as a scanout. */
636 if (unlikely(!surface->scanout))
637 return -EINVAL;
638
559 if (unlikely(surface->mip_levels[0] != 1 || 639 if (unlikely(surface->mip_levels[0] != 1 ||
560 surface->num_sizes != 1 || 640 surface->num_sizes != 1 ||
561 surface->sizes[0].width < mode_cmd->width || 641 surface->sizes[0].width < mode_cmd->width ||
@@ -782,6 +862,7 @@ static int do_dmabuf_dirty_sou(struct drm_file *file_priv,
782 int clip_y1 = clips_ptr->y1 - unit->crtc.y; 862 int clip_y1 = clips_ptr->y1 - unit->crtc.y;
783 int clip_x2 = clips_ptr->x2 - unit->crtc.x; 863 int clip_x2 = clips_ptr->x2 - unit->crtc.x;
784 int clip_y2 = clips_ptr->y2 - unit->crtc.y; 864 int clip_y2 = clips_ptr->y2 - unit->crtc.y;
865 int move_x, move_y;
785 866
786 /* skip any crtcs that misses the clip region */ 867 /* skip any crtcs that misses the clip region */
787 if (clip_x1 >= unit->crtc.mode.hdisplay || 868 if (clip_x1 >= unit->crtc.mode.hdisplay ||
@@ -789,12 +870,21 @@ static int do_dmabuf_dirty_sou(struct drm_file *file_priv,
789 clip_x2 <= 0 || clip_y2 <= 0) 870 clip_x2 <= 0 || clip_y2 <= 0)
790 continue; 871 continue;
791 872
873 /* clip size to crtc size */
874 clip_x2 = min_t(int, clip_x2, unit->crtc.mode.hdisplay);
875 clip_y2 = min_t(int, clip_y2, unit->crtc.mode.vdisplay);
876
877 /* translate both src and dest to bring clip into screen */
878 move_x = min_t(int, clip_x1, 0);
879 move_y = min_t(int, clip_y1, 0);
880
881 /* actual translate done here */
792 blits[hit_num].header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN; 882 blits[hit_num].header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN;
793 blits[hit_num].body.destScreenId = unit->unit; 883 blits[hit_num].body.destScreenId = unit->unit;
794 blits[hit_num].body.srcOrigin.x = clips_ptr->x1; 884 blits[hit_num].body.srcOrigin.x = clips_ptr->x1 - move_x;
795 blits[hit_num].body.srcOrigin.y = clips_ptr->y1; 885 blits[hit_num].body.srcOrigin.y = clips_ptr->y1 - move_y;
796 blits[hit_num].body.destRect.left = clip_x1; 886 blits[hit_num].body.destRect.left = clip_x1 - move_x;
797 blits[hit_num].body.destRect.top = clip_y1; 887 blits[hit_num].body.destRect.top = clip_y1 - move_y;
798 blits[hit_num].body.destRect.right = clip_x2; 888 blits[hit_num].body.destRect.right = clip_x2;
799 blits[hit_num].body.destRect.bottom = clip_y2; 889 blits[hit_num].body.destRect.bottom = clip_y2;
800 hit_num++; 890 hit_num++;
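
Worked example of the translate-into-screen step above:

	/*   clip_x1 = -10, clip_y1 = -4 (rect partly off the left/top edge)
	 *   move_x = min(-10, 0) = -10;  move_y = min(-4, 0) = -4
	 *   srcOrigin.x   = clips_ptr->x1 - (-10) = x1 + 10
	 *   destRect.left = -10 - (-10) = 0
	 * src and dest shift by the same amount, so the blit content is
	 * unchanged while the destination now starts at the screen edge;
	 * clip_x2/clip_y2 were already clamped to the crtc size just above. */
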
@@ -1003,7 +1093,6 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
1003 struct vmw_surface *surface = NULL; 1093 struct vmw_surface *surface = NULL;
1004 struct vmw_dma_buffer *bo = NULL; 1094 struct vmw_dma_buffer *bo = NULL;
1005 struct ttm_base_object *user_obj; 1095 struct ttm_base_object *user_obj;
1006 u64 required_size;
1007 int ret; 1096 int ret;
1008 1097
1009 /** 1098 /**
@@ -1012,8 +1101,9 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
1012 * requested framebuffer. 1101 * requested framebuffer.
1013 */ 1102 */
1014 1103
1015 required_size = mode_cmd->pitch * mode_cmd->height; 1104 if (!vmw_kms_validate_mode_vram(dev_priv,
1016 if (unlikely(required_size > (u64) dev_priv->vram_size)) { 1105 mode_cmd->pitch,
1106 mode_cmd->height)) {
1017 DRM_ERROR("VRAM size is too small for requested mode.\n"); 1107 DRM_ERROR("VRAM size is too small for requested mode.\n");
1018 return ERR_PTR(-ENOMEM); 1108 return ERR_PTR(-ENOMEM);
1019 } 1109 }
@@ -1033,46 +1123,29 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
1033 return ERR_PTR(-ENOENT); 1123 return ERR_PTR(-ENOENT);
1034 } 1124 }
1035 1125
1036 /** 1126 /* returns either a dmabuf or surface */
1037 * End conditioned code. 1127 ret = vmw_user_lookup_handle(dev_priv, tfile,
1038 */ 1128 mode_cmd->handle,
1039 1129 &surface, &bo);
1040 ret = vmw_user_surface_lookup_handle(dev_priv, tfile,
1041 mode_cmd->handle, &surface);
1042 if (ret) 1130 if (ret)
1043 goto try_dmabuf; 1131 goto err_out;
1044 1132
1045 if (!surface->scanout) 1133 /* Create the new framebuffer depending one what we got back */
1046 goto err_not_scanout; 1134 if (bo)
1047 1135 ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, bo, &vfb,
1048 ret = vmw_kms_new_framebuffer_surface(dev_priv, file_priv, surface, 1136 mode_cmd);
1049 &vfb, mode_cmd); 1137 else if (surface)
1050 1138 ret = vmw_kms_new_framebuffer_surface(dev_priv, file_priv,
1051 /* vmw_user_surface_lookup takes one ref so does new_fb */ 1139 surface, &vfb, mode_cmd);
1052 vmw_surface_unreference(&surface); 1140 else
1053 1141 BUG();
1054 if (ret) {
1055 DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
1056 ttm_base_object_unref(&user_obj);
1057 return ERR_PTR(ret);
1058 } else
1059 vfb->user_obj = user_obj;
1060 return &vfb->base;
1061
1062try_dmabuf:
1063 DRM_INFO("%s: trying buffer\n", __func__);
1064
1065 ret = vmw_user_dmabuf_lookup(tfile, mode_cmd->handle, &bo);
1066 if (ret) {
1067 DRM_ERROR("failed to find buffer: %i\n", ret);
1068 return ERR_PTR(-ENOENT);
1069 }
1070
1071 ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, bo, &vfb,
1072 mode_cmd);
1073 1142
1074 /* vmw_user_dmabuf_lookup takes one ref so does new_fb */ 1143err_out:
1075 vmw_dmabuf_unreference(&bo); 1144 /* vmw_user_lookup_handle takes one ref so does new_fb */
1145 if (bo)
1146 vmw_dmabuf_unreference(&bo);
1147 if (surface)
1148 vmw_surface_unreference(&surface);
1076 1149
1077 if (ret) { 1150 if (ret) {
1078 DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); 1151 DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
@@ -1082,14 +1155,6 @@ try_dmabuf:
1082 vfb->user_obj = user_obj; 1155 vfb->user_obj = user_obj;
1083 1156
1084 return &vfb->base; 1157 return &vfb->base;
1085
1086err_not_scanout:
1087 DRM_ERROR("surface not marked as scanout\n");
1088 /* vmw_user_surface_lookup takes one ref */
1089 vmw_surface_unreference(&surface);
1090 ttm_base_object_unref(&user_obj);
1091
1092 return ERR_PTR(-EINVAL);
1093} 1158}
1094 1159
1095static struct drm_mode_config_funcs vmw_kms_funcs = { 1160static struct drm_mode_config_funcs vmw_kms_funcs = {
@@ -1106,10 +1171,12 @@ int vmw_kms_present(struct vmw_private *dev_priv,
1106 uint32_t num_clips) 1171 uint32_t num_clips)
1107{ 1172{
1108 struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS]; 1173 struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
1174 struct drm_clip_rect *tmp;
1109 struct drm_crtc *crtc; 1175 struct drm_crtc *crtc;
1110 size_t fifo_size; 1176 size_t fifo_size;
1111 int i, k, num_units; 1177 int i, k, num_units;
1112 int ret = 0; /* silence warning */ 1178 int ret = 0; /* silence warning */
1179 int left, right, top, bottom;
1113 1180
1114 struct { 1181 struct {
1115 SVGA3dCmdHeader header; 1182 SVGA3dCmdHeader header;
@@ -1127,60 +1194,95 @@ int vmw_kms_present(struct vmw_private *dev_priv,
1127 BUG_ON(surface == NULL); 1194 BUG_ON(surface == NULL);
1128 BUG_ON(!clips || !num_clips); 1195 BUG_ON(!clips || !num_clips);
1129 1196
1197 tmp = kzalloc(sizeof(*tmp) * num_clips, GFP_KERNEL);
1198 if (unlikely(tmp == NULL)) {
1199 DRM_ERROR("Temporary cliprect memory alloc failed.\n");
1200 return -ENOMEM;
1201 }
1202
1130 fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips; 1203 fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
1131 cmd = kmalloc(fifo_size, GFP_KERNEL); 1204 cmd = kmalloc(fifo_size, GFP_KERNEL);
1132 if (unlikely(cmd == NULL)) { 1205 if (unlikely(cmd == NULL)) {
1133 DRM_ERROR("Failed to allocate temporary fifo memory.\n"); 1206 DRM_ERROR("Failed to allocate temporary fifo memory.\n");
1134 return -ENOMEM; 1207 ret = -ENOMEM;
1208 goto out_free_tmp;
1209 }
1210
1211 left = clips->x;
1212 right = clips->x + clips->w;
1213 top = clips->y;
1214 bottom = clips->y + clips->h;
1215
1216 for (i = 1; i < num_clips; i++) {
1217 left = min_t(int, left, (int)clips[i].x);
1218 right = max_t(int, right, (int)clips[i].x + clips[i].w);
1219 top = min_t(int, top, (int)clips[i].y);
1220 bottom = max_t(int, bottom, (int)clips[i].y + clips[i].h);
1135 } 1221 }
1136 1222
1137 /* only need to do this once */ 1223 /* only need to do this once */
1138 memset(cmd, 0, fifo_size); 1224 memset(cmd, 0, fifo_size);
1139 cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN); 1225 cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
1140 cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
1141
1142 cmd->body.srcRect.left = 0;
1143 cmd->body.srcRect.right = surface->sizes[0].width;
1144 cmd->body.srcRect.top = 0;
1145 cmd->body.srcRect.bottom = surface->sizes[0].height;
1146 1226
1147 blits = (SVGASignedRect *)&cmd[1]; 1227 blits = (SVGASignedRect *)&cmd[1];
1228
1229 cmd->body.srcRect.left = left;
1230 cmd->body.srcRect.right = right;
1231 cmd->body.srcRect.top = top;
1232 cmd->body.srcRect.bottom = bottom;
1233
1148 for (i = 0; i < num_clips; i++) { 1234 for (i = 0; i < num_clips; i++) {
1149 blits[i].left = clips[i].x; 1235 tmp[i].x1 = clips[i].x - left;
1150 blits[i].right = clips[i].x + clips[i].w; 1236 tmp[i].x2 = clips[i].x + clips[i].w - left;
1151 blits[i].top = clips[i].y; 1237 tmp[i].y1 = clips[i].y - top;
1152 blits[i].bottom = clips[i].y + clips[i].h; 1238 tmp[i].y2 = clips[i].y + clips[i].h - top;
1153 } 1239 }
1154 1240
1155 for (k = 0; k < num_units; k++) { 1241 for (k = 0; k < num_units; k++) {
1156 struct vmw_display_unit *unit = units[k]; 1242 struct vmw_display_unit *unit = units[k];
1157 int clip_x1 = destX - unit->crtc.x; 1243 struct vmw_clip_rect clip;
1158 int clip_y1 = destY - unit->crtc.y; 1244 int num;
1159 int clip_x2 = clip_x1 + surface->sizes[0].width; 1245
1160 int clip_y2 = clip_y1 + surface->sizes[0].height; 1246 clip.x1 = left + destX - unit->crtc.x;
1247 clip.y1 = top + destY - unit->crtc.y;
1248 clip.x2 = right + destX - unit->crtc.x;
1249 clip.y2 = bottom + destY - unit->crtc.y;
1161 1250
1162 /* skip any crtcs that misses the clip region */ 1251 /* skip any crtcs that misses the clip region */
1163 if (clip_x1 >= unit->crtc.mode.hdisplay || 1252 if (clip.x1 >= unit->crtc.mode.hdisplay ||
1164 clip_y1 >= unit->crtc.mode.vdisplay || 1253 clip.y1 >= unit->crtc.mode.vdisplay ||
1165 clip_x2 <= 0 || clip_y2 <= 0) 1254 clip.x2 <= 0 || clip.y2 <= 0)
1166 continue; 1255 continue;
1167 1256
1257 /*
1258 * In order for the clip rects to be correctly scaled
 1259 * the src and dest rects need to be the same size.
1260 */
1261 cmd->body.destRect.left = clip.x1;
1262 cmd->body.destRect.right = clip.x2;
1263 cmd->body.destRect.top = clip.y1;
1264 cmd->body.destRect.bottom = clip.y2;
1265
1266 /* create a clip rect of the crtc in dest coords */
1267 clip.x2 = unit->crtc.mode.hdisplay - clip.x1;
1268 clip.y2 = unit->crtc.mode.vdisplay - clip.y1;
1269 clip.x1 = 0 - clip.x1;
1270 clip.y1 = 0 - clip.y1;
1271
1168 /* need to reset sid as it is changed by execbuf */ 1272 /* need to reset sid as it is changed by execbuf */
1169 cmd->body.srcImage.sid = sid; 1273 cmd->body.srcImage.sid = sid;
1170
1171 cmd->body.destScreenId = unit->unit; 1274 cmd->body.destScreenId = unit->unit;
1172 1275
1173 /* 1276 /* clip and write blits to cmd stream */
1174 * The blit command is a lot more resilient then the 1277 vmw_clip_cliprects(tmp, num_clips, clip, blits, &num);
1175 * readback command when it comes to clip rects. So its
1176 * okay to go out of bounds.
1177 */
1178 1278
1179 cmd->body.destRect.left = clip_x1; 1279 /* if no cliprects hit skip this */
1180 cmd->body.destRect.right = clip_x2; 1280 if (num == 0)
1181 cmd->body.destRect.top = clip_y1; 1281 continue;
1182 cmd->body.destRect.bottom = clip_y2;
1183 1282
1283 /* recalculate package length */
1284 fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num;
1285 cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
1184 ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd, 1286 ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
1185 fifo_size, 0, NULL); 1287 fifo_size, 0, NULL);
1186 1288
@@ -1189,6 +1291,8 @@ int vmw_kms_present(struct vmw_private *dev_priv,
1189 } 1291 }
1190 1292
1191 kfree(cmd); 1293 kfree(cmd);
1294out_free_tmp:
1295 kfree(tmp);
1192 1296
1193 return ret; 1297 return ret;
1194} 1298}
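
A worked pass through the new bounding-box setup in vmw_kms_present():

	/* clips = { {x 10, y 20, w 30, h 40}, {x 0, y 50, w 5, h 5} }
	 *   left = 0, right = 40, top = 20, bottom = 60
	 * srcRect becomes that box, and each tmp[i] is the clip translated
	 * so the box origin is (0,0):
	 *   tmp[0] = { .x1 = 10, .y1 = 0,  .x2 = 40, .y2 = 40 }
	 *   tmp[1] = { .x1 = 0,  .y1 = 30, .x2 = 5,  .y2 = 35 }
	 * each unit then builds its own vmw_clip_rect and feeds these
	 * through vmw_clip_cliprects(). */
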
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index af8e6e5bd964..e1cb8556355f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -62,9 +62,14 @@ struct vmw_framebuffer {
62int vmw_cursor_update_image(struct vmw_private *dev_priv, 62int vmw_cursor_update_image(struct vmw_private *dev_priv,
63 u32 *image, u32 width, u32 height, 63 u32 *image, u32 width, u32 height,
64 u32 hotspotX, u32 hotspotY); 64 u32 hotspotX, u32 hotspotY);
65int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
66 struct vmw_dma_buffer *dmabuf,
67 u32 width, u32 height,
68 u32 hotspotX, u32 hotspotY);
65void vmw_cursor_update_position(struct vmw_private *dev_priv, 69void vmw_cursor_update_position(struct vmw_private *dev_priv,
66 bool show, int x, int y); 70 bool show, int x, int y);
67 71
72
68/** 73/**
69 * Base class display unit. 74 * Base class display unit.
70 * 75 *
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 90c5e3928491..8f8dbd43c33d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -74,9 +74,10 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv)
74{ 74{
75 struct vmw_legacy_display *lds = dev_priv->ldu_priv; 75 struct vmw_legacy_display *lds = dev_priv->ldu_priv;
76 struct vmw_legacy_display_unit *entry; 76 struct vmw_legacy_display_unit *entry;
77 struct vmw_display_unit *du = NULL;
77 struct drm_framebuffer *fb = NULL; 78 struct drm_framebuffer *fb = NULL;
78 struct drm_crtc *crtc = NULL; 79 struct drm_crtc *crtc = NULL;
79 int i = 0; 80 int i = 0, ret;
80 81
81 /* If there is no display topology the host just assumes 82 /* If there is no display topology the host just assumes
82 * that the guest will set the same layout as the host. 83 * that the guest will set the same layout as the host.
@@ -129,6 +130,25 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv)
129 130
130 lds->last_num_active = lds->num_active; 131 lds->last_num_active = lds->num_active;
131 132
133
134 /* Find the first du with a cursor. */
135 list_for_each_entry(entry, &lds->active, active) {
136 du = &entry->base;
137
138 if (!du->cursor_dmabuf)
139 continue;
140
141 ret = vmw_cursor_update_dmabuf(dev_priv,
142 du->cursor_dmabuf,
143 64, 64,
144 du->hotspot_x,
145 du->hotspot_y);
146 if (ret == 0)
147 break;
148
149 DRM_ERROR("Could not update cursor image\n");
150 }
151
132 return 0; 152 return 0;
133} 153}
134 154
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 86c5e4cceb31..1c7f09e26819 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1190,6 +1190,29 @@ void vmw_resource_unreserve(struct list_head *list)
1190 write_unlock(lock); 1190 write_unlock(lock);
1191} 1191}
1192 1192
1193/**
 1194 * Helper function that looks up either a surface or a dmabuf.
1195 *
 1196 * The pointers pointed at by out_surf and out_buf need to be NULL.
1197 */
1198int vmw_user_lookup_handle(struct vmw_private *dev_priv,
1199 struct ttm_object_file *tfile,
1200 uint32_t handle,
1201 struct vmw_surface **out_surf,
1202 struct vmw_dma_buffer **out_buf)
1203{
1204 int ret;
1205
1206 BUG_ON(*out_surf || *out_buf);
1207
1208 ret = vmw_user_surface_lookup_handle(dev_priv, tfile, handle, out_surf);
1209 if (!ret)
1210 return 0;
1211
1212 ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf);
1213 return ret;
1214}
1215
1193 1216
1194int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv, 1217int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
1195 struct ttm_object_file *tfile, 1218 struct ttm_object_file *tfile,
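
Caller's-eye sketch of the new helper, matching how the kms code above uses it; the BUG_ON means both out pointers must start out NULL, and on success exactly one of them comes back set, holding a reference:

	struct vmw_surface *surface = NULL;
	struct vmw_dma_buffer *bo = NULL;
	int ret;

	ret = vmw_user_lookup_handle(dev_priv, tfile, handle, &surface, &bo);
	if (ret)
		return ret;			/* handle named neither object */

	if (surface) {
		/* ... use the surface ... */
		vmw_surface_unreference(&surface);	/* drop the lookup ref */
	} else {
		/* ... use the dmabuf ... */
		vmw_dmabuf_unreference(&bo);
	}
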
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 8cebef49aeac..18936ac9d51c 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -893,6 +893,13 @@ static int __devinit pch_i2c_probe(struct pci_dev *pdev,
893 /* Set the number of I2C channel instance */ 893 /* Set the number of I2C channel instance */
894 adap_info->ch_num = id->driver_data; 894 adap_info->ch_num = id->driver_data;
895 895
896 ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
897 KBUILD_MODNAME, adap_info);
898 if (ret) {
899 pch_pci_err(pdev, "request_irq FAILED\n");
900 goto err_request_irq;
901 }
902
896 for (i = 0; i < adap_info->ch_num; i++) { 903 for (i = 0; i < adap_info->ch_num; i++) {
897 pch_adap = &adap_info->pch_data[i].pch_adapter; 904 pch_adap = &adap_info->pch_data[i].pch_adapter;
898 adap_info->pch_i2c_suspended = false; 905 adap_info->pch_i2c_suspended = false;
@@ -910,28 +917,23 @@ static int __devinit pch_i2c_probe(struct pci_dev *pdev,
910 917
911 pch_adap->dev.parent = &pdev->dev; 918 pch_adap->dev.parent = &pdev->dev;
912 919
920 pch_i2c_init(&adap_info->pch_data[i]);
913 ret = i2c_add_adapter(pch_adap); 921 ret = i2c_add_adapter(pch_adap);
914 if (ret) { 922 if (ret) {
915 pch_pci_err(pdev, "i2c_add_adapter[ch:%d] FAILED\n", i); 923 pch_pci_err(pdev, "i2c_add_adapter[ch:%d] FAILED\n", i);
916 goto err_i2c_add_adapter; 924 goto err_add_adapter;
917 } 925 }
918
919 pch_i2c_init(&adap_info->pch_data[i]);
920 }
921 ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
922 KBUILD_MODNAME, adap_info);
923 if (ret) {
924 pch_pci_err(pdev, "request_irq FAILED\n");
925 goto err_i2c_add_adapter;
926 } 926 }
927 927
928 pci_set_drvdata(pdev, adap_info); 928 pci_set_drvdata(pdev, adap_info);
929 pch_pci_dbg(pdev, "returns %d.\n", ret); 929 pch_pci_dbg(pdev, "returns %d.\n", ret);
930 return 0; 930 return 0;
931 931
932err_i2c_add_adapter: 932err_add_adapter:
933 for (j = 0; j < i; j++) 933 for (j = 0; j < i; j++)
934 i2c_del_adapter(&adap_info->pch_data[j].pch_adapter); 934 i2c_del_adapter(&adap_info->pch_data[j].pch_adapter);
935 free_irq(pdev->irq, adap_info);
936err_request_irq:
935 pci_iounmap(pdev, base_addr); 937 pci_iounmap(pdev, base_addr);
936err_pci_iomap: 938err_pci_iomap:
937 pci_release_regions(pdev); 939 pci_release_regions(pdev);
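
The reordered probe works because the error labels unwind in exactly the reverse order of acquisition: moving request_irq() ahead of the adapter loop adds a matching free_irq() step between adapter deletion and pci_iounmap(). A hedged plain-C sketch of that mirror-ordered goto cleanup, with malloc()/fopen() standing in for the kernel resources:

```c
#include <stdio.h>
#include <stdlib.h>

static int probe(void)
{
	char *buf;
	FILE *log;

	buf = malloc(64);		/* resource 1, like request_irq() */
	if (!buf)
		goto err_alloc;

	log = fopen("/tmp/probe.log", "w"); /* resource 2, like i2c_add_adapter() */
	if (!log)
		goto err_open;

	fputs("probe ok\n", log);
	fclose(log);
	free(buf);
	return 0;

	/* Unwind strictly in reverse order of acquisition. */
err_open:
	free(buf);
err_alloc:
	return -1;
}

int main(void)
{
	return probe() ? 1 : 0;
}
```

Naming each label after the step that failed, as the patch does with err_request_irq and err_add_adapter, keeps the unwind list self-checking when a new resource is inserted.
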
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index a43d0023446a..fa23faa20f0e 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1047,13 +1047,14 @@ omap_i2c_probe(struct platform_device *pdev)
1047 * size. This is to ensure that we can handle the status on int 1047 * size. This is to ensure that we can handle the status on int
1048 * call back latencies. 1048 * call back latencies.
1049 */ 1049 */
1050 if (dev->rev >= OMAP_I2C_REV_ON_3530_4430) { 1050
1051 dev->fifo_size = 0; 1051 dev->fifo_size = (dev->fifo_size / 2);
1052
1053 if (dev->rev >= OMAP_I2C_REV_ON_3530_4430)
1052 dev->b_hw = 0; /* Disable hardware fixes */ 1054 dev->b_hw = 0; /* Disable hardware fixes */
1053 } else { 1055 else
1054 dev->fifo_size = (dev->fifo_size / 2);
1055 dev->b_hw = 1; /* Enable hardware fixes */ 1056 dev->b_hw = 1; /* Enable hardware fixes */
1056 } 1057
1057 /* calculate wakeup latency constraint for MPU */ 1058 /* calculate wakeup latency constraint for MPU */
1058 if (dev->set_mpu_wkup_lat != NULL) 1059 if (dev->set_mpu_wkup_lat != NULL)
1059 dev->latency = (1000000 * dev->fifo_size) / 1060 dev->latency = (1000000 * dev->fifo_size) /
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 2754cef86a06..4c1718081685 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -534,6 +534,7 @@ static int s3c24xx_i2c_doxfer(struct s3c24xx_i2c *i2c,
534 534
535 /* first, try busy waiting briefly */ 535 /* first, try busy waiting briefly */
536 do { 536 do {
537 cpu_relax();
537 iicstat = readl(i2c->regs + S3C2410_IICSTAT); 538 iicstat = readl(i2c->regs + S3C2410_IICSTAT);
538 } while ((iicstat & S3C2410_IICSTAT_START) && --spins); 539 } while ((iicstat & S3C2410_IICSTAT_START) && --spins);
539 540
@@ -786,7 +787,7 @@ static void s3c24xx_i2c_dt_gpio_free(struct s3c24xx_i2c *i2c)
786#else 787#else
787static int s3c24xx_i2c_parse_dt_gpio(struct s3c24xx_i2c *i2c) 788static int s3c24xx_i2c_parse_dt_gpio(struct s3c24xx_i2c *i2c)
788{ 789{
789 return -EINVAL; 790 return 0;
790} 791}
791 792
792static void s3c24xx_i2c_dt_gpio_free(struct s3c24xx_i2c *i2c) 793static void s3c24xx_i2c_dt_gpio_free(struct s3c24xx_i2c *i2c)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 75ff821c0af0..d0d4aa9f4802 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2513,6 +2513,9 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2513 2513
2514 req.private_data_len = sizeof(struct cma_hdr) + 2514 req.private_data_len = sizeof(struct cma_hdr) +
2515 conn_param->private_data_len; 2515 conn_param->private_data_len;
2516 if (req.private_data_len < conn_param->private_data_len)
2517 return -EINVAL;
2518
2516 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2519 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2517 if (!req.private_data) 2520 if (!req.private_data)
2518 return -ENOMEM; 2521 return -ENOMEM;
@@ -2562,6 +2565,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2562 memset(&req, 0, sizeof req); 2565 memset(&req, 0, sizeof req);
2563 offset = cma_user_data_offset(id_priv->id.ps); 2566 offset = cma_user_data_offset(id_priv->id.ps);
2564 req.private_data_len = offset + conn_param->private_data_len; 2567 req.private_data_len = offset + conn_param->private_data_len;
2568 if (req.private_data_len < conn_param->private_data_len)
2569 return -EINVAL;
2570
2565 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2571 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2566 if (!private_data) 2572 if (!private_data)
2567 return -ENOMEM; 2573 return -ENOMEM;
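
Both cma.c hunks guard the same integer-overflow pattern: a user-controlled private_data_len added to a fixed header size can wrap, making kzalloc() return a buffer smaller than the later copy. For unsigned arithmetic the wrap is detectable by checking whether the sum is smaller than one of the addends; a small runnable demonstration (an 8-bit type is used only to make the wrap easy to trigger):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t header = 36;		/* like a fixed header size */
	uint8_t user_len = 250;		/* attacker-controlled length */
	uint8_t total = header + user_len; /* wraps modulo 256 -> 30 */

	/* Unsigned overflow check used by the patch: after a wrap,
	 * the sum is necessarily smaller than either addend. */
	if (total < user_len)
		printf("overflow detected: %u + %u wrapped to %u\n",
		       (unsigned)header, (unsigned)user_len, (unsigned)total);
	return 0;
}
```

The kernel patch applies the same test to req.private_data_len before the allocation, turning a potential heap overrun into a clean -EINVAL.
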
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 77f3dbc0aaa1..18836cdf1e10 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1244,7 +1244,8 @@ err_reg:
1244 1244
1245err_counter: 1245err_counter:
1246 for (; i; --i) 1246 for (; i; --i)
1247 mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); 1247 if (ibdev->counters[i - 1] != -1)
1248 mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
1248 1249
1249err_map: 1250err_map:
1250 iounmap(ibdev->uar_map); 1251 iounmap(ibdev->uar_map);
@@ -1275,7 +1276,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1275 } 1276 }
1276 iounmap(ibdev->uar_map); 1277 iounmap(ibdev->uar_map);
1277 for (p = 0; p < ibdev->num_ports; ++p) 1278 for (p = 0; p < ibdev->num_ports; ++p)
1278 mlx4_counter_free(ibdev->dev, ibdev->counters[p]); 1279 if (ibdev->counters[p] != -1)
1280 mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
1279 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) 1281 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
1280 mlx4_CLOSE_PORT(dev, p); 1282 mlx4_CLOSE_PORT(dev, p);
1281 1283
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 574600ef5b42..a7403248d83d 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1285,7 +1285,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1285 strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); 1285 strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
1286 ctxt_fp(fp) = rcd; 1286 ctxt_fp(fp) = rcd;
1287 qib_stats.sps_ctxts++; 1287 qib_stats.sps_ctxts++;
1288 dd->freectxts++; 1288 dd->freectxts--;
1289 ret = 0; 1289 ret = 0;
1290 goto bail; 1290 goto bail;
1291 1291
@@ -1794,7 +1794,7 @@ static int qib_close(struct inode *in, struct file *fp)
1794 if (dd->pageshadow) 1794 if (dd->pageshadow)
1795 unlock_expected_tids(rcd); 1795 unlock_expected_tids(rcd);
1796 qib_stats.sps_ctxts--; 1796 qib_stats.sps_ctxts--;
1797 dd->freectxts--; 1797 dd->freectxts++;
1798 } 1798 }
1799 1799
1800 mutex_unlock(&qib_mutex); 1800 mutex_unlock(&qib_mutex);
diff --git a/drivers/input/misc/cma3000_d0x.c b/drivers/input/misc/cma3000_d0x.c
index 80793f1608eb..06517e60e50c 100644
--- a/drivers/input/misc/cma3000_d0x.c
+++ b/drivers/input/misc/cma3000_d0x.c
@@ -115,8 +115,8 @@ static void decode_mg(struct cma3000_accl_data *data, int *datax,
115static irqreturn_t cma3000_thread_irq(int irq, void *dev_id) 115static irqreturn_t cma3000_thread_irq(int irq, void *dev_id)
116{ 116{
117 struct cma3000_accl_data *data = dev_id; 117 struct cma3000_accl_data *data = dev_id;
118 int datax, datay, dataz; 118 int datax, datay, dataz, intr_status;
119 u8 ctrl, mode, range, intr_status; 119 u8 ctrl, mode, range;
120 120
121 intr_status = CMA3000_READ(data, CMA3000_INTSTATUS, "interrupt status"); 121 intr_status = CMA3000_READ(data, CMA3000_INTSTATUS, "interrupt status");
122 if (intr_status < 0) 122 if (intr_status < 0)
diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c
index c5b12d2e955a..86d6f39178b0 100644
--- a/drivers/input/mouse/sentelic.c
+++ b/drivers/input/mouse/sentelic.c
@@ -2,7 +2,7 @@
2 * Finger Sensing Pad PS/2 mouse driver. 2 * Finger Sensing Pad PS/2 mouse driver.
3 * 3 *
4 * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd. 4 * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd.
5 * Copyright (C) 2005-2010 Tai-hwa Liang, Sentelic Corporation. 5 * Copyright (C) 2005-2011 Tai-hwa Liang, Sentelic Corporation.
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -162,7 +162,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val)
162 ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); 162 ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2);
163 163
164 if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) 164 if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
165 return -1; 165 goto out;
166 166
167 if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) { 167 if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) {
168 /* inversion is required */ 168 /* inversion is required */
@@ -261,7 +261,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val)
261 ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); 261 ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2);
262 262
263 if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) 263 if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0)
264 return -1; 264 goto out;
265 265
266 if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) { 266 if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) {
267 ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2); 267 ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2);
@@ -309,7 +309,7 @@ static int fsp_get_buttons(struct psmouse *psmouse, int *btn)
309 }; 309 };
310 int val; 310 int val;
311 311
312 if (fsp_reg_read(psmouse, FSP_REG_TMOD_STATUS1, &val) == -1) 312 if (fsp_reg_read(psmouse, FSP_REG_TMOD_STATUS, &val) == -1)
313 return -EIO; 313 return -EIO;
314 314
315 *btn = buttons[(val & 0x30) >> 4]; 315 *btn = buttons[(val & 0x30) >> 4];
diff --git a/drivers/input/mouse/sentelic.h b/drivers/input/mouse/sentelic.h
index ed1395ac7b8b..2e4af24f8c15 100644
--- a/drivers/input/mouse/sentelic.h
+++ b/drivers/input/mouse/sentelic.h
@@ -2,7 +2,7 @@
2 * Finger Sensing Pad PS/2 mouse driver. 2 * Finger Sensing Pad PS/2 mouse driver.
3 * 3 *
4 * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd. 4 * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd.
5 * Copyright (C) 2005-2009 Tai-hwa Liang, Sentelic Corporation. 5 * Copyright (C) 2005-2011 Tai-hwa Liang, Sentelic Corporation.
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
@@ -33,6 +33,7 @@
33/* Finger-sensing Pad control registers */ 33/* Finger-sensing Pad control registers */
34#define FSP_REG_SYSCTL1 0x10 34#define FSP_REG_SYSCTL1 0x10
35#define FSP_BIT_EN_REG_CLK BIT(5) 35#define FSP_BIT_EN_REG_CLK BIT(5)
36#define FSP_REG_TMOD_STATUS 0x20
36#define FSP_REG_OPC_QDOWN 0x31 37#define FSP_REG_OPC_QDOWN 0x31
37#define FSP_BIT_EN_OPC_TAG BIT(7) 38#define FSP_BIT_EN_OPC_TAG BIT(7)
38#define FSP_REG_OPTZ_XLO 0x34 39#define FSP_REG_OPTZ_XLO 0x34
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index c080b828e5dc..a6dcd18e9adf 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -24,6 +24,7 @@
24 */ 24 */
25 25
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/delay.h>
27#include <linux/dmi.h> 28#include <linux/dmi.h>
28#include <linux/input/mt.h> 29#include <linux/input/mt.h>
29#include <linux/serio.h> 30#include <linux/serio.h>
@@ -1220,6 +1221,16 @@ static int synaptics_reconnect(struct psmouse *psmouse)
1220 1221
1221 do { 1222 do {
1222 psmouse_reset(psmouse); 1223 psmouse_reset(psmouse);
1224 if (retry) {
1225 /*
1226 * On some boxes, right after resuming, the touchpad
1227 * needs some time to finish initializing (I assume
1228 * it needs time to calibrate) and start responding
1229 * to Synaptics-specific queries, so let's wait a
1230 * bit.
1231 */
1232 ssleep(1);
1233 }
1223 error = synaptics_detect(psmouse, 0); 1234 error = synaptics_detect(psmouse, 0);
1224 } while (error && ++retry < 3); 1235 } while (error && ++retry < 3);
1225 1236
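
The synaptics_reconnect() fix keeps the three-try loop but sleeps a second before each retry after the first, giving the touchpad time to finish its post-resume calibration before it is queried again. A hedged userspace sketch of the same delay-before-retry shape; try_detect() is a hypothetical stub that succeeds on the third call:

```c
#include <stdio.h>
#include <unistd.h>

/* Hypothetical detection stub: fails twice, then succeeds. */
static int try_detect(void)
{
	static int calls;
	return ++calls < 3 ? -1 : 0;
}

int main(void)
{
	int error, retry = 0;

	do {
		if (retry)
			sleep(1);	/* wait only before retries */
		error = try_detect();
	} while (error && ++retry < 3);

	if (error)
		printf("device not found\n");
	else
		printf("detected after %d retries\n", retry);
	return error ? 1 : 0;
}
```
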
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index da0d8761e778..2ee47d01a3b4 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -1470,6 +1470,9 @@ static const struct wacom_features wacom_features_0xE3 =
1470static const struct wacom_features wacom_features_0xE6 = 1470static const struct wacom_features wacom_features_0xE6 =
1471 { "Wacom ISDv4 E6", WACOM_PKGLEN_TPC2FG, 27760, 15694, 255, 1471 { "Wacom ISDv4 E6", WACOM_PKGLEN_TPC2FG, 27760, 15694, 255,
1472 0, TABLETPC2FG, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; 1472 0, TABLETPC2FG, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
1473static const struct wacom_features wacom_features_0xEC =
1474 { "Wacom ISDv4 EC", WACOM_PKGLEN_GRAPHIRE, 25710, 14500, 255,
1475 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
1473static const struct wacom_features wacom_features_0x47 = 1476static const struct wacom_features wacom_features_0x47 =
1474 { "Wacom Intuos2 6x8", WACOM_PKGLEN_INTUOS, 20320, 16240, 1023, 1477 { "Wacom Intuos2 6x8", WACOM_PKGLEN_INTUOS, 20320, 16240, 1023,
1475 31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; 1478 31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1611,6 +1614,7 @@ const struct usb_device_id wacom_ids[] = {
1611 { USB_DEVICE_WACOM(0xE2) }, 1614 { USB_DEVICE_WACOM(0xE2) },
1612 { USB_DEVICE_WACOM(0xE3) }, 1615 { USB_DEVICE_WACOM(0xE3) },
1613 { USB_DEVICE_WACOM(0xE6) }, 1616 { USB_DEVICE_WACOM(0xE6) },
1617 { USB_DEVICE_WACOM(0xEC) },
1614 { USB_DEVICE_WACOM(0x47) }, 1618 { USB_DEVICE_WACOM(0x47) },
1615 { USB_DEVICE_LENOVO(0x6004) }, 1619 { USB_DEVICE_LENOVO(0x6004) },
1616 { } 1620 { }
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a004c3945c67..31053a951c34 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -41,6 +41,7 @@
41#include <linux/tboot.h> 41#include <linux/tboot.h>
42#include <linux/dmi.h> 42#include <linux/dmi.h>
43#include <linux/pci-ats.h> 43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
44#include <asm/cacheflush.h> 45#include <asm/cacheflush.h>
45#include <asm/iommu.h> 46#include <asm/iommu.h>
46 47
@@ -405,6 +406,9 @@ int dmar_disabled = 0;
405int dmar_disabled = 1; 406int dmar_disabled = 1;
406#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ 407#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
407 408
409int intel_iommu_enabled = 0;
410EXPORT_SYMBOL_GPL(intel_iommu_enabled);
411
408static int dmar_map_gfx = 1; 412static int dmar_map_gfx = 1;
409static int dmar_forcedac; 413static int dmar_forcedac;
410static int intel_iommu_strict; 414static int intel_iommu_strict;
@@ -2185,18 +2189,6 @@ static inline void iommu_prepare_isa(void)
2185 2189
2186static int md_domain_init(struct dmar_domain *domain, int guest_width); 2190static int md_domain_init(struct dmar_domain *domain, int guest_width);
2187 2191
2188static int __init si_domain_work_fn(unsigned long start_pfn,
2189 unsigned long end_pfn, void *datax)
2190{
2191 int *ret = datax;
2192
2193 *ret = iommu_domain_identity_map(si_domain,
2194 (uint64_t)start_pfn << PAGE_SHIFT,
2195 (uint64_t)end_pfn << PAGE_SHIFT);
2196 return *ret;
2197
2198}
2199
2200static int __init si_domain_init(int hw) 2192static int __init si_domain_init(int hw)
2201{ 2193{
2202 struct dmar_drhd_unit *drhd; 2194 struct dmar_drhd_unit *drhd;
@@ -2228,9 +2220,15 @@ static int __init si_domain_init(int hw)
2228 return 0; 2220 return 0;
2229 2221
2230 for_each_online_node(nid) { 2222 for_each_online_node(nid) {
2231 work_with_active_regions(nid, si_domain_work_fn, &ret); 2223 unsigned long start_pfn, end_pfn;
2232 if (ret) 2224 int i;
2233 return ret; 2225
2226 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2227 ret = iommu_domain_identity_map(si_domain,
2228 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2229 if (ret)
2230 return ret;
2231 }
2234 } 2232 }
2235 2233
2236 return 0; 2234 return 0;
@@ -3647,6 +3645,8 @@ int __init intel_iommu_init(void)
3647 3645
3648 bus_register_notifier(&pci_bus_type, &device_nb); 3646 bus_register_notifier(&pci_bus_type, &device_nb);
3649 3647
3648 intel_iommu_enabled = 1;
3649
3650 return 0; 3650 return 0;
3651} 3651}
3652 3652
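
The rewritten si_domain_init() loop walks each node's memory ranges and identity-maps them, converting page-frame numbers to physical addresses with PFN_PHYS(), which is simply a left shift by PAGE_SHIFT. A minimal demonstration of that conversion, assuming 4 KiB pages:

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12			/* 4 KiB pages assumed */
#define PFN_PHYS(pfn) ((uint64_t)(pfn) << PAGE_SHIFT)

int main(void)
{
	/* A hypothetical per-node memory range in page frames. */
	unsigned long start_pfn = 0x100, end_pfn = 0x200;

	printf("identity-map phys 0x%llx..0x%llx\n",
	       (unsigned long long)PFN_PHYS(start_pfn),
	       (unsigned long long)PFN_PHYS(end_pfn));
	return 0;
}
```
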
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2fb2963df553..5b5fa5cdaa31 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -90,7 +90,7 @@ struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
90 if (bus == NULL || bus->iommu_ops == NULL) 90 if (bus == NULL || bus->iommu_ops == NULL)
91 return NULL; 91 return NULL;
92 92
93 domain = kmalloc(sizeof(*domain), GFP_KERNEL); 93 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
94 if (!domain) 94 if (!domain)
95 return NULL; 95 return NULL;
96 96
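
Allocating the domain with kzalloc() instead of kmalloc() guarantees every field starts at zero rather than heap garbage, so later "is this set yet?" checks on pointers and flags behave deterministically. The userspace analogue is calloc() versus malloc():

```c
#include <stdio.h>
#include <stdlib.h>

struct domain {
	void *ops;
	int flags;
};

int main(void)
{
	/* malloc() leaves the fields indeterminate... */
	struct domain *a = malloc(sizeof(*a));
	/* ...calloc() zero-fills, like kzalloc() in the kernel. */
	struct domain *b = calloc(1, sizeof(*b));

	if (!a || !b)
		return 1;
	printf("zeroed domain: ops=%p flags=%d\n", b->ops, b->flags);
	free(a);
	free(b);
	return 0;
}
```
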
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index b6907118283a..6d03774b176e 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1393,9 +1393,6 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1393 atomic_read(&bitmap->behind_writes), 1393 atomic_read(&bitmap->behind_writes),
1394 bitmap->mddev->bitmap_info.max_write_behind); 1394 bitmap->mddev->bitmap_info.max_write_behind);
1395 } 1395 }
1396 if (bitmap->mddev->degraded)
1397 /* Never clear bits or update events_cleared when degraded */
1398 success = 0;
1399 1396
1400 while (sectors) { 1397 while (sectors) {
1401 sector_t blocks; 1398 sector_t blocks;
@@ -1409,7 +1406,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1409 return; 1406 return;
1410 } 1407 }
1411 1408
1412 if (success && 1409 if (success && !bitmap->mddev->degraded &&
1413 bitmap->events_cleared < bitmap->mddev->events) { 1410 bitmap->events_cleared < bitmap->mddev->events) {
1414 bitmap->events_cleared = bitmap->mddev->events; 1411 bitmap->events_cleared = bitmap->mddev->events;
1415 bitmap->need_sync = 1; 1412 bitmap->need_sync = 1;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index c3273efd08cb..627456542fb3 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -230,6 +230,7 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
230 return -EINVAL; 230 return -EINVAL;
231 231
232 rdev->raid_disk = rdev->saved_raid_disk; 232 rdev->raid_disk = rdev->saved_raid_disk;
233 rdev->saved_raid_disk = -1;
233 234
234 newconf = linear_conf(mddev,mddev->raid_disks+1); 235 newconf = linear_conf(mddev,mddev->raid_disks+1);
235 236
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ee981737edfc..f47f1f8ac44b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7360,8 +7360,7 @@ static int remove_and_add_spares(struct mddev *mddev)
7360 spares++; 7360 spares++;
7361 md_new_event(mddev); 7361 md_new_event(mddev);
7362 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7362 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7363 } else 7363 }
7364 break;
7365 } 7364 }
7366 } 7365 }
7367 } 7366 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 31670f8d6b65..858fdbb7eb07 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3065,11 +3065,17 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
3065 } 3065 }
3066 } else if (test_bit(In_sync, &rdev->flags)) 3066 } else if (test_bit(In_sync, &rdev->flags))
3067 set_bit(R5_Insync, &dev->flags); 3067 set_bit(R5_Insync, &dev->flags);
3068 else { 3068 else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
3069 /* in sync if before recovery_offset */ 3069 /* in sync if before recovery_offset */
3070 if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) 3070 set_bit(R5_Insync, &dev->flags);
3071 set_bit(R5_Insync, &dev->flags); 3071 else if (test_bit(R5_UPTODATE, &dev->flags) &&
3072 } 3072 test_bit(R5_Expanded, &dev->flags))
3073 /* If we've reshaped into here, we assume it is Insync.
3074 * We will shortly update recovery_offset to make
3075 * it official.
3076 */
3077 set_bit(R5_Insync, &dev->flags);
3078
3073 if (rdev && test_bit(R5_WriteError, &dev->flags)) { 3079 if (rdev && test_bit(R5_WriteError, &dev->flags)) {
3074 clear_bit(R5_Insync, &dev->flags); 3080 clear_bit(R5_Insync, &dev->flags);
3075 if (!test_bit(Faulty, &rdev->flags)) { 3081 if (!test_bit(Faulty, &rdev->flags)) {
diff --git a/drivers/media/common/tuners/mxl5007t.c b/drivers/media/common/tuners/mxl5007t.c
index 7eb1bf75cd07..5d02221e99dd 100644
--- a/drivers/media/common/tuners/mxl5007t.c
+++ b/drivers/media/common/tuners/mxl5007t.c
@@ -488,9 +488,10 @@ static int mxl5007t_write_regs(struct mxl5007t_state *state,
488 488
489static int mxl5007t_read_reg(struct mxl5007t_state *state, u8 reg, u8 *val) 489static int mxl5007t_read_reg(struct mxl5007t_state *state, u8 reg, u8 *val)
490{ 490{
491 u8 buf[2] = { 0xfb, reg };
491 struct i2c_msg msg[] = { 492 struct i2c_msg msg[] = {
492 { .addr = state->i2c_props.addr, .flags = 0, 493 { .addr = state->i2c_props.addr, .flags = 0,
493 .buf = &reg, .len = 1 }, 494 .buf = buf, .len = 2 },
494 { .addr = state->i2c_props.addr, .flags = I2C_M_RD, 495 { .addr = state->i2c_props.addr, .flags = I2C_M_RD,
495 .buf = val, .len = 1 }, 496 .buf = val, .len = 1 },
496 }; 497 };
diff --git a/drivers/media/common/tuners/tda18218.c b/drivers/media/common/tuners/tda18218.c
index aacfe2387e28..4fc29730a12c 100644
--- a/drivers/media/common/tuners/tda18218.c
+++ b/drivers/media/common/tuners/tda18218.c
@@ -141,7 +141,7 @@ static int tda18218_set_params(struct dvb_frontend *fe,
141 switch (params->u.ofdm.bandwidth) { 141 switch (params->u.ofdm.bandwidth) {
142 case BANDWIDTH_6_MHZ: 142 case BANDWIDTH_6_MHZ:
143 LP_Fc = 0; 143 LP_Fc = 0;
144 LO_Frac = params->frequency + 4000000; 144 LO_Frac = params->frequency + 3000000;
145 break; 145 break;
146 case BANDWIDTH_7_MHZ: 146 case BANDWIDTH_7_MHZ:
147 LP_Fc = 1; 147 LP_Fc = 1;
diff --git a/drivers/media/rc/ati_remote.c b/drivers/media/rc/ati_remote.c
index 303f22ea04c0..01bb8daf4b09 100644
--- a/drivers/media/rc/ati_remote.c
+++ b/drivers/media/rc/ati_remote.c
@@ -189,7 +189,7 @@ struct ati_remote {
189 dma_addr_t inbuf_dma; 189 dma_addr_t inbuf_dma;
190 dma_addr_t outbuf_dma; 190 dma_addr_t outbuf_dma;
191 191
192 unsigned char old_data[2]; /* Detect duplicate events */ 192 unsigned char old_data; /* Detect duplicate events */
193 unsigned long old_jiffies; 193 unsigned long old_jiffies;
194 unsigned long acc_jiffies; /* handle acceleration */ 194 unsigned long acc_jiffies; /* handle acceleration */
195 unsigned long first_jiffies; 195 unsigned long first_jiffies;
@@ -221,35 +221,35 @@ struct ati_remote {
221/* Translation table from hardware messages to input events. */ 221/* Translation table from hardware messages to input events. */
222static const struct { 222static const struct {
223 short kind; 223 short kind;
224 unsigned char data1, data2; 224 unsigned char data;
225 int type; 225 int type;
226 unsigned int code; 226 unsigned int code;
227 int value; 227 int value;
228} ati_remote_tbl[] = { 228} ati_remote_tbl[] = {
229 /* Directional control pad axes */ 229 /* Directional control pad axes */
230 {KIND_ACCEL, 0x35, 0x70, EV_REL, REL_X, -1}, /* left */ 230 {KIND_ACCEL, 0x70, EV_REL, REL_X, -1}, /* left */
231 {KIND_ACCEL, 0x36, 0x71, EV_REL, REL_X, 1}, /* right */ 231 {KIND_ACCEL, 0x71, EV_REL, REL_X, 1}, /* right */
232 {KIND_ACCEL, 0x37, 0x72, EV_REL, REL_Y, -1}, /* up */ 232 {KIND_ACCEL, 0x72, EV_REL, REL_Y, -1}, /* up */
233 {KIND_ACCEL, 0x38, 0x73, EV_REL, REL_Y, 1}, /* down */ 233 {KIND_ACCEL, 0x73, EV_REL, REL_Y, 1}, /* down */
234 /* Directional control pad diagonals */ 234 /* Directional control pad diagonals */
235 {KIND_LU, 0x39, 0x74, EV_REL, 0, 0}, /* left up */ 235 {KIND_LU, 0x74, EV_REL, 0, 0}, /* left up */
236 {KIND_RU, 0x3a, 0x75, EV_REL, 0, 0}, /* right up */ 236 {KIND_RU, 0x75, EV_REL, 0, 0}, /* right up */
237 {KIND_LD, 0x3c, 0x77, EV_REL, 0, 0}, /* left down */ 237 {KIND_LD, 0x77, EV_REL, 0, 0}, /* left down */
238 {KIND_RD, 0x3b, 0x76, EV_REL, 0, 0}, /* right down */ 238 {KIND_RD, 0x76, EV_REL, 0, 0}, /* right down */
239 239
240 /* "Mouse button" buttons */ 240 /* "Mouse button" buttons */
241 {KIND_LITERAL, 0x3d, 0x78, EV_KEY, BTN_LEFT, 1}, /* left btn down */ 241 {KIND_LITERAL, 0x78, EV_KEY, BTN_LEFT, 1}, /* left btn down */
242 {KIND_LITERAL, 0x3e, 0x79, EV_KEY, BTN_LEFT, 0}, /* left btn up */ 242 {KIND_LITERAL, 0x79, EV_KEY, BTN_LEFT, 0}, /* left btn up */
243 {KIND_LITERAL, 0x41, 0x7c, EV_KEY, BTN_RIGHT, 1},/* right btn down */ 243 {KIND_LITERAL, 0x7c, EV_KEY, BTN_RIGHT, 1},/* right btn down */
244 {KIND_LITERAL, 0x42, 0x7d, EV_KEY, BTN_RIGHT, 0},/* right btn up */ 244 {KIND_LITERAL, 0x7d, EV_KEY, BTN_RIGHT, 0},/* right btn up */
245 245
246 /* Artificial "doubleclick" events are generated by the hardware. 246 /* Artificial "doubleclick" events are generated by the hardware.
247 * They are mapped to the "side" and "extra" mouse buttons here. */ 247 * They are mapped to the "side" and "extra" mouse buttons here. */
248 {KIND_FILTERED, 0x3f, 0x7a, EV_KEY, BTN_SIDE, 1}, /* left dblclick */ 248 {KIND_FILTERED, 0x7a, EV_KEY, BTN_SIDE, 1}, /* left dblclick */
249 {KIND_FILTERED, 0x43, 0x7e, EV_KEY, BTN_EXTRA, 1},/* right dblclick */ 249 {KIND_FILTERED, 0x7e, EV_KEY, BTN_EXTRA, 1},/* right dblclick */
250 250
251 /* Non-mouse events are handled by rc-core */ 251 /* Non-mouse events are handled by rc-core */
252 {KIND_END, 0x00, 0x00, EV_MAX + 1, 0, 0} 252 {KIND_END, 0x00, EV_MAX + 1, 0, 0}
253}; 253};
254 254
255/* Local function prototypes */ 255/* Local function prototypes */
@@ -397,25 +397,6 @@ static int ati_remote_sendpacket(struct ati_remote *ati_remote, u16 cmd, unsigne
397} 397}
398 398
399/* 399/*
400 * ati_remote_event_lookup
401 */
402static int ati_remote_event_lookup(int rem, unsigned char d1, unsigned char d2)
403{
404 int i;
405
406 for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++) {
407 /*
408 * Decide if the table entry matches the remote input.
409 */
410 if (ati_remote_tbl[i].data1 == d1 &&
411 ati_remote_tbl[i].data2 == d2)
412 return i;
413
414 }
415 return -1;
416}
417
418/*
419 * ati_remote_compute_accel 400 * ati_remote_compute_accel
420 * 401 *
421 * Implements acceleration curve for directional control pad 402 * Implements acceleration curve for directional control pad
@@ -463,7 +444,15 @@ static void ati_remote_input_report(struct urb *urb)
463 int index = -1; 444 int index = -1;
464 int acc; 445 int acc;
465 int remote_num; 446 int remote_num;
466 unsigned char scancode[2]; 447 unsigned char scancode;
448 int i;
449
450 /*
451 * data[0] = 0x14
452 * data[1] = data[2] + data[3] + 0xd5 (a checksum byte)
453 * data[2] = the key code (with toggle bit in MSB with some models)
454 * data[3] = channel << 4 (the low 4 bits must be zero)
455 */
467 456
468 /* Deal with strange looking inputs */ 457 /* Deal with strange looking inputs */
469 if ( (urb->actual_length != 4) || (data[0] != 0x14) || 458 if ( (urb->actual_length != 4) || (data[0] != 0x14) ||
@@ -472,6 +461,13 @@ static void ati_remote_input_report(struct urb *urb)
472 return; 461 return;
473 } 462 }
474 463
464 if (data[1] != ((data[2] + data[3] + 0xd5) & 0xff)) {
465 dbginfo(&ati_remote->interface->dev,
466 "wrong checksum in input: %02x %02x %02x %02x\n",
467 data[0], data[1], data[2], data[3]);
468 return;
469 }
470
475 /* Mask unwanted remote channels. */ 471 /* Mask unwanted remote channels. */
476 /* note: remote_num is 0-based, channel 1 on remote == 0 here */ 472 /* note: remote_num is 0-based, channel 1 on remote == 0 here */
477 remote_num = (data[3] >> 4) & 0x0f; 473 remote_num = (data[3] >> 4) & 0x0f;
@@ -482,31 +478,30 @@ static void ati_remote_input_report(struct urb *urb)
482 return; 478 return;
483 } 479 }
484 480
485 scancode[0] = (((data[1] - ((remote_num + 1) << 4)) & 0xf0) | (data[1] & 0x0f));
486
487 /* 481 /*
488 * Some devices (e.g. SnapStream Firefly) use 8080 as toggle code, 482 * MSB is a toggle code, though only used by some devices
489 * so we have to clear them. The first bit is a bit tricky as the 483 * (e.g. SnapStream Firefly)
490 * "non-toggled" state depends on remote_num, so we xor it with the
491 * second bit which is only used for toggle.
492 */ 484 */
493 scancode[0] ^= (data[2] & 0x80); 485 scancode = data[2] & 0x7f;
494
495 scancode[1] = data[2] & ~0x80;
496 486
497 /* Look up event code index in mouse translation table. */ 487 /* Look up event code index in the mouse translation table. */
498 index = ati_remote_event_lookup(remote_num, scancode[0], scancode[1]); 488 for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++) {
489 if (scancode == ati_remote_tbl[i].data) {
490 index = i;
491 break;
492 }
493 }
499 494
500 if (index >= 0) { 495 if (index >= 0) {
501 dbginfo(&ati_remote->interface->dev, 496 dbginfo(&ati_remote->interface->dev,
502 "channel 0x%02x; mouse data %02x,%02x; index %d; keycode %d\n", 497 "channel 0x%02x; mouse data %02x; index %d; keycode %d\n",
503 remote_num, data[1], data[2], index, ati_remote_tbl[index].code); 498 remote_num, data[2], index, ati_remote_tbl[index].code);
504 if (!dev) 499 if (!dev)
505 return; /* no mouse device */ 500 return; /* no mouse device */
506 } else 501 } else
507 dbginfo(&ati_remote->interface->dev, 502 dbginfo(&ati_remote->interface->dev,
508 "channel 0x%02x; key data %02x,%02x, scancode %02x,%02x\n", 503 "channel 0x%02x; key data %02x, scancode %02x\n",
509 remote_num, data[1], data[2], scancode[0], scancode[1]); 504 remote_num, data[2], scancode);
510 505
511 506
512 if (index >= 0 && ati_remote_tbl[index].kind == KIND_LITERAL) { 507 if (index >= 0 && ati_remote_tbl[index].kind == KIND_LITERAL) {
@@ -523,8 +518,7 @@ static void ati_remote_input_report(struct urb *urb)
523 unsigned long now = jiffies; 518 unsigned long now = jiffies;
524 519
525 /* Filter duplicate events which happen "too close" together. */ 520 /* Filter duplicate events which happen "too close" together. */
526 if (ati_remote->old_data[0] == data[1] && 521 if (ati_remote->old_data == data[2] &&
527 ati_remote->old_data[1] == data[2] &&
528 time_before(now, ati_remote->old_jiffies + 522 time_before(now, ati_remote->old_jiffies +
529 msecs_to_jiffies(repeat_filter))) { 523 msecs_to_jiffies(repeat_filter))) {
530 ati_remote->repeat_count++; 524 ati_remote->repeat_count++;
@@ -533,8 +527,7 @@ static void ati_remote_input_report(struct urb *urb)
533 ati_remote->first_jiffies = now; 527 ati_remote->first_jiffies = now;
534 } 528 }
535 529
536 ati_remote->old_data[0] = data[1]; 530 ati_remote->old_data = data[2];
537 ati_remote->old_data[1] = data[2];
538 ati_remote->old_jiffies = now; 531 ati_remote->old_jiffies = now;
539 532
540 /* Ensure we skip at least the 4 first duplicate events (generated 533 /* Ensure we skip at least the 4 first duplicate events (generated
@@ -549,14 +542,13 @@ static void ati_remote_input_report(struct urb *urb)
549 542
550 if (index < 0) { 543 if (index < 0) {
551 /* Not a mouse event, hand it to rc-core. */ 544 /* Not a mouse event, hand it to rc-core. */
552 u32 rc_code = (scancode[0] << 8) | scancode[1];
553 545
554 /* 546 /*
555 * We don't use the rc-core repeat handling yet as 547 * We don't use the rc-core repeat handling yet as
556 * it would cause ghost repeats which would be a 548 * it would cause ghost repeats which would be a
557 * regression for this driver. 549 * regression for this driver.
558 */ 550 */
559 rc_keydown_notimeout(ati_remote->rdev, rc_code, 551 rc_keydown_notimeout(ati_remote->rdev, scancode,
560 data[2]); 552 data[2]);
561 rc_keyup(ati_remote->rdev); 553 rc_keyup(ati_remote->rdev);
562 return; 554 return;
@@ -607,8 +599,7 @@ static void ati_remote_input_report(struct urb *urb)
607 input_sync(dev); 599 input_sync(dev);
608 600
609 ati_remote->old_jiffies = jiffies; 601 ati_remote->old_jiffies = jiffies;
610 ati_remote->old_data[0] = data[1]; 602 ati_remote->old_data = data[2];
611 ati_remote->old_data[1] = data[2];
612 } 603 }
613} 604}
614 605
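
With the second scancode byte gone, ati_remote now drops malformed packets using the checksum relation documented in the new comment: data[1] must equal data[2] + data[3] + 0xd5 modulo 256. A runnable check of that relation on a hypothetical 4-byte report:

```c
#include <stdio.h>

int main(void)
{
	/* Hypothetical 4-byte report: 0x14, checksum, keycode, channel<<4. */
	unsigned char data[4] = { 0x14, 0x00, 0x78, 0x10 };

	data[1] = (data[2] + data[3] + 0xd5) & 0xff; /* forge a valid sum */

	if (data[1] != ((data[2] + data[3] + 0xd5) & 0xff))
		printf("wrong checksum, drop packet\n");
	else
		printf("checksum ok, scancode %02x\n", data[2] & 0x7f);
	return 0;
}
```

Masking with 0x7f afterwards strips the per-model toggle bit, which is exactly why the keymap tables above shrink to single-byte scancodes.
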
diff --git a/drivers/media/rc/keymaps/rc-ati-x10.c b/drivers/media/rc/keymaps/rc-ati-x10.c
index e1b8b2605c48..81506440eded 100644
--- a/drivers/media/rc/keymaps/rc-ati-x10.c
+++ b/drivers/media/rc/keymaps/rc-ati-x10.c
@@ -27,55 +27,55 @@
27#include <media/rc-map.h> 27#include <media/rc-map.h>
28 28
29static struct rc_map_table ati_x10[] = { 29static struct rc_map_table ati_x10[] = {
30 { 0xd20d, KEY_1 }, 30 { 0x0d, KEY_1 },
31 { 0xd30e, KEY_2 }, 31 { 0x0e, KEY_2 },
32 { 0xd40f, KEY_3 }, 32 { 0x0f, KEY_3 },
33 { 0xd510, KEY_4 }, 33 { 0x10, KEY_4 },
34 { 0xd611, KEY_5 }, 34 { 0x11, KEY_5 },
35 { 0xd712, KEY_6 }, 35 { 0x12, KEY_6 },
36 { 0xd813, KEY_7 }, 36 { 0x13, KEY_7 },
37 { 0xd914, KEY_8 }, 37 { 0x14, KEY_8 },
38 { 0xda15, KEY_9 }, 38 { 0x15, KEY_9 },
39 { 0xdc17, KEY_0 }, 39 { 0x17, KEY_0 },
40 { 0xc500, KEY_A }, 40 { 0x00, KEY_A },
41 { 0xc601, KEY_B }, 41 { 0x01, KEY_B },
42 { 0xde19, KEY_C }, 42 { 0x19, KEY_C },
43 { 0xe01b, KEY_D }, 43 { 0x1b, KEY_D },
44 { 0xe621, KEY_E }, 44 { 0x21, KEY_E },
45 { 0xe823, KEY_F }, 45 { 0x23, KEY_F },
46 46
47 { 0xdd18, KEY_KPENTER }, /* "check" */ 47 { 0x18, KEY_KPENTER }, /* "check" */
48 { 0xdb16, KEY_MENU }, /* "menu" */ 48 { 0x16, KEY_MENU }, /* "menu" */
49 { 0xc702, KEY_POWER }, /* Power */ 49 { 0x02, KEY_POWER }, /* Power */
50 { 0xc803, KEY_TV }, /* TV */ 50 { 0x03, KEY_TV }, /* TV */
51 { 0xc904, KEY_DVD }, /* DVD */ 51 { 0x04, KEY_DVD }, /* DVD */
52 { 0xca05, KEY_WWW }, /* WEB */ 52 { 0x05, KEY_WWW }, /* WEB */
53 { 0xcb06, KEY_BOOKMARKS }, /* "book" */ 53 { 0x06, KEY_BOOKMARKS }, /* "book" */
54 { 0xcc07, KEY_EDIT }, /* "hand" */ 54 { 0x07, KEY_EDIT }, /* "hand" */
55 { 0xe11c, KEY_COFFEE }, /* "timer" */ 55 { 0x1c, KEY_COFFEE }, /* "timer" */
56 { 0xe520, KEY_FRONT }, /* "max" */ 56 { 0x20, KEY_FRONT }, /* "max" */
57 { 0xe21d, KEY_LEFT }, /* left */ 57 { 0x1d, KEY_LEFT }, /* left */
58 { 0xe41f, KEY_RIGHT }, /* right */ 58 { 0x1f, KEY_RIGHT }, /* right */
59 { 0xe722, KEY_DOWN }, /* down */ 59 { 0x22, KEY_DOWN }, /* down */
60 { 0xdf1a, KEY_UP }, /* up */ 60 { 0x1a, KEY_UP }, /* up */
61 { 0xe31e, KEY_OK }, /* "OK" */ 61 { 0x1e, KEY_OK }, /* "OK" */
62 { 0xce09, KEY_VOLUMEDOWN }, /* VOL + */ 62 { 0x09, KEY_VOLUMEDOWN }, /* VOL + */
63 { 0xcd08, KEY_VOLUMEUP }, /* VOL - */ 63 { 0x08, KEY_VOLUMEUP }, /* VOL - */
64 { 0xcf0a, KEY_MUTE }, /* MUTE */ 64 { 0x0a, KEY_MUTE }, /* MUTE */
65 { 0xd00b, KEY_CHANNELUP }, /* CH + */ 65 { 0x0b, KEY_CHANNELUP }, /* CH + */
66 { 0xd10c, KEY_CHANNELDOWN },/* CH - */ 66 { 0x0c, KEY_CHANNELDOWN },/* CH - */
67 { 0xec27, KEY_RECORD }, /* ( o) red */ 67 { 0x27, KEY_RECORD }, /* ( o) red */
68 { 0xea25, KEY_PLAY }, /* ( >) */ 68 { 0x25, KEY_PLAY }, /* ( >) */
69 { 0xe924, KEY_REWIND }, /* (<<) */ 69 { 0x24, KEY_REWIND }, /* (<<) */
70 { 0xeb26, KEY_FORWARD }, /* (>>) */ 70 { 0x26, KEY_FORWARD }, /* (>>) */
71 { 0xed28, KEY_STOP }, /* ([]) */ 71 { 0x28, KEY_STOP }, /* ([]) */
72 { 0xee29, KEY_PAUSE }, /* ('') */ 72 { 0x29, KEY_PAUSE }, /* ('') */
73 { 0xf02b, KEY_PREVIOUS }, /* (<-) */ 73 { 0x2b, KEY_PREVIOUS }, /* (<-) */
74 { 0xef2a, KEY_NEXT }, /* (>+) */ 74 { 0x2a, KEY_NEXT }, /* (>+) */
75 { 0xf22d, KEY_INFO }, /* PLAYING */ 75 { 0x2d, KEY_INFO }, /* PLAYING */
76 { 0xf32e, KEY_HOME }, /* TOP */ 76 { 0x2e, KEY_HOME }, /* TOP */
77 { 0xf42f, KEY_END }, /* END */ 77 { 0x2f, KEY_END }, /* END */
78 { 0xf530, KEY_SELECT }, /* SELECT */ 78 { 0x30, KEY_SELECT }, /* SELECT */
79}; 79};
80 80
81static struct rc_map_list ati_x10_map = { 81static struct rc_map_list ati_x10_map = {
diff --git a/drivers/media/rc/keymaps/rc-medion-x10.c b/drivers/media/rc/keymaps/rc-medion-x10.c
index 09e2cc01d110..479cdb897810 100644
--- a/drivers/media/rc/keymaps/rc-medion-x10.c
+++ b/drivers/media/rc/keymaps/rc-medion-x10.c
@@ -25,70 +25,70 @@
25#include <media/rc-map.h> 25#include <media/rc-map.h>
26 26
27static struct rc_map_table medion_x10[] = { 27static struct rc_map_table medion_x10[] = {
28 { 0xf12c, KEY_TV }, /* TV */ 28 { 0x2c, KEY_TV }, /* TV */
29 { 0xf22d, KEY_VCR }, /* VCR */ 29 { 0x2d, KEY_VCR }, /* VCR */
30 { 0xc904, KEY_DVD }, /* DVD */ 30 { 0x04, KEY_DVD }, /* DVD */
31 { 0xcb06, KEY_AUDIO }, /* MUSIC */ 31 { 0x06, KEY_AUDIO }, /* MUSIC */
32 32
33 { 0xf32e, KEY_RADIO }, /* RADIO */ 33 { 0x2e, KEY_RADIO }, /* RADIO */
34 { 0xca05, KEY_DIRECTORY }, /* PHOTO */ 34 { 0x05, KEY_DIRECTORY }, /* PHOTO */
35 { 0xf42f, KEY_INFO }, /* TV-PREVIEW */ 35 { 0x2f, KEY_INFO }, /* TV-PREVIEW */
36 { 0xf530, KEY_LIST }, /* CHANNEL-LST */ 36 { 0x30, KEY_LIST }, /* CHANNEL-LST */
37 37
38 { 0xe01b, KEY_SETUP }, /* SETUP */ 38 { 0x1b, KEY_SETUP }, /* SETUP */
39 { 0xf631, KEY_VIDEO }, /* VIDEO DESKTOP */ 39 { 0x31, KEY_VIDEO }, /* VIDEO DESKTOP */
40 40
41 { 0xcd08, KEY_VOLUMEDOWN }, /* VOL - */ 41 { 0x08, KEY_VOLUMEDOWN }, /* VOL - */
42 { 0xce09, KEY_VOLUMEUP }, /* VOL + */ 42 { 0x09, KEY_VOLUMEUP }, /* VOL + */
43 { 0xd00b, KEY_CHANNELUP }, /* CHAN + */ 43 { 0x0b, KEY_CHANNELUP }, /* CHAN + */
44 { 0xd10c, KEY_CHANNELDOWN }, /* CHAN - */ 44 { 0x0c, KEY_CHANNELDOWN }, /* CHAN - */
45 { 0xc500, KEY_MUTE }, /* MUTE */ 45 { 0x00, KEY_MUTE }, /* MUTE */
46 46
47 { 0xf732, KEY_RED }, /* red */ 47 { 0x32, KEY_RED }, /* red */
48 { 0xf833, KEY_GREEN }, /* green */ 48 { 0x33, KEY_GREEN }, /* green */
49 { 0xf934, KEY_YELLOW }, /* yellow */ 49 { 0x34, KEY_YELLOW }, /* yellow */
50 { 0xfa35, KEY_BLUE }, /* blue */ 50 { 0x35, KEY_BLUE }, /* blue */
51 { 0xdb16, KEY_TEXT }, /* TXT */ 51 { 0x16, KEY_TEXT }, /* TXT */
52 52
53 { 0xd20d, KEY_1 }, 53 { 0x0d, KEY_1 },
54 { 0xd30e, KEY_2 }, 54 { 0x0e, KEY_2 },
55 { 0xd40f, KEY_3 }, 55 { 0x0f, KEY_3 },
56 { 0xd510, KEY_4 }, 56 { 0x10, KEY_4 },
57 { 0xd611, KEY_5 }, 57 { 0x11, KEY_5 },
58 { 0xd712, KEY_6 }, 58 { 0x12, KEY_6 },
59 { 0xd813, KEY_7 }, 59 { 0x13, KEY_7 },
60 { 0xd914, KEY_8 }, 60 { 0x14, KEY_8 },
61 { 0xda15, KEY_9 }, 61 { 0x15, KEY_9 },
62 { 0xdc17, KEY_0 }, 62 { 0x17, KEY_0 },
63 { 0xe11c, KEY_SEARCH }, /* TV/RAD, CH SRC */ 63 { 0x1c, KEY_SEARCH }, /* TV/RAD, CH SRC */
64 { 0xe520, KEY_DELETE }, /* DELETE */ 64 { 0x20, KEY_DELETE }, /* DELETE */
65 65
66 { 0xfb36, KEY_KEYBOARD }, /* RENAME */ 66 { 0x36, KEY_KEYBOARD }, /* RENAME */
67 { 0xdd18, KEY_SCREEN }, /* SNAPSHOT */ 67 { 0x18, KEY_SCREEN }, /* SNAPSHOT */
68 68
69 { 0xdf1a, KEY_UP }, /* up */ 69 { 0x1a, KEY_UP }, /* up */
70 { 0xe722, KEY_DOWN }, /* down */ 70 { 0x22, KEY_DOWN }, /* down */
71 { 0xe21d, KEY_LEFT }, /* left */ 71 { 0x1d, KEY_LEFT }, /* left */
72 { 0xe41f, KEY_RIGHT }, /* right */ 72 { 0x1f, KEY_RIGHT }, /* right */
73 { 0xe31e, KEY_OK }, /* OK */ 73 { 0x1e, KEY_OK }, /* OK */
74 74
75 { 0xfc37, KEY_SELECT }, /* ACQUIRE IMAGE */ 75 { 0x37, KEY_SELECT }, /* ACQUIRE IMAGE */
76 { 0xfd38, KEY_EDIT }, /* EDIT IMAGE */ 76 { 0x38, KEY_EDIT }, /* EDIT IMAGE */
77 77
78 { 0xe924, KEY_REWIND }, /* rewind (<<) */ 78 { 0x24, KEY_REWIND }, /* rewind (<<) */
79 { 0xea25, KEY_PLAY }, /* play ( >) */ 79 { 0x25, KEY_PLAY }, /* play ( >) */
80 { 0xeb26, KEY_FORWARD }, /* forward (>>) */ 80 { 0x26, KEY_FORWARD }, /* forward (>>) */
81 { 0xec27, KEY_RECORD }, /* record ( o) */ 81 { 0x27, KEY_RECORD }, /* record ( o) */
82 { 0xed28, KEY_STOP }, /* stop ([]) */ 82 { 0x28, KEY_STOP }, /* stop ([]) */
83 { 0xee29, KEY_PAUSE }, /* pause ('') */ 83 { 0x29, KEY_PAUSE }, /* pause ('') */
84 84
85 { 0xe621, KEY_PREVIOUS }, /* prev */ 85 { 0x21, KEY_PREVIOUS }, /* prev */
86 { 0xfe39, KEY_SWITCHVIDEOMODE }, /* F SCR */ 86 { 0x39, KEY_SWITCHVIDEOMODE }, /* F SCR */
87 { 0xe823, KEY_NEXT }, /* next */ 87 { 0x23, KEY_NEXT }, /* next */
88 { 0xde19, KEY_MENU }, /* MENU */ 88 { 0x19, KEY_MENU }, /* MENU */
89 { 0xff3a, KEY_LANGUAGE }, /* AUDIO */ 89 { 0x3a, KEY_LANGUAGE }, /* AUDIO */
90 90
91 { 0xc702, KEY_POWER }, /* POWER */ 91 { 0x02, KEY_POWER }, /* POWER */
92}; 92};
93 93
94static struct rc_map_list medion_x10_map = { 94static struct rc_map_list medion_x10_map = {
diff --git a/drivers/media/rc/keymaps/rc-snapstream-firefly.c b/drivers/media/rc/keymaps/rc-snapstream-firefly.c
index ef146520931c..c7f33ec719b4 100644
--- a/drivers/media/rc/keymaps/rc-snapstream-firefly.c
+++ b/drivers/media/rc/keymaps/rc-snapstream-firefly.c
@@ -22,63 +22,63 @@
22#include <media/rc-map.h> 22#include <media/rc-map.h>
23 23
24static struct rc_map_table snapstream_firefly[] = { 24static struct rc_map_table snapstream_firefly[] = {
25 { 0xf12c, KEY_ZOOM }, /* Maximize */ 25 { 0x2c, KEY_ZOOM }, /* Maximize */
26 { 0xc702, KEY_CLOSE }, 26 { 0x02, KEY_CLOSE },
27 27
28 { 0xd20d, KEY_1 }, 28 { 0x0d, KEY_1 },
29 { 0xd30e, KEY_2 }, 29 { 0x0e, KEY_2 },
30 { 0xd40f, KEY_3 }, 30 { 0x0f, KEY_3 },
31 { 0xd510, KEY_4 }, 31 { 0x10, KEY_4 },
32 { 0xd611, KEY_5 }, 32 { 0x11, KEY_5 },
33 { 0xd712, KEY_6 }, 33 { 0x12, KEY_6 },
34 { 0xd813, KEY_7 }, 34 { 0x13, KEY_7 },
35 { 0xd914, KEY_8 }, 35 { 0x14, KEY_8 },
36 { 0xda15, KEY_9 }, 36 { 0x15, KEY_9 },
37 { 0xdc17, KEY_0 }, 37 { 0x17, KEY_0 },
38 { 0xdb16, KEY_BACK }, 38 { 0x16, KEY_BACK },
39 { 0xdd18, KEY_KPENTER }, /* ent */ 39 { 0x18, KEY_KPENTER }, /* ent */
40 40
41 { 0xce09, KEY_VOLUMEUP }, 41 { 0x09, KEY_VOLUMEUP },
42 { 0xcd08, KEY_VOLUMEDOWN }, 42 { 0x08, KEY_VOLUMEDOWN },
43 { 0xcf0a, KEY_MUTE }, 43 { 0x0a, KEY_MUTE },
44 { 0xd00b, KEY_CHANNELUP }, 44 { 0x0b, KEY_CHANNELUP },
45 { 0xd10c, KEY_CHANNELDOWN }, 45 { 0x0c, KEY_CHANNELDOWN },
46 { 0xc500, KEY_VENDOR }, /* firefly */ 46 { 0x00, KEY_VENDOR }, /* firefly */
47 47
48 { 0xf32e, KEY_INFO }, 48 { 0x2e, KEY_INFO },
49 { 0xf42f, KEY_OPTION }, 49 { 0x2f, KEY_OPTION },
50 50
51 { 0xe21d, KEY_LEFT }, 51 { 0x1d, KEY_LEFT },
52 { 0xe41f, KEY_RIGHT }, 52 { 0x1f, KEY_RIGHT },
53 { 0xe722, KEY_DOWN }, 53 { 0x22, KEY_DOWN },
54 { 0xdf1a, KEY_UP }, 54 { 0x1a, KEY_UP },
55 { 0xe31e, KEY_OK }, 55 { 0x1e, KEY_OK },
56 56
57 { 0xe11c, KEY_MENU }, 57 { 0x1c, KEY_MENU },
58 { 0xe520, KEY_EXIT }, 58 { 0x20, KEY_EXIT },
59 59
60 { 0xec27, KEY_RECORD }, 60 { 0x27, KEY_RECORD },
61 { 0xea25, KEY_PLAY }, 61 { 0x25, KEY_PLAY },
62 { 0xed28, KEY_STOP }, 62 { 0x28, KEY_STOP },
63 { 0xe924, KEY_REWIND }, 63 { 0x24, KEY_REWIND },
64 { 0xeb26, KEY_FORWARD }, 64 { 0x26, KEY_FORWARD },
65 { 0xee29, KEY_PAUSE }, 65 { 0x29, KEY_PAUSE },
66 { 0xf02b, KEY_PREVIOUS }, 66 { 0x2b, KEY_PREVIOUS },
67 { 0xef2a, KEY_NEXT }, 67 { 0x2a, KEY_NEXT },
68 68
69 { 0xcb06, KEY_AUDIO }, /* Music */ 69 { 0x06, KEY_AUDIO }, /* Music */
70 { 0xca05, KEY_IMAGES }, /* Photos */ 70 { 0x05, KEY_IMAGES }, /* Photos */
71 { 0xc904, KEY_DVD }, 71 { 0x04, KEY_DVD },
72 { 0xc803, KEY_TV }, 72 { 0x03, KEY_TV },
73 { 0xcc07, KEY_VIDEO }, 73 { 0x07, KEY_VIDEO },
74 74
75 { 0xc601, KEY_HELP }, 75 { 0x01, KEY_HELP },
76 { 0xf22d, KEY_MODE }, /* Mouse */ 76 { 0x2d, KEY_MODE }, /* Mouse */
77 77
78 { 0xde19, KEY_A }, 78 { 0x19, KEY_A },
79 { 0xe01b, KEY_B }, 79 { 0x1b, KEY_B },
80 { 0xe621, KEY_C }, 80 { 0x21, KEY_C },
81 { 0xe823, KEY_D }, 81 { 0x23, KEY_D },
82}; 82};
83 83
84static struct rc_map_list snapstream_firefly_map = { 84static struct rc_map_list snapstream_firefly_map = {
diff --git a/drivers/media/video/au0828/au0828-cards.c b/drivers/media/video/au0828/au0828-cards.c
index 39fc923fc46b..1c6015a04f96 100644
--- a/drivers/media/video/au0828/au0828-cards.c
+++ b/drivers/media/video/au0828/au0828-cards.c
@@ -162,11 +162,14 @@ static void hauppauge_eeprom(struct au0828_dev *dev, u8 *eeprom_data)
162 switch (tv.model) { 162 switch (tv.model) {
163 case 72000: /* WinTV-HVR950q (Retail, IR, ATSC/QAM */ 163 case 72000: /* WinTV-HVR950q (Retail, IR, ATSC/QAM */
164 case 72001: /* WinTV-HVR950q (Retail, IR, ATSC/QAM and analog video */ 164 case 72001: /* WinTV-HVR950q (Retail, IR, ATSC/QAM and analog video */
165 case 72101: /* WinTV-HVR950q (Retail, IR, ATSC/QAM and analog video */
166 case 72201: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */
165 case 72211: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */ 167 case 72211: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */
166 case 72221: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */ 168 case 72221: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */
167 case 72231: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */ 169 case 72231: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */
168 case 72241: /* WinTV-HVR950q (OEM, No IR, ATSC/QAM and analog video */ 170 case 72241: /* WinTV-HVR950q (OEM, No IR, ATSC/QAM and analog video */
169 case 72251: /* WinTV-HVR950q (Retail, IR, ATSC/QAM and analog video */ 171 case 72251: /* WinTV-HVR950q (Retail, IR, ATSC/QAM and analog video */
172 case 72261: /* WinTV-HVR950q (OEM, IR, ATSC/QAM and analog video */
170 case 72301: /* WinTV-HVR850 (Retail, IR, ATSC and analog video */ 173 case 72301: /* WinTV-HVR850 (Retail, IR, ATSC and analog video */
171 case 72500: /* WinTV-HVR950q (OEM, No IR, ATSC/QAM */ 174 case 72500: /* WinTV-HVR950q (OEM, No IR, ATSC/QAM */
172 break; 175 break;
@@ -324,6 +327,10 @@ struct usb_device_id au0828_usb_id_table[] = {
324 .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q_MXL }, 327 .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q_MXL },
325 { USB_DEVICE(0x2040, 0x8200), 328 { USB_DEVICE(0x2040, 0x8200),
326 .driver_info = AU0828_BOARD_HAUPPAUGE_WOODBURY }, 329 .driver_info = AU0828_BOARD_HAUPPAUGE_WOODBURY },
330 { USB_DEVICE(0x2040, 0x7260),
331 .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q },
332 { USB_DEVICE(0x2040, 0x7213),
333 .driver_info = AU0828_BOARD_HAUPPAUGE_HVR950Q },
327 { }, 334 { },
328}; 335};
329 336
diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c
index 881e04c7ffe6..2ca10dfec91f 100644
--- a/drivers/media/video/gspca/gspca.c
+++ b/drivers/media/video/gspca/gspca.c
@@ -838,13 +838,13 @@ static int gspca_init_transfer(struct gspca_dev *gspca_dev)
838 gspca_dev->usb_err = 0; 838 gspca_dev->usb_err = 0;
839 839
840 /* do the specific subdriver stuff before endpoint selection */ 840 /* do the specific subdriver stuff before endpoint selection */
841 gspca_dev->alt = 0; 841 intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface);
842 gspca_dev->alt = gspca_dev->cam.bulk ? intf->num_altsetting : 0;
842 if (gspca_dev->sd_desc->isoc_init) { 843 if (gspca_dev->sd_desc->isoc_init) {
843 ret = gspca_dev->sd_desc->isoc_init(gspca_dev); 844 ret = gspca_dev->sd_desc->isoc_init(gspca_dev);
844 if (ret < 0) 845 if (ret < 0)
845 goto unlock; 846 goto unlock;
846 } 847 }
847 intf = usb_ifnum_to_if(gspca_dev->dev, gspca_dev->iface);
848 xfer = gspca_dev->cam.bulk ? USB_ENDPOINT_XFER_BULK 848 xfer = gspca_dev->cam.bulk ? USB_ENDPOINT_XFER_BULK
849 : USB_ENDPOINT_XFER_ISOC; 849 : USB_ENDPOINT_XFER_ISOC;
850 850
@@ -957,7 +957,7 @@ retry:
957 ret = -EIO; 957 ret = -EIO;
958 goto out; 958 goto out;
959 } 959 }
960 alt = ep_tb[--alt_idx].alt; 960 gspca_dev->alt = ep_tb[--alt_idx].alt;
961 } 961 }
962 } 962 }
963out: 963out:
diff --git a/drivers/media/video/m5mols/m5mols.h b/drivers/media/video/m5mols/m5mols.h
index 89d09a8914f8..82c8817bd32d 100644
--- a/drivers/media/video/m5mols/m5mols.h
+++ b/drivers/media/video/m5mols/m5mols.h
@@ -162,7 +162,6 @@ struct m5mols_version {
162 * @pad: media pad 162 * @pad: media pad
163 * @ffmt: current fmt according to resolution type 163 * @ffmt: current fmt according to resolution type
164 * @res_type: current resolution type 164 * @res_type: current resolution type
165 * @code: current code
166 * @irq_waitq: waitqueue for the capture 165 * @irq_waitq: waitqueue for the capture
167 * @work_irq: workqueue for the IRQ 166 * @work_irq: workqueue for the IRQ
168 * @flags: state variable for the interrupt handler 167 * @flags: state variable for the interrupt handler
@@ -192,7 +191,6 @@ struct m5mols_info {
192 struct media_pad pad; 191 struct media_pad pad;
193 struct v4l2_mbus_framefmt ffmt[M5MOLS_RESTYPE_MAX]; 192 struct v4l2_mbus_framefmt ffmt[M5MOLS_RESTYPE_MAX];
194 int res_type; 193 int res_type;
195 enum v4l2_mbus_pixelcode code;
196 wait_queue_head_t irq_waitq; 194 wait_queue_head_t irq_waitq;
197 struct work_struct work_irq; 195 struct work_struct work_irq;
198 unsigned long flags; 196 unsigned long flags;
diff --git a/drivers/media/video/m5mols/m5mols_core.c b/drivers/media/video/m5mols/m5mols_core.c
index 05ab3700647e..e0f09e531800 100644
--- a/drivers/media/video/m5mols/m5mols_core.c
+++ b/drivers/media/video/m5mols/m5mols_core.c
@@ -334,7 +334,7 @@ int m5mols_mode(struct m5mols_info *info, u8 mode)
334 int ret = -EINVAL; 334 int ret = -EINVAL;
335 u8 reg; 335 u8 reg;
336 336
337 if (mode < REG_PARAMETER && mode > REG_CAPTURE) 337 if (mode < REG_PARAMETER || mode > REG_CAPTURE)
338 return ret; 338 return ret;
339 339
340 ret = m5mols_read_u8(sd, SYSTEM_SYSMODE, &reg); 340 ret = m5mols_read_u8(sd, SYSTEM_SYSMODE, &reg);
@@ -511,9 +511,6 @@ static int m5mols_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh,
511 struct m5mols_info *info = to_m5mols(sd); 511 struct m5mols_info *info = to_m5mols(sd);
512 struct v4l2_mbus_framefmt *format; 512 struct v4l2_mbus_framefmt *format;
513 513
514 if (fmt->pad != 0)
515 return -EINVAL;
516
517 format = __find_format(info, fh, fmt->which, info->res_type); 514 format = __find_format(info, fh, fmt->which, info->res_type);
518 if (!format) 515 if (!format)
519 return -EINVAL; 516 return -EINVAL;
@@ -532,9 +529,6 @@ static int m5mols_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh,
532 u32 resolution = 0; 529 u32 resolution = 0;
533 int ret; 530 int ret;
534 531
535 if (fmt->pad != 0)
536 return -EINVAL;
537
538 ret = __find_resolution(sd, format, &type, &resolution); 532 ret = __find_resolution(sd, format, &type, &resolution);
539 if (ret < 0) 533 if (ret < 0)
540 return ret; 534 return ret;
@@ -543,13 +537,14 @@ static int m5mols_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh,
543 if (!sfmt) 537 if (!sfmt)
544 return 0; 538 return 0;
545 539
546 *sfmt = m5mols_default_ffmt[type]; 540
547 sfmt->width = format->width; 541 format->code = m5mols_default_ffmt[type].code;
548 sfmt->height = format->height; 542 format->colorspace = V4L2_COLORSPACE_JPEG;
543 format->field = V4L2_FIELD_NONE;
549 544
550 if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE) { 545 if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE) {
546 *sfmt = *format;
551 info->resolution = resolution; 547 info->resolution = resolution;
552 info->code = format->code;
553 info->res_type = type; 548 info->res_type = type;
554 } 549 }
555 550
@@ -626,13 +621,14 @@ static int m5mols_start_monitor(struct m5mols_info *info)
626static int m5mols_s_stream(struct v4l2_subdev *sd, int enable) 621static int m5mols_s_stream(struct v4l2_subdev *sd, int enable)
627{ 622{
628 struct m5mols_info *info = to_m5mols(sd); 623 struct m5mols_info *info = to_m5mols(sd);
624 u32 code = info->ffmt[info->res_type].code;
629 625
630 if (enable) { 626 if (enable) {
631 int ret = -EINVAL; 627 int ret = -EINVAL;
632 628
633 if (is_code(info->code, M5MOLS_RESTYPE_MONITOR)) 629 if (is_code(code, M5MOLS_RESTYPE_MONITOR))
634 ret = m5mols_start_monitor(info); 630 ret = m5mols_start_monitor(info);
635 if (is_code(info->code, M5MOLS_RESTYPE_CAPTURE)) 631 if (is_code(code, M5MOLS_RESTYPE_CAPTURE))
636 ret = m5mols_start_capture(info); 632 ret = m5mols_start_capture(info);
637 633
638 return ret; 634 return ret;
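
The one-character m5mols_mode() change fixes a classic inverted range test: "mode < REG_PARAMETER && mode > REG_CAPTURE" can never be true, so no mode was ever rejected, while "||" correctly rejects anything outside the valid range. A tiny demonstration with hypothetical bounds:

```c
#include <stdio.h>

#define REG_LO 1			/* hypothetical REG_PARAMETER */
#define REG_HI 3			/* hypothetical REG_CAPTURE */

static int check_buggy(int mode)
{
	return (mode < REG_LO && mode > REG_HI) ? -1 : 0; /* never rejects */
}

static int check_fixed(int mode)
{
	return (mode < REG_LO || mode > REG_HI) ? -1 : 0;
}

int main(void)
{
	int mode;

	for (mode = 0; mode <= 4; mode++)
		printf("mode %d: buggy=%d fixed=%d\n",
		       mode, check_buggy(mode), check_fixed(mode));
	return 0;
}
```
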
diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c
index cf2c0fb95f2f..398f96ffd35e 100644
--- a/drivers/media/video/mt9m111.c
+++ b/drivers/media/video/mt9m111.c
@@ -955,6 +955,7 @@ static int mt9m111_probe(struct i2c_client *client,
955 mt9m111->rect.height = MT9M111_MAX_HEIGHT; 955 mt9m111->rect.height = MT9M111_MAX_HEIGHT;
956 mt9m111->fmt = &mt9m111_colour_fmts[0]; 956 mt9m111->fmt = &mt9m111_colour_fmts[0];
957 mt9m111->lastpage = -1; 957 mt9m111->lastpage = -1;
958 mutex_init(&mt9m111->power_lock);
958 959
959 ret = mt9m111_video_probe(client); 960 ret = mt9m111_video_probe(client);
960 if (ret) { 961 if (ret) {
diff --git a/drivers/media/video/mt9t112.c b/drivers/media/video/mt9t112.c
index 32114a3c0ca7..7b34b11daf24 100644
--- a/drivers/media/video/mt9t112.c
+++ b/drivers/media/video/mt9t112.c
@@ -1083,8 +1083,10 @@ static int mt9t112_probe(struct i2c_client *client,
1083 v4l2_i2c_subdev_init(&priv->subdev, client, &mt9t112_subdev_ops); 1083 v4l2_i2c_subdev_init(&priv->subdev, client, &mt9t112_subdev_ops);
1084 1084
1085 ret = mt9t112_camera_probe(client); 1085 ret = mt9t112_camera_probe(client);
1086 if (ret) 1086 if (ret) {
1087 kfree(priv); 1087 kfree(priv);
1088 return ret;
1089 }
1088 1090
1089 /* Cannot fail: using the default supported pixel code */ 1091 /* Cannot fail: using the default supported pixel code */
1090 mt9t112_set_params(priv, &rect, V4L2_MBUS_FMT_UYVY8_2X8); 1092 mt9t112_set_params(priv, &rect, V4L2_MBUS_FMT_UYVY8_2X8);
diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c
index 9c5c19f142de..ee0d0b39cd17 100644
--- a/drivers/media/video/omap/omap_vout.c
+++ b/drivers/media/video/omap/omap_vout.c
@@ -38,6 +38,7 @@
38#include <linux/irq.h> 38#include <linux/irq.h>
39#include <linux/videodev2.h> 39#include <linux/videodev2.h>
40#include <linux/dma-mapping.h> 40#include <linux/dma-mapping.h>
41#include <linux/slab.h>
41 42
42#include <media/videobuf-dma-contig.h> 43#include <media/videobuf-dma-contig.h>
43#include <media/v4l2-device.h> 44#include <media/v4l2-device.h>
@@ -2169,6 +2170,14 @@ static int __init omap_vout_probe(struct platform_device *pdev)
2169 vid_dev->num_displays = 0; 2170 vid_dev->num_displays = 0;
2170 for_each_dss_dev(dssdev) { 2171 for_each_dss_dev(dssdev) {
2171 omap_dss_get_device(dssdev); 2172 omap_dss_get_device(dssdev);
2173
2174 if (!dssdev->driver) {
2175 dev_warn(&pdev->dev, "no driver for display: %s\n",
2176 dssdev->name);
2177 omap_dss_put_device(dssdev);
2178 continue;
2179 }
2180
2172 vid_dev->displays[vid_dev->num_displays++] = dssdev; 2181 vid_dev->displays[vid_dev->num_displays++] = dssdev;
2173 } 2182 }
2174 2183
diff --git a/drivers/media/video/omap1_camera.c b/drivers/media/video/omap1_camera.c
index e87ae2f634b2..6a6cf388bae4 100644
--- a/drivers/media/video/omap1_camera.c
+++ b/drivers/media/video/omap1_camera.c
@@ -24,6 +24,7 @@
24#include <linux/clk.h> 24#include <linux/clk.h>
25#include <linux/dma-mapping.h> 25#include <linux/dma-mapping.h>
26#include <linux/interrupt.h> 26#include <linux/interrupt.h>
27#include <linux/module.h>
27#include <linux/platform_device.h> 28#include <linux/platform_device.h>
28#include <linux/slab.h> 29#include <linux/slab.h>
29 30
diff --git a/drivers/media/video/omap24xxcam-dma.c b/drivers/media/video/omap24xxcam-dma.c
index 1d54b86c936b..3ea38a8def8e 100644
--- a/drivers/media/video/omap24xxcam-dma.c
+++ b/drivers/media/video/omap24xxcam-dma.c
@@ -506,7 +506,7 @@ int omap24xxcam_sgdma_queue(struct omap24xxcam_sgdma *sgdma,
506 unsigned long flags; 506 unsigned long flags;
507 struct sgdma_state *sg_state; 507 struct sgdma_state *sg_state;
508 508
509 if ((sglen < 0) || ((sglen > 0) & !sglist)) 509 if ((sglen < 0) || ((sglen > 0) && !sglist))
510 return -EINVAL; 510 return -EINVAL;
511 511
512 spin_lock_irqsave(&sgdma->lock, flags); 512 spin_lock_irqsave(&sgdma->lock, flags);
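
The omap24xxcam-dma fix replaces bitwise "&" with logical "&&". Both operands already evaluate to 0 or 1 here, so the result happened to be the same, but "&" does not short-circuit, which bites as soon as the right-hand side is only safe to evaluate when the left-hand side holds:

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *s = NULL;

	/* Safe: && short-circuits, strlen() is never called on NULL. */
	if (s != NULL && strlen(s) > 0)
		printf("non-empty\n");
	else
		printf("null or empty\n");

	/* With bitwise &, both sides would be evaluated unconditionally:
	 *   if ((s != NULL) & (strlen(s) > 0))   -- crashes on NULL
	 * which is why && is the right operator for guard conditions. */
	return 0;
}
```
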
diff --git a/drivers/media/video/omap3isp/ispccdc.c b/drivers/media/video/omap3isp/ispccdc.c
index b0b0fa5a3572..54a4a3f22e2e 100644
--- a/drivers/media/video/omap3isp/ispccdc.c
+++ b/drivers/media/video/omap3isp/ispccdc.c
@@ -1408,7 +1408,7 @@ static void ccdc_hs_vs_isr(struct isp_ccdc_device *ccdc)
1408{ 1408{
1409 struct isp_pipeline *pipe = 1409 struct isp_pipeline *pipe =
1410 to_isp_pipeline(&ccdc->video_out.video.entity); 1410 to_isp_pipeline(&ccdc->video_out.video.entity);
1411 struct video_device *vdev = &ccdc->subdev.devnode; 1411 struct video_device *vdev = ccdc->subdev.devnode;
1412 struct v4l2_event event; 1412 struct v4l2_event event;
1413 1413
1414 memset(&event, 0, sizeof(event)); 1414 memset(&event, 0, sizeof(event));
diff --git a/drivers/media/video/omap3isp/ispstat.c b/drivers/media/video/omap3isp/ispstat.c
index 68d539456c55..bc0b2c7349b9 100644
--- a/drivers/media/video/omap3isp/ispstat.c
+++ b/drivers/media/video/omap3isp/ispstat.c
@@ -496,7 +496,7 @@ static int isp_stat_bufs_alloc(struct ispstat *stat, u32 size)
496 496
497static void isp_stat_queue_event(struct ispstat *stat, int err) 497static void isp_stat_queue_event(struct ispstat *stat, int err)
498{ 498{
499 struct video_device *vdev = &stat->subdev.devnode; 499 struct video_device *vdev = stat->subdev.devnode;
500 struct v4l2_event event; 500 struct v4l2_event event;
501 struct omap3isp_stat_event_status *status = (void *)event.u.data; 501 struct omap3isp_stat_event_status *status = (void *)event.u.data;
502 502
diff --git a/drivers/media/video/omap3isp/ispvideo.c b/drivers/media/video/omap3isp/ispvideo.c
index d1000723c5ae..f2290578448c 100644
--- a/drivers/media/video/omap3isp/ispvideo.c
+++ b/drivers/media/video/omap3isp/ispvideo.c
@@ -26,6 +26,7 @@
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <linux/clk.h> 27#include <linux/clk.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/module.h>
29#include <linux/pagemap.h> 30#include <linux/pagemap.h>
30#include <linux/scatterlist.h> 31#include <linux/scatterlist.h>
31#include <linux/sched.h> 32#include <linux/sched.h>
diff --git a/drivers/media/video/ov6650.c b/drivers/media/video/ov6650.c
index 9f2d26b1d4cb..6806345ec2f0 100644
--- a/drivers/media/video/ov6650.c
+++ b/drivers/media/video/ov6650.c
@@ -540,7 +540,7 @@ static u8 to_clkrc(struct v4l2_fract *timeperframe,
 static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
-	struct soc_camera_device *icd = (struct soc_camera_device *)sd->grp_id;
+	struct soc_camera_device *icd = v4l2_get_subdev_hostdata(sd);
 	struct soc_camera_sense *sense = icd->sense;
 	struct ov6650 *priv = to_ov6650(client);
 	bool half_scale = !is_unscaled_ok(mf->width, mf->height, &priv->rect);
diff --git a/drivers/media/video/s5p-fimc/fimc-capture.c b/drivers/media/video/s5p-fimc/fimc-capture.c
index c8d91b0cd9bd..2cc3b9166724 100644
--- a/drivers/media/video/s5p-fimc/fimc-capture.c
+++ b/drivers/media/video/s5p-fimc/fimc-capture.c
@@ -98,6 +98,10 @@ static int fimc_capture_state_cleanup(struct fimc_dev *fimc, bool suspend)
 		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR);
 	}
 	set_bit(ST_CAPT_SUSPENDED, &fimc->state);
+
+	fimc_hw_reset(fimc);
+	cap->buf_index = 0;
+
 	spin_unlock_irqrestore(&fimc->slock, flags);
 
 	if (streaming)
@@ -137,7 +141,7 @@ int fimc_capture_config_update(struct fimc_ctx *ctx)
 	struct fimc_dev *fimc = ctx->fimc_dev;
 	int ret;
 
-	if (test_bit(ST_CAPT_APPLY_CFG, &fimc->state))
+	if (!test_bit(ST_CAPT_APPLY_CFG, &fimc->state))
 		return 0;
 
 	spin_lock(&ctx->slock);
@@ -150,7 +154,7 @@ int fimc_capture_config_update(struct fimc_ctx *ctx)
 		fimc_hw_set_rotation(ctx);
 		fimc_prepare_dma_offset(ctx, &ctx->d_frame);
 		fimc_hw_set_out_dma(ctx);
-		set_bit(ST_CAPT_APPLY_CFG, &fimc->state);
+		clear_bit(ST_CAPT_APPLY_CFG, &fimc->state);
 	}
 	spin_unlock(&ctx->slock);
 	return ret;
@@ -164,7 +168,6 @@ static int start_streaming(struct vb2_queue *q, unsigned int count)
 	int min_bufs;
 	int ret;
 
-	fimc_hw_reset(fimc);
 	vid_cap->frame_count = 0;
 
 	ret = fimc_init_capture(fimc);
@@ -523,7 +526,7 @@ static struct fimc_fmt *fimc_capture_try_format(struct fimc_ctx *ctx,
 	max_w = rotation ? pl->out_rot_en_w : pl->out_rot_dis_w;
 	min_w = ctx->state & FIMC_DST_CROP ? dst->width : var->min_out_pixsize;
 	min_h = ctx->state & FIMC_DST_CROP ? dst->height : var->min_out_pixsize;
-	if (fimc->id == 1 && var->pix_hoff)
+	if (var->min_vsize_align == 1 && !rotation)
 		align_h = fimc_fmt_is_rgb(ffmt->color) ? 0 : 1;
 
 	depth = fimc_get_format_depth(ffmt);
@@ -1239,6 +1242,7 @@ static int fimc_subdev_set_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&fimc->lock);
 	set_frame_bounds(ff, mf->width, mf->height);
+	fimc->vid_cap.mf = *mf;
 	ff->fmt = ffmt;
 
 	/* Reset the crop rectangle if required. */
@@ -1375,7 +1379,7 @@ static void fimc_destroy_capture_subdev(struct fimc_dev *fimc)
 	media_entity_cleanup(&sd->entity);
 	v4l2_device_unregister_subdev(sd);
 	kfree(sd);
-	sd = NULL;
+	fimc->vid_cap.subdev = NULL;
 }
 
 /* Set default format at the sensor and host interface */
diff --git a/drivers/media/video/s5p-fimc/fimc-core.c b/drivers/media/video/s5p-fimc/fimc-core.c
index 19ca6db38b2f..07c6254faee3 100644
--- a/drivers/media/video/s5p-fimc/fimc-core.c
+++ b/drivers/media/video/s5p-fimc/fimc-core.c
@@ -37,7 +37,7 @@ static char *fimc_clocks[MAX_FIMC_CLOCKS] = {
 static struct fimc_fmt fimc_formats[] = {
 	{
 		.name		= "RGB565",
-		.fourcc		= V4L2_PIX_FMT_RGB565X,
+		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.depth		= { 16 },
 		.color		= S5P_FIMC_RGB565,
 		.memplanes	= 1,
@@ -1038,12 +1038,11 @@ static int fimc_try_fmt_mplane(struct fimc_ctx *ctx, struct v4l2_format *f)
 		mod_x = 6; /* 64 x 32 pixels tile */
 		mod_y = 5;
 	} else {
-		if (fimc->id == 1 && variant->pix_hoff)
+		if (variant->min_vsize_align == 1)
 			mod_y = fimc_fmt_is_rgb(fmt->color) ? 0 : 1;
 		else
-			mod_y = mod_x;
+			mod_y = ffs(variant->min_vsize_align) - 1;
 	}
-	dbg("mod_x: %d, mod_y: %d, max_w: %d", mod_x, mod_y, max_w);
 
 	v4l_bound_align_image(&pix->width, 16, max_w, mod_x,
 		&pix->height, 8, variant->pix_limit->scaler_dis_w, mod_y, 0);
@@ -1226,10 +1225,10 @@ static int fimc_m2m_try_crop(struct fimc_ctx *ctx, struct v4l2_crop *cr)
 		fimc->variant->min_inp_pixsize : fimc->variant->min_out_pixsize;
 
 	/* Get pixel alignment constraints. */
-	if (fimc->id == 1 && fimc->variant->pix_hoff)
+	if (fimc->variant->min_vsize_align == 1)
 		halign = fimc_fmt_is_rgb(f->fmt->color) ? 0 : 1;
 	else
-		halign = ffs(min_size) - 1;
+		halign = ffs(fimc->variant->min_vsize_align) - 1;
 
 	for (i = 0; i < f->fmt->colplanes; i++)
 		depth += f->fmt->depth[i];
@@ -1615,7 +1614,6 @@ static int fimc_probe(struct platform_device *pdev)
 	pdata = pdev->dev.platform_data;
 	fimc->pdata = pdata;
 
-	set_bit(ST_LPM, &fimc->state);
 
 	init_waitqueue_head(&fimc->irq_queue);
 	spin_lock_init(&fimc->slock);
@@ -1707,8 +1705,6 @@ static int fimc_runtime_resume(struct device *dev)
 	/* Enable clocks and perform basic initalization */
 	clk_enable(fimc->clock[CLK_GATE]);
 	fimc_hw_reset(fimc);
-	if (fimc->variant->out_buf_count > 4)
-		fimc_hw_set_dma_seq(fimc, 0xF);
 
 	/* Resume the capture or mem-to-mem device */
 	if (fimc_capture_busy(fimc))
@@ -1750,8 +1746,6 @@ static int fimc_resume(struct device *dev)
 		return 0;
 	}
 	fimc_hw_reset(fimc);
-	if (fimc->variant->out_buf_count > 4)
-		fimc_hw_set_dma_seq(fimc, 0xF);
 	spin_unlock_irqrestore(&fimc->slock, flags);
 
 	if (fimc_capture_busy(fimc))
@@ -1780,7 +1774,6 @@ static int __devexit fimc_remove(struct platform_device *pdev)
 	struct fimc_dev *fimc = platform_get_drvdata(pdev);
 
 	pm_runtime_disable(&pdev->dev);
-	fimc_runtime_suspend(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
 
 	vb2_dma_contig_cleanup_ctx(fimc->alloc_ctx);
@@ -1840,6 +1833,7 @@ static struct samsung_fimc_variant fimc0_variant_s5p = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 8,
+	.min_vsize_align = 16,
 	.out_buf_count	 = 4,
 	.pix_limit	 = &s5p_pix_limit[0],
 };
@@ -1849,6 +1843,7 @@ static struct samsung_fimc_variant fimc2_variant_s5p = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 8,
+	.min_vsize_align = 16,
 	.out_buf_count	 = 4,
 	.pix_limit	 = &s5p_pix_limit[1],
 };
@@ -1861,6 +1856,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv210 = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 8,
+	.min_vsize_align = 16,
 	.out_buf_count	 = 4,
 	.pix_limit	 = &s5p_pix_limit[1],
 };
@@ -1874,6 +1870,7 @@ static struct samsung_fimc_variant fimc1_variant_s5pv210 = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 1,
+	.min_vsize_align = 1,
 	.out_buf_count	 = 4,
 	.pix_limit	 = &s5p_pix_limit[2],
 };
@@ -1884,6 +1881,7 @@ static struct samsung_fimc_variant fimc2_variant_s5pv210 = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 8,
+	.min_vsize_align = 16,
 	.out_buf_count	 = 4,
 	.pix_limit	 = &s5p_pix_limit[2],
 };
@@ -1898,6 +1896,7 @@ static struct samsung_fimc_variant fimc0_variant_exynos4 = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 2,
+	.min_vsize_align = 1,
 	.out_buf_count	 = 32,
 	.pix_limit	 = &s5p_pix_limit[1],
 };
@@ -1910,6 +1909,7 @@ static struct samsung_fimc_variant fimc3_variant_exynos4 = {
 	.min_inp_pixsize = 16,
 	.min_out_pixsize = 16,
 	.hor_offs_align	 = 2,
+	.min_vsize_align = 1,
 	.out_buf_count	 = 32,
 	.pix_limit	 = &s5p_pix_limit[3],
 };
diff --git a/drivers/media/video/s5p-fimc/fimc-core.h b/drivers/media/video/s5p-fimc/fimc-core.h
index a6936dad5b10..c7f01c47b20f 100644
--- a/drivers/media/video/s5p-fimc/fimc-core.h
+++ b/drivers/media/video/s5p-fimc/fimc-core.h
@@ -377,6 +377,7 @@ struct fimc_pix_limit {
  * @min_inp_pixsize: minimum input pixel size
  * @min_out_pixsize: minimum output pixel size
  * @hor_offs_align: horizontal pixel offset aligment
+ * @min_vsize_align: minimum vertical pixel size alignment
  * @out_buf_count: the number of buffers in output DMA sequence
  */
 struct samsung_fimc_variant {
@@ -390,6 +391,7 @@ struct samsung_fimc_variant {
 	u16		min_inp_pixsize;
 	u16		min_out_pixsize;
 	u16		hor_offs_align;
+	u16		min_vsize_align;
 	u16		out_buf_count;
 };
 
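The fimc hunks above replace the per-chip "id == 1 && pix_hoff" special case with a min_vsize_align capability, converted with ffs() into the bit order that v4l_bound_align_image() expects. A hedged userspace sketch of that conversion (userspace ffs() from <strings.h>; the kernel uses its own, and the real helper also clamps to min/max bounds):

    #include <stdio.h>
    #include <strings.h>

    int main(void)
    {
            /* ffs() returns the 1-based index of the lowest set bit, so
             * for a power-of-two alignment a, ffs(a) - 1 == log2(a). */
            unsigned int min_vsize_align = 16;
            unsigned int halign = ffs(min_vsize_align) - 1;         /* 4 */

            /* Rounding a height down to that alignment. */
            unsigned int height = 485;
            unsigned int aligned = height & ~((1u << halign) - 1);  /* 480 */

            printf("halign=%u aligned=%u\n", halign, aligned);
            return 0;
    }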
diff --git a/drivers/media/video/s5p-fimc/fimc-mdevice.c b/drivers/media/video/s5p-fimc/fimc-mdevice.c
index cc337b1de913..615c862f0360 100644
--- a/drivers/media/video/s5p-fimc/fimc-mdevice.c
+++ b/drivers/media/video/s5p-fimc/fimc-mdevice.c
@@ -220,6 +220,7 @@ static struct v4l2_subdev *fimc_md_register_sensor(struct fimc_md *fmd,
 	sd = v4l2_i2c_new_subdev_board(&fmd->v4l2_dev, adapter,
 				       s_info->pdata->board_info, NULL);
 	if (IS_ERR_OR_NULL(sd)) {
+		i2c_put_adapter(adapter);
 		v4l2_err(&fmd->v4l2_dev, "Failed to acquire subdev\n");
 		return NULL;
 	}
@@ -234,12 +235,15 @@ static struct v4l2_subdev *fimc_md_register_sensor(struct fimc_md *fmd,
 static void fimc_md_unregister_sensor(struct v4l2_subdev *sd)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
+	struct i2c_adapter *adapter;
 
 	if (!client)
 		return;
 	v4l2_device_unregister_subdev(sd);
+	adapter = client->adapter;
 	i2c_unregister_device(client);
-	i2c_put_adapter(client->adapter);
+	if (adapter)
+		i2c_put_adapter(adapter);
 }
 
 static int fimc_md_register_sensor_entities(struct fimc_md *fmd)
@@ -381,20 +385,28 @@ static void fimc_md_unregister_entities(struct fimc_md *fmd)
 
 static int fimc_md_register_video_nodes(struct fimc_md *fmd)
 {
+	struct video_device *vdev;
 	int i, ret = 0;
 
 	for (i = 0; i < FIMC_MAX_DEVS && !ret; i++) {
 		if (!fmd->fimc[i])
 			continue;
 
-		if (fmd->fimc[i]->m2m.vfd)
-			ret = video_register_device(fmd->fimc[i]->m2m.vfd,
-						    VFL_TYPE_GRABBER, -1);
-		if (ret)
-			break;
-		if (fmd->fimc[i]->vid_cap.vfd)
-			ret = video_register_device(fmd->fimc[i]->vid_cap.vfd,
-						    VFL_TYPE_GRABBER, -1);
+		vdev = fmd->fimc[i]->m2m.vfd;
+		if (vdev) {
+			ret = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+			if (ret)
+				break;
+			v4l2_info(&fmd->v4l2_dev, "Registered %s as /dev/%s\n",
+				  vdev->name, video_device_node_name(vdev));
+		}
+
+		vdev = fmd->fimc[i]->vid_cap.vfd;
+		if (vdev == NULL)
+			continue;
+		ret = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+		v4l2_info(&fmd->v4l2_dev, "Registered %s as /dev/%s\n",
+			  vdev->name, video_device_node_name(vdev));
 	}
 
 	return ret;
@@ -502,7 +514,7 @@ static int fimc_md_create_links(struct fimc_md *fmd)
 		if (WARN(csis == NULL,
 			 "MIPI-CSI interface specified "
 			 "but s5p-csis module is not loaded!\n"))
-			continue;
+			return -EINVAL;
 
 		ret = media_entity_create_link(&sensor->entity, 0,
 					       &csis->entity, CSIS_PAD_SINK,
@@ -742,9 +754,6 @@ static int __devinit fimc_md_probe(struct platform_device *pdev)
 	struct fimc_md *fmd;
 	int ret;
 
-	if (WARN(!pdev->dev.platform_data, "Platform data not specified!\n"))
-		return -EINVAL;
-
 	fmd = kzalloc(sizeof(struct fimc_md), GFP_KERNEL);
 	if (!fmd)
 		return -ENOMEM;
@@ -782,9 +791,11 @@ static int __devinit fimc_md_probe(struct platform_device *pdev)
 	if (ret)
 		goto err3;
 
-	ret = fimc_md_register_sensor_entities(fmd);
-	if (ret)
-		goto err3;
+	if (pdev->dev.platform_data) {
+		ret = fimc_md_register_sensor_entities(fmd);
+		if (ret)
+			goto err3;
+	}
 	ret = fimc_md_create_links(fmd);
 	if (ret)
 		goto err3;
diff --git a/drivers/media/video/s5p-fimc/fimc-reg.c b/drivers/media/video/s5p-fimc/fimc-reg.c
index 20e664e34163..44f5c2d1920b 100644
--- a/drivers/media/video/s5p-fimc/fimc-reg.c
+++ b/drivers/media/video/s5p-fimc/fimc-reg.c
@@ -35,6 +35,9 @@ void fimc_hw_reset(struct fimc_dev *dev)
 	cfg = readl(dev->regs + S5P_CIGCTRL);
 	cfg &= ~S5P_CIGCTRL_SWRST;
 	writel(cfg, dev->regs + S5P_CIGCTRL);
+
+	if (dev->variant->out_buf_count > 4)
+		fimc_hw_set_dma_seq(dev, 0xF);
 }
 
 static u32 fimc_hw_get_in_flip(struct fimc_ctx *ctx)
@@ -251,7 +254,14 @@ static void fimc_hw_set_scaler(struct fimc_ctx *ctx)
 	struct fimc_scaler *sc = &ctx->scaler;
 	struct fimc_frame *src_frame = &ctx->s_frame;
 	struct fimc_frame *dst_frame = &ctx->d_frame;
-	u32 cfg = 0;
+
+	u32 cfg = readl(dev->regs + S5P_CISCCTRL);
+
+	cfg &= ~(S5P_CISCCTRL_CSCR2Y_WIDE | S5P_CISCCTRL_CSCY2R_WIDE |
+		 S5P_CISCCTRL_SCALEUP_H | S5P_CISCCTRL_SCALEUP_V |
+		 S5P_CISCCTRL_SCALERBYPASS | S5P_CISCCTRL_ONE2ONE |
+		 S5P_CISCCTRL_INRGB_FMT_MASK | S5P_CISCCTRL_OUTRGB_FMT_MASK |
+		 S5P_CISCCTRL_INTERLACE | S5P_CISCCTRL_RGB_EXT);
 
 	if (!(ctx->flags & FIMC_COLOR_RANGE_NARROW))
 		cfg |= (S5P_CISCCTRL_CSCR2Y_WIDE | S5P_CISCCTRL_CSCY2R_WIDE);
@@ -308,9 +318,9 @@ void fimc_hw_set_mainscaler(struct fimc_ctx *ctx)
 	fimc_hw_set_scaler(ctx);
 
 	cfg = readl(dev->regs + S5P_CISCCTRL);
+	cfg &= ~(S5P_CISCCTRL_MHRATIO_MASK | S5P_CISCCTRL_MVRATIO_MASK);
 
 	if (variant->has_mainscaler_ext) {
-		cfg &= ~(S5P_CISCCTRL_MHRATIO_MASK | S5P_CISCCTRL_MVRATIO_MASK);
 		cfg |= S5P_CISCCTRL_MHRATIO_EXT(sc->main_hratio);
 		cfg |= S5P_CISCCTRL_MVRATIO_EXT(sc->main_vratio);
 		writel(cfg, dev->regs + S5P_CISCCTRL);
@@ -323,7 +333,6 @@ void fimc_hw_set_mainscaler(struct fimc_ctx *ctx)
 		cfg |= S5P_CIEXTEN_MVRATIO_EXT(sc->main_vratio);
 		writel(cfg, dev->regs + S5P_CIEXTEN);
 	} else {
-		cfg &= ~(S5P_CISCCTRL_MHRATIO_MASK | S5P_CISCCTRL_MVRATIO_MASK);
 		cfg |= S5P_CISCCTRL_MHRATIO(sc->main_hratio);
 		cfg |= S5P_CISCCTRL_MVRATIO(sc->main_vratio);
 		writel(cfg, dev->regs + S5P_CISCCTRL);
diff --git a/drivers/media/video/s5p-mfc/s5p_mfc_enc.c b/drivers/media/video/s5p-mfc/s5p_mfc_enc.c
index 1e8cdb77d4b8..dff9dc798795 100644
--- a/drivers/media/video/s5p-mfc/s5p_mfc_enc.c
+++ b/drivers/media/video/s5p-mfc/s5p_mfc_enc.c
@@ -61,7 +61,7 @@ static struct s5p_mfc_fmt formats[] = {
 		.num_planes = 1,
 	},
 	{
-		.name = "H264 Encoded Stream",
+		.name = "H263 Encoded Stream",
 		.fourcc = V4L2_PIX_FMT_H263,
 		.codec_mode = S5P_FIMV_CODEC_H263_ENC,
 		.type = MFC_FMT_ENC,
diff --git a/drivers/media/video/s5p-tv/mixer_video.c b/drivers/media/video/s5p-tv/mixer_video.c
index e16d3a4bc1dc..b47d0c06ecf5 100644
--- a/drivers/media/video/s5p-tv/mixer_video.c
+++ b/drivers/media/video/s5p-tv/mixer_video.c
@@ -16,6 +16,7 @@
 #include <media/v4l2-ioctl.h>
 #include <linux/videodev2.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/version.h>
 #include <linux/timer.h>
 #include <media/videobuf2-dma-contig.h>
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index f390682629cf..c51decfcae19 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -566,8 +566,10 @@ static int sh_mobile_ceu_add_device(struct soc_camera_device *icd)
 	ret = sh_mobile_ceu_soft_reset(pcdev);
 
 	csi2_sd = find_csi2(pcdev);
-	if (csi2_sd)
-		csi2_sd->grp_id = (long)icd;
+	if (csi2_sd) {
+		csi2_sd->grp_id = soc_camera_grp_id(icd);
+		v4l2_set_subdev_hostdata(csi2_sd, icd);
+	}
 
 	ret = v4l2_subdev_call(csi2_sd, core, s_power, 1);
 	if (ret < 0 && ret != -ENOIOCTLCMD && ret != -ENODEV) {
@@ -768,7 +770,7 @@ static struct v4l2_subdev *find_bus_subdev(struct sh_mobile_ceu_dev *pcdev,
 {
 	if (pcdev->csi2_pdev) {
 		struct v4l2_subdev *csi2_sd = find_csi2(pcdev);
-		if (csi2_sd && csi2_sd->grp_id == (u32)icd)
+		if (csi2_sd && csi2_sd->grp_id == soc_camera_grp_id(icd))
 			return csi2_sd;
 	}
 
@@ -1089,8 +1091,9 @@ static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int
 	/* Try 2560x1920, 1280x960, 640x480, 320x240 */
 	mf.width = 2560 >> shift;
 	mf.height = 1920 >> shift;
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
-					 s_mbus_fmt, &mf);
+	ret = v4l2_device_call_until_err(sd->v4l2_dev,
+					 soc_camera_grp_id(icd), video,
+					 s_mbus_fmt, &mf);
 	if (ret < 0)
 		return ret;
 	shift++;
@@ -1389,7 +1392,8 @@ static int client_s_fmt(struct soc_camera_device *icd,
 	bool ceu_1to1;
 	int ret;
 
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
+	ret = v4l2_device_call_until_err(sd->v4l2_dev,
+					 soc_camera_grp_id(icd), video,
 					 s_mbus_fmt, mf);
 	if (ret < 0)
 		return ret;
@@ -1426,8 +1430,9 @@ static int client_s_fmt(struct soc_camera_device *icd,
 		tmp_h = min(2 * tmp_h, max_height);
 		mf->width = tmp_w;
 		mf->height = tmp_h;
-		ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
-						 s_mbus_fmt, mf);
+		ret = v4l2_device_call_until_err(sd->v4l2_dev,
+						 soc_camera_grp_id(icd), video,
+						 s_mbus_fmt, mf);
 		dev_geo(dev, "Camera scaled to %ux%u\n",
 			mf->width, mf->height);
 		if (ret < 0) {
@@ -1580,8 +1585,9 @@ static int sh_mobile_ceu_set_crop(struct soc_camera_device *icd,
 	}
 
 	if (interm_width < icd->user_width || interm_height < icd->user_height) {
-		ret = v4l2_device_call_until_err(sd->v4l2_dev, (int)icd, video,
-						 s_mbus_fmt, &mf);
+		ret = v4l2_device_call_until_err(sd->v4l2_dev,
+						 soc_camera_grp_id(icd), video,
+						 s_mbus_fmt, &mf);
 		if (ret < 0)
 			return ret;
 
@@ -1867,7 +1873,8 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 	mf.code = xlate->code;
 	mf.colorspace = pix->colorspace;
 
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video, try_mbus_fmt, &mf);
+	ret = v4l2_device_call_until_err(sd->v4l2_dev, soc_camera_grp_id(icd),
+					 video, try_mbus_fmt, &mf);
 	if (ret < 0)
 		return ret;
 
@@ -1891,8 +1898,9 @@ static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
 	 */
 	mf.width = 2560;
 	mf.height = 1920;
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, (long)icd, video,
-					 try_mbus_fmt, &mf);
+	ret = v4l2_device_call_until_err(sd->v4l2_dev,
+					 soc_camera_grp_id(icd), video,
+					 try_mbus_fmt, &mf);
 	if (ret < 0) {
 		/* Shouldn't actually happen... */
 		dev_err(icd->parent,
diff --git a/drivers/media/video/sh_mobile_csi2.c b/drivers/media/video/sh_mobile_csi2.c
index ea4f0473ed3b..8a652b53ff7e 100644
--- a/drivers/media/video/sh_mobile_csi2.c
+++ b/drivers/media/video/sh_mobile_csi2.c
@@ -143,7 +143,7 @@ static int sh_csi2_s_mbus_config(struct v4l2_subdev *sd,
 			       const struct v4l2_mbus_config *cfg)
 {
 	struct sh_csi2 *priv = container_of(sd, struct sh_csi2, subdev);
-	struct soc_camera_device *icd = (struct soc_camera_device *)sd->grp_id;
+	struct soc_camera_device *icd = v4l2_get_subdev_hostdata(sd);
 	struct v4l2_subdev *client_sd = soc_camera_to_subdev(icd);
 	struct v4l2_mbus_config client_cfg = {.type = V4L2_MBUS_CSI2,
 					      .flags = priv->mipi_flags};
@@ -202,7 +202,7 @@ static void sh_csi2_hwinit(struct sh_csi2 *priv)
 static int sh_csi2_client_connect(struct sh_csi2 *priv)
 {
 	struct sh_csi2_pdata *pdata = priv->pdev->dev.platform_data;
-	struct soc_camera_device *icd = (struct soc_camera_device *)priv->subdev.grp_id;
+	struct soc_camera_device *icd = v4l2_get_subdev_hostdata(&priv->subdev);
 	struct v4l2_subdev *client_sd = soc_camera_to_subdev(icd);
 	struct device *dev = v4l2_get_subdevdata(&priv->subdev);
 	struct v4l2_mbus_config cfg;
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index b72580c38957..62e4312515cb 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -1103,7 +1103,8 @@ static int soc_camera_probe(struct soc_camera_device *icd)
 	}
 
 	sd = soc_camera_to_subdev(icd);
-	sd->grp_id = (long)icd;
+	sd->grp_id = soc_camera_grp_id(icd);
+	v4l2_set_subdev_hostdata(sd, icd);
 
 	if (v4l2_ctrl_add_handler(&icd->ctrl_handler, sd->ctrl_handler))
 		goto ectrl;
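The soc-camera hunks above (ov6650, sh_mobile_ceu_camera, sh_mobile_csi2, and soc_camera itself) stop smuggling the soc_camera_device pointer through the integer grp_id field; the pointer now travels via the subdev host-private data, and grp_id carries only the numeric id from soc_camera_grp_id(). A minimal sketch of the pairing, assuming the stock v4l2-subdev accessors (which simply stash and return an opaque pointer):

    #include <media/soc_camera.h>
    #include <media/v4l2-subdev.h>

    /* Host side: publish the per-device context on the subdev. */
    static void host_attach(struct v4l2_subdev *sd,
                            struct soc_camera_device *icd)
    {
            sd->grp_id = soc_camera_grp_id(icd);    /* numeric match key only */
            v4l2_set_subdev_hostdata(sd, icd);      /* the pointer goes here */
    }

    /* Subdev side: retrieve it without casting an integer to a pointer. */
    static struct soc_camera_device *subdev_icd(struct v4l2_subdev *sd)
    {
            return v4l2_get_subdev_hostdata(sd);
    }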
diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c
index 43c0ebb81956..b7b2d3483fd4 100644
--- a/drivers/mfd/ab5500-debugfs.c
+++ b/drivers/mfd/ab5500-debugfs.c
@@ -4,7 +4,7 @@
  * Debugfs support for the AB5500 MFD driver
  */
 
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/mfd/ab5500/ab5500.h>
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 1e9173804ede..d3d572b2317b 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -620,6 +620,7 @@ static struct resource __devinitdata ab8500_fg_resources[] = {
 
 static struct resource __devinitdata ab8500_chargalg_resources[] = {};
 
+#ifdef CONFIG_DEBUG_FS
 static struct resource __devinitdata ab8500_debug_resources[] = {
 	{
 		.name	= "IRQ_FIRST",
@@ -634,6 +635,7 @@ static struct resource __devinitdata ab8500_debug_resources[] = {
 		.flags	= IORESOURCE_IRQ,
 	},
 };
+#endif
 
 static struct resource __devinitdata ab8500_usb_resources[] = {
 	{
diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c
index f1d88483112c..8d816cce8322 100644
--- a/drivers/mfd/adp5520.c
+++ b/drivers/mfd/adp5520.c
@@ -109,7 +109,7 @@ int adp5520_set_bits(struct device *dev, int reg, uint8_t bit_mask)
 
 	ret = __adp5520_read(chip->client, reg, &reg_val);
 
-	if (!ret && ((reg_val & bit_mask) == 0)) {
+	if (!ret && ((reg_val & bit_mask) != bit_mask)) {
 		reg_val |= bit_mask;
 		ret = __adp5520_write(chip->client, reg, reg_val);
 	}
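The same read-modify-write fix recurs in da903x and tps6586x below. With a multi-bit mask, (val & mask) == 0 is true only when every masked bit is clear, so a partially set mask used to skip the write and leave bits unset; (val & mask) != mask writes whenever any masked bit is still missing. A standalone illustration with hypothetical register values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t reg_val = 0x01;         /* bit 0 already set */
            uint8_t bit_mask = 0x03;        /* caller wants bits 0 and 1 set */

            /* Old test: mask is not all-clear, so no write happened. */
            printf("old: %s\n", (reg_val & bit_mask) == 0 ? "write" : "skip");

            /* Fixed test: bit 1 is still clear, so the write goes out. */
            printf("new: %s\n",
                   (reg_val & bit_mask) != bit_mask ? "write" : "skip");
            return 0;
    }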
diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index 1b79c37fd599..1924b857a0fb 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -182,7 +182,7 @@ int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
182 if (ret) 182 if (ret)
183 goto out; 183 goto out;
184 184
185 if ((reg_val & bit_mask) == 0) { 185 if ((reg_val & bit_mask) != bit_mask) {
186 reg_val |= bit_mask; 186 reg_val |= bit_mask;
187 ret = __da903x_write(chip->client, reg, reg_val); 187 ret = __da903x_write(chip->client, reg, reg_val);
188 } 188 }
@@ -549,6 +549,7 @@ static int __devexit da903x_remove(struct i2c_client *client)
 	struct da903x_chip *chip = i2c_get_clientdata(client);
 
 	da903x_remove_subdevs(chip);
+	free_irq(client->irq, chip);
 	kfree(chip);
 	return 0;
 }
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index 1e9ee533eacb..ef39528088f2 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -16,6 +16,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index bba26d96c240..a5ddf31b60ca 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -197,7 +197,7 @@ int tps6586x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
 	if (ret)
 		goto out;
 
-	if ((reg_val & bit_mask) == 0) {
+	if ((reg_val & bit_mask) != bit_mask) {
 		reg_val |= bit_mask;
 		ret = __tps6586x_write(to_i2c_client(dev), reg, reg_val);
 	}
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 6f5b8cf2f652..c1da84bc1573 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -120,7 +120,7 @@ int tps65910_clear_bits(struct tps65910 *tps65910, u8 reg, u8 mask)
 		goto out;
 	}
 
-	data &= mask;
+	data &= ~mask;
 	err = tps65910_i2c_write(tps65910, reg, 1, &data);
 	if (err)
 		dev_err(tps65910->dev, "write to reg %x failed\n", reg);
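The clear-bits counterpart of the fix above: clearing means AND with the complement of the mask. data &= mask did the opposite, erasing every bit outside the mask and keeping exactly the bits it was asked to clear. A one-line illustration:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t data = 0xf0, mask = 0x10;

            printf("buggy data &= mask:  0x%02x\n",
                   (uint8_t)(data & mask));     /* 0x10: kept the wrong bit */
            printf("fixed data &= ~mask: 0x%02x\n",
                   (uint8_t)(data & ~mask));    /* 0xe0: bit 4 cleared */
            return 0;
    }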
diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index bfbd66021afd..61e70cfaa774 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -363,13 +363,13 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
-	sid = twl_map[mod_no].sid;
-	twl = &twl_modules[sid];
-
 	if (unlikely(!inuse)) {
-		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		pr_err("%s: not initialized\n", DRIVER_NAME);
 		return -EPERM;
 	}
+	sid = twl_map[mod_no].sid;
+	twl = &twl_modules[sid];
+
 	mutex_lock(&twl->xfer_lock);
 	/*
 	 * [MSG1]: fill the register address data
@@ -420,13 +420,13 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
-	sid = twl_map[mod_no].sid;
-	twl = &twl_modules[sid];
-
 	if (unlikely(!inuse)) {
-		pr_err("%s: client %d is not initialized\n", DRIVER_NAME, sid);
+		pr_err("%s: not initialized\n", DRIVER_NAME);
 		return -EPERM;
 	}
+	sid = twl_map[mod_no].sid;
+	twl = &twl_modules[sid];
+
 	mutex_lock(&twl->xfer_lock);
 	/* [MSG1] fill the register address data */
 	msg = &twl->xfer_msg[0];
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index f062c8cc6c38..29f11e0765fe 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -432,6 +432,7 @@ struct sih_agent {
 	u32			edge_change;
 
 	struct mutex		irq_lock;
+	char			*irq_name;
 };
 
 /*----------------------------------------------------------------------*/
@@ -589,7 +590,7 @@ static inline int sih_read_isr(const struct sih *sih)
  * Generic handler for SIH interrupts ... we "know" this is called
  * in task context, with IRQs enabled.
  */
-static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
+static irqreturn_t handle_twl4030_sih(int irq, void *data)
 {
 	struct sih_agent *agent = irq_get_handler_data(irq);
 	const struct sih *sih = agent->sih;
@@ -602,7 +603,7 @@ static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
 		pr_err("twl4030: %s SIH, read ISR error %d\n",
 			sih->name, isr);
 		/* REVISIT: recover; eventually mask it all, etc */
-		return;
+		return IRQ_HANDLED;
 	}
 
 	while (isr) {
@@ -616,6 +617,7 @@ static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
 			pr_err("twl4030: %s SIH, invalid ISR bit %d\n",
 				sih->name, irq);
 	}
+	return IRQ_HANDLED;
 }
 
 static unsigned twl4030_irq_next;
@@ -668,18 +670,19 @@ int twl4030_sih_setup(int module)
 		activate_irq(irq);
 	}
 
-	status = irq_base;
 	twl4030_irq_next += i;
 
 	/* replace generic PIH handler (handle_simple_irq) */
 	irq = sih_mod + twl4030_irq_base;
 	irq_set_handler_data(irq, agent);
-	irq_set_chained_handler(irq, handle_twl4030_sih);
+	agent->irq_name = kasprintf(GFP_KERNEL, "twl4030_%s", sih->name);
+	status = request_threaded_irq(irq, NULL, handle_twl4030_sih, 0,
+				      agent->irq_name ?: sih->name, NULL);
 
 	pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name,
 		irq, irq_base, twl4030_irq_next - 1);
 
-	return status;
+	return status < 0 ? status : irq_base;
 }
 
 /* FIXME need a call to reverse twl4030_sih_setup() ... */
@@ -733,8 +736,9 @@ int twl4030_init_irq(int irq_num, unsigned irq_base, unsigned irq_end)
 	}
 
 	/* install an irq handler to demultiplex the TWL4030 interrupt */
-	status = request_threaded_irq(irq_num, NULL, handle_twl4030_pih, 0,
-				      "TWL4030-PIH", NULL);
+	status = request_threaded_irq(irq_num, NULL, handle_twl4030_pih,
+				      IRQF_ONESHOT,
+				      "TWL4030-PIH", NULL);
 	if (status < 0) {
 		pr_err("twl4030: could not claim irq%d: %d\n", irq_num, status);
 		goto fail_rqirq;
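The twl4030 hunks convert the SIH dispatcher from a chained flow handler into a threaded interrupt handler: it now has the irq_handler_t signature, returns IRQ_HANDLED, and is registered with request_threaded_irq(). A hedged sketch of that registration pattern (the demo_* names are hypothetical):

    #include <linux/interrupt.h>

    /* With a NULL hard-IRQ handler the core only wakes the handler
     * thread, which is allowed to sleep -- necessary here because the
     * TWL4030 handlers talk to the chip over I2C. */
    static irqreturn_t demo_thread_fn(int irq, void *data)
    {
            /* ... sleeping bus traffic ... */
            return IRQ_HANDLED;
    }

    static int demo_setup(unsigned int irq, void *dev)
    {
            return request_threaded_irq(irq, NULL, demo_thread_fn,
                                        IRQF_ONESHOT, "demo", dev);
    }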
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 5d6ba132837e..61894fced8ea 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -239,6 +239,7 @@ static int wm8994_suspend(struct device *dev)
 
 	switch (wm8994->type) {
 	case WM8958:
+	case WM1811:
 		ret = wm8994_reg_read(wm8994, WM8958_MIC_DETECT_1);
 		if (ret < 0) {
 			dev_err(dev, "Failed to read power status: %d\n", ret);
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index e8a5eb38748b..d31c78b72b0f 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -302,17 +302,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 	host->max_blk_size = 512;
 	host->max_blk_count = PAGE_CACHE_SIZE / 512;
 
-	/*
-	 * Enable runtime power management by default. This flag was added due
-	 * to runtime power management causing disruption for some users, but
-	 * the power on/off code has been improved since then.
-	 *
-	 * We'll enable this flag by default as an experiment, and if no
-	 * problems are reported, we will follow up later and remove the flag
-	 * altogether.
-	 */
-	host->caps = MMC_CAP_POWER_OFF_CARD;
-
 	return host;
 
 free:
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 50b5f9926f64..0726e59fd418 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -675,7 +675,8 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 	      unsigned int status)
 {
 	/* First check for errors */
-	if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
+	if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
+		      MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
 		u32 remain, success;
 
 		/* Terminate the DMA transfer */
@@ -754,8 +755,12 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
 	}
 
 	if (!cmd->data || cmd->error) {
-		if (host->data)
+		if (host->data) {
+			/* Terminate the DMA transfer */
+			if (dma_inprogress(host))
+				mmci_dma_data_error(host);
 			mmci_stop_data(host);
+		}
 		mmci_request_end(host, cmd->mrq);
 	} else if (!(cmd->data->flags & MMC_DATA_READ)) {
 		mmci_start_data(host, cmd->data);
@@ -955,8 +960,9 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
 		dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
 
 		data = host->data;
-		if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_TXUNDERRUN|
-			      MCI_RXOVERRUN|MCI_DATAEND|MCI_DATABLOCKEND) && data)
+		if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
+			      MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND|
+			      MCI_DATABLOCKEND) && data)
 			mmci_data_irq(host, data, status);
 
 		cmd = host->cmd;
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index 87b6f079b6e0..b4257e700617 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -109,13 +109,10 @@ static struct platform_driver sdhci_cns3xxx_driver = {
 	.driver		= {
 		.name	= "sdhci-cns3xxx",
 		.owner	= THIS_MODULE,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_cns3xxx_probe,
 	.remove		= __devexit_p(sdhci_cns3xxx_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 
 static int __init sdhci_cns3xxx_init(void)
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index f2d29dca4420..a81312c91f70 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -82,13 +82,10 @@ static struct platform_driver sdhci_dove_driver = {
 	.driver		= {
 		.name	= "sdhci-dove",
 		.owner	= THIS_MODULE,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_dove_probe,
 	.remove		= __devexit_p(sdhci_dove_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 
 static int __init sdhci_dove_init(void)
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 4b976f00ea85..38ebc4ea259f 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -599,14 +599,11 @@ static struct platform_driver sdhci_esdhc_imx_driver = {
 		.name	= "sdhci-esdhc-imx",
 		.owner	= THIS_MODULE,
 		.of_match_table = imx_esdhc_dt_ids,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.id_table	= imx_esdhc_devtype,
 	.probe		= sdhci_esdhc_imx_probe,
 	.remove		= __devexit_p(sdhci_esdhc_imx_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 
 static int __init sdhci_esdhc_imx_init(void)
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 59e9d003e589..01e5f627e0f0 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -125,13 +125,10 @@ static struct platform_driver sdhci_esdhc_driver = {
 		.name	= "sdhci-esdhc",
 		.owner	= THIS_MODULE,
 		.of_match_table = sdhci_esdhc_of_match,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_esdhc_probe,
 	.remove		= __devexit_p(sdhci_esdhc_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 
 static int __init sdhci_esdhc_init(void)
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 9b0d794a4f69..3619adc7d9fc 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -87,13 +87,10 @@ static struct platform_driver sdhci_hlwd_driver = {
 		.name	= "sdhci-hlwd",
 		.owner	= THIS_MODULE,
 		.of_match_table = sdhci_hlwd_of_match,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_hlwd_probe,
 	.remove		= __devexit_p(sdhci_hlwd_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 
 static int __init sdhci_hlwd_init(void)
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index d833d9c2f7e3..6878a94626bc 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -54,8 +54,7 @@ struct sdhci_pci_fixes {
 	int			(*probe_slot) (struct sdhci_pci_slot *);
 	void			(*remove_slot) (struct sdhci_pci_slot *, int);
 
-	int			(*suspend) (struct sdhci_pci_chip *,
-				pm_message_t);
+	int			(*suspend) (struct sdhci_pci_chip *);
 	int			(*resume) (struct sdhci_pci_chip *);
 };
 
@@ -549,7 +548,7 @@ static void jmicron_remove_slot(struct sdhci_pci_slot *slot, int dead)
 	jmicron_enable_mmc(slot->host, 0);
 }
 
-static int jmicron_suspend(struct sdhci_pci_chip *chip, pm_message_t state)
+static int jmicron_suspend(struct sdhci_pci_chip *chip)
 {
 	int i;
 
@@ -993,8 +992,9 @@ static struct sdhci_ops sdhci_pci_ops = {
 
 #ifdef CONFIG_PM
 
-static int sdhci_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+static int sdhci_pci_suspend(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sdhci_pci_chip *chip;
 	struct sdhci_pci_slot *slot;
 	mmc_pm_flag_t slot_pm_flags;
@@ -1010,7 +1010,7 @@ static int sdhci_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 		if (!slot)
 			continue;
 
-		ret = sdhci_suspend_host(slot->host, state);
+		ret = sdhci_suspend_host(slot->host);
 
 		if (ret) {
 			for (i--; i >= 0; i--)
@@ -1026,7 +1026,7 @@ static int sdhci_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	}
 
 	if (chip->fixes && chip->fixes->suspend) {
-		ret = chip->fixes->suspend(chip, state);
+		ret = chip->fixes->suspend(chip);
 		if (ret) {
 			for (i = chip->num_slots - 1; i >= 0; i--)
 				sdhci_resume_host(chip->slots[i]->host);
@@ -1042,16 +1042,17 @@ static int sdhci_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 		}
 		pci_set_power_state(pdev, PCI_D3hot);
 	} else {
-		pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
+		pci_enable_wake(pdev, PCI_D3hot, 0);
 		pci_disable_device(pdev);
-		pci_set_power_state(pdev, pci_choose_state(pdev, state));
+		pci_set_power_state(pdev, PCI_D3hot);
 	}
 
 	return 0;
 }
 
-static int sdhci_pci_resume(struct pci_dev *pdev)
+static int sdhci_pci_resume(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sdhci_pci_chip *chip;
 	struct sdhci_pci_slot *slot;
 	int i, ret;
@@ -1099,7 +1100,6 @@ static int sdhci_pci_runtime_suspend(struct device *dev)
 	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
 	struct sdhci_pci_chip *chip;
 	struct sdhci_pci_slot *slot;
-	pm_message_t state = { .event = PM_EVENT_SUSPEND };
 	int i, ret;
 
 	chip = pci_get_drvdata(pdev);
@@ -1121,7 +1121,7 @@ static int sdhci_pci_runtime_suspend(struct device *dev)
 	}
 
 	if (chip->fixes && chip->fixes->suspend) {
-		ret = chip->fixes->suspend(chip, state);
+		ret = chip->fixes->suspend(chip);
 		if (ret) {
 			for (i = chip->num_slots - 1; i >= 0; i--)
 				sdhci_runtime_resume_host(chip->slots[i]->host);
@@ -1176,6 +1176,8 @@ static int sdhci_pci_runtime_idle(struct device *dev)
 #endif
 
 static const struct dev_pm_ops sdhci_pci_pm_ops = {
+	.suspend = sdhci_pci_suspend,
+	.resume = sdhci_pci_resume,
 	.runtime_suspend = sdhci_pci_runtime_suspend,
 	.runtime_resume = sdhci_pci_runtime_resume,
 	.runtime_idle = sdhci_pci_runtime_idle,
@@ -1428,8 +1430,6 @@ static struct pci_driver sdhci_driver = {
 	.id_table =	pci_ids,
 	.probe =	sdhci_pci_probe,
 	.remove =	__devexit_p(sdhci_pci_remove),
-	.suspend =	sdhci_pci_suspend,
-	.resume =	sdhci_pci_resume,
 	.driver =	{
 		.pm =	&sdhci_pci_pm_ops
 	},
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index a9e12ea05583..03970bcb3495 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -194,21 +194,25 @@ int sdhci_pltfm_unregister(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister);
 
 #ifdef CONFIG_PM
-int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state)
+static int sdhci_pltfm_suspend(struct device *dev)
 {
-	struct sdhci_host *host = platform_get_drvdata(dev);
+	struct sdhci_host *host = dev_get_drvdata(dev);
 
-	return sdhci_suspend_host(host, state);
+	return sdhci_suspend_host(host);
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_suspend);
 
-int sdhci_pltfm_resume(struct platform_device *dev)
+static int sdhci_pltfm_resume(struct device *dev)
 {
-	struct sdhci_host *host = platform_get_drvdata(dev);
+	struct sdhci_host *host = dev_get_drvdata(dev);
 
 	return sdhci_resume_host(host);
 }
-EXPORT_SYMBOL_GPL(sdhci_pltfm_resume);
+
+const struct dev_pm_ops sdhci_pltfm_pmops = {
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+};
+EXPORT_SYMBOL_GPL(sdhci_pltfm_pmops);
 #endif	/* CONFIG_PM */
 
 static int __init sdhci_pltfm_drv_init(void)
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index 3a9fc3f40840..37e0e184a0bb 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -99,8 +99,10 @@ extern int sdhci_pltfm_register(struct platform_device *pdev,
 extern int sdhci_pltfm_unregister(struct platform_device *pdev);
 
 #ifdef CONFIG_PM
-extern int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state);
-extern int sdhci_pltfm_resume(struct platform_device *dev);
+extern const struct dev_pm_ops sdhci_pltfm_pmops;
+#define SDHCI_PLTFM_PMOPS (&sdhci_pltfm_pmops)
+#else
+#define SDHCI_PLTFM_PMOPS NULL
 #endif
 
 #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */
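Everything from here through sdhci-tegra below (and the cns3xxx, dove, esdhc, hlwd, and pci drivers above) follows the same conversion: the legacy platform_driver/pci_driver suspend and resume hooks, with their pm_message_t argument, give way to a shared dev_pm_ops hung off .driver.pm. A condensed sketch of the pattern, with hypothetical demo_* names:

    #include <linux/platform_device.h>
    #include <linux/pm.h>

    /* dev_pm_ops callbacks take a struct device directly, so drivers
     * switch from platform_get_drvdata(pdev) to dev_get_drvdata(dev). */
    static int demo_suspend(struct device *dev)
    {
            return 0;       /* quiesce the hardware here */
    }

    static int demo_resume(struct device *dev)
    {
            return 0;       /* re-initialize the hardware here */
    }

    static const struct dev_pm_ops demo_pmops = {
            .suspend = demo_suspend,
            .resume  = demo_resume,
    };

    static struct platform_driver demo_driver = {
            .driver = {
                    .name = "demo",
                    .pm   = &demo_pmops,    /* replaces .suspend/.resume */
            },
    };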
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index d4bf6d30c7ba..7a039c3cb1f1 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -218,13 +218,10 @@ static struct platform_driver sdhci_pxav2_driver = {
 	.driver		= {
 		.name	= "sdhci-pxav2",
 		.owner	= THIS_MODULE,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_pxav2_probe,
 	.remove		= __devexit_p(sdhci_pxav2_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 static int __init sdhci_pxav2_init(void)
 {
230{ 227{
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index cff4ad3e7a59..15673a7ee6a5 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -264,13 +264,10 @@ static struct platform_driver sdhci_pxav3_driver = {
 	.driver		= {
 		.name	= "sdhci-pxav3",
 		.owner	= THIS_MODULE,
+		.pm	= SDHCI_PLTFM_PMOPS,
 	},
 	.probe		= sdhci_pxav3_probe,
 	.remove		= __devexit_p(sdhci_pxav3_remove),
-#ifdef CONFIG_PM
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-#endif
 };
 static int __init sdhci_pxav3_init(void)
 {
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index cb60c4197e0a..0d33ff0d67fb 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -622,23 +622,29 @@ static int __devexit sdhci_s3c_remove(struct platform_device *pdev)
 
 #ifdef CONFIG_PM
 
-static int sdhci_s3c_suspend(struct platform_device *dev, pm_message_t pm)
+static int sdhci_s3c_suspend(struct device *dev)
 {
-	struct sdhci_host *host = platform_get_drvdata(dev);
+	struct sdhci_host *host = dev_get_drvdata(dev);
 
-	return sdhci_suspend_host(host, pm);
+	return sdhci_suspend_host(host);
 }
 
-static int sdhci_s3c_resume(struct platform_device *dev)
+static int sdhci_s3c_resume(struct device *dev)
 {
-	struct sdhci_host *host = platform_get_drvdata(dev);
+	struct sdhci_host *host = dev_get_drvdata(dev);
 
 	return sdhci_resume_host(host);
 }
 
+static const struct dev_pm_ops sdhci_s3c_pmops = {
+	.suspend	= sdhci_s3c_suspend,
+	.resume		= sdhci_s3c_resume,
+};
+
+#define SDHCI_S3C_PMOPS (&sdhci_s3c_pmops)
+
 #else
-#define sdhci_s3c_suspend NULL
-#define sdhci_s3c_resume NULL
+#define SDHCI_S3C_PMOPS NULL
 #endif
 
 static struct platform_driver sdhci_s3c_driver = {
@@ -647,6 +653,7 @@ static struct platform_driver sdhci_s3c_driver = {
647 .driver = { 653 .driver = {
648 .owner = THIS_MODULE, 654 .owner = THIS_MODULE,
649 .name = "s3c-sdhci", 655 .name = "s3c-sdhci",
656 .pm = SDHCI_S3C_PMOPS,
650 }, 657 },
651}; 658};
652 659
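
The sdhci-s3c hunk above is the general recipe for moving a platform driver off the legacy bus-level .suspend/.resume callbacks and onto dev_pm_ops (note the companion change in sdhci-pltfm.h, where SDHCI_PLTFM_PMOPS collapses to NULL when CONFIG_PM is off, so the .pm assignment needs no #ifdef). A minimal sketch of the same shape, assuming the SIMPLE_DEV_PM_OPS helper from <linux/pm.h>; the "foo" names are hypothetical:

    #include <linux/module.h>
    #include <linux/pm.h>
    #include <linux/platform_device.h>

    #ifdef CONFIG_PM_SLEEP
    static int foo_suspend(struct device *dev)
    {
            /* dev_get_drvdata(dev) replaces platform_get_drvdata(pdev) */
            return 0;
    }

    static int foo_resume(struct device *dev)
    {
            return 0;
    }
    #endif

    /* Expands to an empty dev_pm_ops when CONFIG_PM_SLEEP is off. */
    static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

    static struct platform_driver foo_driver = {
            .driver = {
                    .name   = "foo",
                    .owner  = THIS_MODULE,
                    .pm     = &foo_pm_ops, /* one pointer, no #ifdef here */
            },
    };
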
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 89699e861fc1..e2e18d3f949c 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -318,13 +318,10 @@ static struct platform_driver sdhci_tegra_driver = {
318 .name = "sdhci-tegra", 318 .name = "sdhci-tegra",
319 .owner = THIS_MODULE, 319 .owner = THIS_MODULE,
320 .of_match_table = sdhci_tegra_dt_match, 320 .of_match_table = sdhci_tegra_dt_match,
321 .pm = SDHCI_PLTFM_PMOPS,
321 }, 322 },
322 .probe = sdhci_tegra_probe, 323 .probe = sdhci_tegra_probe,
323 .remove = __devexit_p(sdhci_tegra_remove), 324 .remove = __devexit_p(sdhci_tegra_remove),
324#ifdef CONFIG_PM
325 .suspend = sdhci_pltfm_suspend,
326 .resume = sdhci_pltfm_resume,
327#endif
328}; 325};
329 326
330static int __init sdhci_tegra_init(void) 327static int __init sdhci_tegra_init(void)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 6d8eea323541..19ed580f2cab 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2327,7 +2327,7 @@ out:
2327 2327
2328#ifdef CONFIG_PM 2328#ifdef CONFIG_PM
2329 2329
2330int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state) 2330int sdhci_suspend_host(struct sdhci_host *host)
2331{ 2331{
2332 int ret; 2332 int ret;
2333 2333
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0a5b65460d8a..a04d4d0c6fd2 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -374,7 +374,7 @@ extern int sdhci_add_host(struct sdhci_host *host);
374extern void sdhci_remove_host(struct sdhci_host *host, int dead); 374extern void sdhci_remove_host(struct sdhci_host *host, int dead);
375 375
376#ifdef CONFIG_PM 376#ifdef CONFIG_PM
377extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state); 377extern int sdhci_suspend_host(struct sdhci_host *host);
378extern int sdhci_resume_host(struct sdhci_host *host); 378extern int sdhci_resume_host(struct sdhci_host *host);
379extern void sdhci_enable_irq_wakeups(struct sdhci_host *host); 379extern void sdhci_enable_irq_wakeups(struct sdhci_host *host);
380#endif 380#endif
diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index e8f6e65183d7..2ec978bc32ba 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -259,7 +259,7 @@ static int firmware_rom_wait_states = 0x04;
259static int firmware_rom_wait_states = 0x1C; 259static int firmware_rom_wait_states = 0x1C;
260#endif 260#endif
261 261
262module_param(firmware_rom_wait_states, bool, 0644); 262module_param(firmware_rom_wait_states, int, 0644);
263MODULE_PARM_DESC(firmware_rom_wait_states, 263MODULE_PARM_DESC(firmware_rom_wait_states,
264 "ROM wait states byte=RRRIIEEE (Reserved Internal External)"); 264 "ROM wait states byte=RRRIIEEE (Reserved Internal External)");
265 265
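
The vub300 change fixes a type mismatch: firmware_rom_wait_states is declared int, so passing "bool" to module_param() told the parameter code to treat the storage as a 1-byte flag. The rule, sketched below, is that the second argument must name the variable's actual C type:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    /* Storage type and module_param() type must agree; "bool" here
     * would misinterpret the int through /sys/module/.../parameters. */
    static int firmware_rom_wait_states = 0x1C;
    module_param(firmware_rom_wait_states, int, 0644);
    MODULE_PARM_DESC(firmware_rom_wait_states,
                     "ROM wait states byte=RRRIIEEE (Reserved Internal External)");
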
diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c
index 94f553489725..45876d0e5b8e 100644
--- a/drivers/mtd/maps/plat-ram.c
+++ b/drivers/mtd/maps/plat-ram.c
@@ -227,10 +227,14 @@ static int platram_probe(struct platform_device *pdev)
227 if (!err) 227 if (!err)
228 dev_info(&pdev->dev, "registered mtd device\n"); 228 dev_info(&pdev->dev, "registered mtd device\n");
229 229
230 /* add the whole device. */ 230 if (pdata->nr_partitions) {
231 err = mtd_device_register(info->mtd, NULL, 0); 231 /* add the whole device. */
232 if (err) 232 err = mtd_device_register(info->mtd, NULL, 0);
233 dev_err(&pdev->dev, "failed to register the entire device\n"); 233 if (err) {
234 dev_err(&pdev->dev,
235 "failed to register the entire device\n");
236 }
237 }
234 238
235 return err; 239 return err;
236 240
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index 411a17df9fc1..2a25b6789af4 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -98,7 +98,7 @@ static int __devinit pxa2xx_flash_probe(struct platform_device *pdev)
98 } 98 }
99 info->mtd->owner = THIS_MODULE; 99 info->mtd->owner = THIS_MODULE;
100 100
101 mtd_device_parse_register(info->mtd, probes, 0, NULL, 0); 101 mtd_device_parse_register(info->mtd, probes, 0, flash->parts, flash->nr_parts);
102 102
103 platform_set_drvdata(pdev, info); 103 platform_set_drvdata(pdev, info);
104 return 0; 104 return 0;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 071b63420f0e..493ec2fcf97f 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -21,9 +21,9 @@
21#include <linux/clk.h> 21#include <linux/clk.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/module.h>
24#include <linux/mtd/gpmi-nand.h> 25#include <linux/mtd/gpmi-nand.h>
25#include <linux/mtd/partitions.h> 26#include <linux/mtd/partitions.h>
26
27#include "gpmi-nand.h" 27#include "gpmi-nand.h"
28 28
29/* add our owner bbt descriptor */ 29/* add our owner bbt descriptor */
diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index ee1713907b92..f8aacf48ecdd 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -188,7 +188,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
188 if (!flash_np) 188 if (!flash_np)
189 return -ENODEV; 189 return -ENODEV;
190 190
191 ppdata->of_node = flash_np; 191 ppdata.of_node = flash_np;
192 ndfc->mtd.name = kasprintf(GFP_KERNEL, "%s.%s", 192 ndfc->mtd.name = kasprintf(GFP_KERNEL, "%s.%s",
193 dev_name(&ndfc->ofdev->dev), flash_np->name); 193 dev_name(&ndfc->ofdev->dev), flash_np->name);
194 if (!ndfc->mtd.name) { 194 if (!ndfc->mtd.name) {
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 5272f9d4dda9..9de37642f09f 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -23,8 +23,8 @@ if NET_VENDOR_FREESCALE
23config FEC 23config FEC
24 bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)" 24 bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)"
25 depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \ 25 depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \
26 ARCH_MXC || ARCH_MXS) 26 ARCH_MXC || SOC_IMX28)
27 default ARCH_MXC || ARCH_MXS if ARM 27 default ARCH_MXC || SOC_IMX28 if ARM
28 select PHYLIB 28 select PHYLIB
29 ---help--- 29 ---help---
30 Say Y here if you want to use the built-in 10/100 Fast ethernet 30 Say Y here if you want to use the built-in 10/100 Fast ethernet
diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index 1124ce0a1594..c136230d50bb 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -232,6 +232,7 @@ struct fec_enet_private {
232 struct platform_device *pdev; 232 struct platform_device *pdev;
233 233
234 int opened; 234 int opened;
235 int dev_id;
235 236
236 /* Phylib and MDIO interface */ 237 /* Phylib and MDIO interface */
237 struct mii_bus *mii_bus; 238 struct mii_bus *mii_bus;
@@ -837,7 +838,7 @@ static void __inline__ fec_get_mac(struct net_device *ndev)
837 838
838 /* Adjust MAC if using macaddr */ 839 /* Adjust MAC if using macaddr */
839 if (iap == macaddr) 840 if (iap == macaddr)
840 ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->pdev->id; 841 ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id;
841} 842}
842 843
843/* ------------------------------------------------------------------------- */ 844/* ------------------------------------------------------------------------- */
@@ -953,7 +954,7 @@ static int fec_enet_mii_probe(struct net_device *ndev)
953 char mdio_bus_id[MII_BUS_ID_SIZE]; 954 char mdio_bus_id[MII_BUS_ID_SIZE];
954 char phy_name[MII_BUS_ID_SIZE + 3]; 955 char phy_name[MII_BUS_ID_SIZE + 3];
955 int phy_id; 956 int phy_id;
956 int dev_id = fep->pdev->id; 957 int dev_id = fep->dev_id;
957 958
958 fep->phy_dev = NULL; 959 fep->phy_dev = NULL;
959 960
@@ -1031,7 +1032,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
1031 * mdio interface in board design, and need to be configured by 1032 * mdio interface in board design, and need to be configured by
1032 * fec0 mii_bus. 1033 * fec0 mii_bus.
1033 */ 1034 */
1034 if ((id_entry->driver_data & FEC_QUIRK_ENET_MAC) && pdev->id > 0) { 1035 if ((id_entry->driver_data & FEC_QUIRK_ENET_MAC) && fep->dev_id > 0) {
1035 /* fec1 uses fec0 mii_bus */ 1036 /* fec1 uses fec0 mii_bus */
1036 fep->mii_bus = fec0_mii_bus; 1037 fep->mii_bus = fec0_mii_bus;
1037 return 0; 1038 return 0;
@@ -1063,7 +1064,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
1063 fep->mii_bus->read = fec_enet_mdio_read; 1064 fep->mii_bus->read = fec_enet_mdio_read;
1064 fep->mii_bus->write = fec_enet_mdio_write; 1065 fep->mii_bus->write = fec_enet_mdio_write;
1065 fep->mii_bus->reset = fec_enet_mdio_reset; 1066 fep->mii_bus->reset = fec_enet_mdio_reset;
1066 snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id + 1); 1067 snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", fep->dev_id + 1);
1067 fep->mii_bus->priv = fep; 1068 fep->mii_bus->priv = fep;
1068 fep->mii_bus->parent = &pdev->dev; 1069 fep->mii_bus->parent = &pdev->dev;
1069 1070
@@ -1521,6 +1522,7 @@ fec_probe(struct platform_device *pdev)
1521 int i, irq, ret = 0; 1522 int i, irq, ret = 0;
1522 struct resource *r; 1523 struct resource *r;
1523 const struct of_device_id *of_id; 1524 const struct of_device_id *of_id;
1525 static int dev_id;
1524 1526
1525 of_id = of_match_device(fec_dt_ids, &pdev->dev); 1527 of_id = of_match_device(fec_dt_ids, &pdev->dev);
1526 if (of_id) 1528 if (of_id)
@@ -1548,6 +1550,7 @@ fec_probe(struct platform_device *pdev)
1548 1550
1549 fep->hwp = ioremap(r->start, resource_size(r)); 1551 fep->hwp = ioremap(r->start, resource_size(r));
1550 fep->pdev = pdev; 1552 fep->pdev = pdev;
1553 fep->dev_id = dev_id++;
1551 1554
1552 if (!fep->hwp) { 1555 if (!fep->hwp) {
1553 ret = -ENOMEM; 1556 ret = -ENOMEM;
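
The fec hunks above replace every use of pdev->id with a driver-private fep->dev_id, because devicetree-probed platform devices carry pdev->id == -1. A static counter inside probe(), sketched here with hypothetical "foo" names, hands out stable 0, 1, ... instance numbers regardless of how the device was created:

    #include <linux/platform_device.h>
    #include <linux/slab.h>

    struct foo_priv {
            int dev_id;
    };

    static int foo_probe(struct platform_device *pdev)
    {
            static int next_id;     /* shared by all probe() calls */
            struct foo_priv *fep;

            fep = devm_kzalloc(&pdev->dev, sizeof(*fep), GFP_KERNEL);
            if (!fep)
                    return -ENOMEM;

            fep->dev_id = next_id++; /* 0, 1, ... even when pdev->id == -1 */
            platform_set_drvdata(pdev, fep);
            return 0;
    }
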
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 52f4e8ad48e7..4d9f84b8ab97 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -183,28 +183,10 @@ void fsl_pq_mdio_bus_name(char *name, struct device_node *np)
183} 183}
184EXPORT_SYMBOL_GPL(fsl_pq_mdio_bus_name); 184EXPORT_SYMBOL_GPL(fsl_pq_mdio_bus_name);
185 185
186/* Scan the bus in reverse, looking for an empty spot */
187static int fsl_pq_mdio_find_free(struct mii_bus *new_bus)
188{
189 int i;
190
191 for (i = PHY_MAX_ADDR; i > 0; i--) {
192 u32 phy_id;
193
194 if (get_phy_id(new_bus, i, &phy_id))
195 return -1;
196
197 if (phy_id == 0xffffffff)
198 break;
199 }
200
201 return i;
202}
203
204 186
205#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
206static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct device_node *np) 187static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct device_node *np)
207{ 188{
189#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
208 struct gfar __iomem *enet_regs; 190 struct gfar __iomem *enet_regs;
209 191
210 /* 192 /*
@@ -220,15 +202,15 @@ static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct devi
220 } else if (of_device_is_compatible(np, "fsl,etsec2-mdio") || 202 } else if (of_device_is_compatible(np, "fsl,etsec2-mdio") ||
221 of_device_is_compatible(np, "fsl,etsec2-tbi")) { 203 of_device_is_compatible(np, "fsl,etsec2-tbi")) {
222 return of_iomap(np, 1); 204 return of_iomap(np, 1);
223 } else 205 }
224 return NULL;
225}
226#endif 206#endif
207 return NULL;
208}
227 209
228 210
229#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
230static int get_ucc_id_for_range(u64 start, u64 end, u32 *ucc_id) 211static int get_ucc_id_for_range(u64 start, u64 end, u32 *ucc_id)
231{ 212{
213#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
232 struct device_node *np = NULL; 214 struct device_node *np = NULL;
233 int err = 0; 215 int err = 0;
234 216
@@ -261,9 +243,10 @@ static int get_ucc_id_for_range(u64 start, u64 end, u32 *ucc_id)
261 return err; 243 return err;
262 else 244 else
263 return -EINVAL; 245 return -EINVAL;
264} 246#else
247 return -ENODEV;
265#endif 248#endif
266 249}
267 250
268static int fsl_pq_mdio_probe(struct platform_device *ofdev) 251static int fsl_pq_mdio_probe(struct platform_device *ofdev)
269{ 252{
@@ -339,19 +322,13 @@ static int fsl_pq_mdio_probe(struct platform_device *ofdev)
339 of_device_is_compatible(np, "fsl,etsec2-mdio") || 322 of_device_is_compatible(np, "fsl,etsec2-mdio") ||
340 of_device_is_compatible(np, "fsl,etsec2-tbi") || 323 of_device_is_compatible(np, "fsl,etsec2-tbi") ||
341 of_device_is_compatible(np, "gianfar")) { 324 of_device_is_compatible(np, "gianfar")) {
342#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
343 tbipa = get_gfar_tbipa(regs, np); 325 tbipa = get_gfar_tbipa(regs, np);
344 if (!tbipa) { 326 if (!tbipa) {
345 err = -EINVAL; 327 err = -EINVAL;
346 goto err_free_irqs; 328 goto err_free_irqs;
347 } 329 }
348#else
349 err = -ENODEV;
350 goto err_free_irqs;
351#endif
352 } else if (of_device_is_compatible(np, "fsl,ucc-mdio") || 330 } else if (of_device_is_compatible(np, "fsl,ucc-mdio") ||
353 of_device_is_compatible(np, "ucc_geth_phy")) { 331 of_device_is_compatible(np, "ucc_geth_phy")) {
354#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
355 u32 id; 332 u32 id;
356 static u32 mii_mng_master; 333 static u32 mii_mng_master;
357 334
@@ -364,10 +341,6 @@ static int fsl_pq_mdio_probe(struct platform_device *ofdev)
364 mii_mng_master = id; 341 mii_mng_master = id;
365 ucc_set_qe_mux_mii_mng(id - 1); 342 ucc_set_qe_mux_mii_mng(id - 1);
366 } 343 }
367#else
368 err = -ENODEV;
369 goto err_free_irqs;
370#endif
371 } else { 344 } else {
372 err = -ENODEV; 345 err = -ENODEV;
373 goto err_free_irqs; 346 goto err_free_irqs;
@@ -386,16 +359,6 @@ static int fsl_pq_mdio_probe(struct platform_device *ofdev)
386 } 359 }
387 360
388 if (tbiaddr == -1) { 361 if (tbiaddr == -1) {
389 out_be32(tbipa, 0);
390
391 tbiaddr = fsl_pq_mdio_find_free(new_bus);
392 }
393
394 /*
395 * We define TBIPA at 0 to be illegal, opting to fail for boards that
396 * have PHYs at 1-31, rather than change tbipa and rescan.
397 */
398 if (tbiaddr == 0) {
399 err = -EBUSY; 362 err = -EBUSY;
400 363
401 goto err_free_irqs; 364 goto err_free_irqs;
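
The fsl_pq_mdio rework moves the #if defined(...) guards from the call sites into the helpers themselves, so the probe path stays straight-line C and the compiled-out case degrades to a clean error return. The shape of the pattern, with a hypothetical CONFIG_FOO:

    #include <linux/types.h>

    static int get_foo_id_for_range(u64 start, u64 end, u32 *id)
    {
    #if defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE)
            /* real lookup lives here */
            *id = 0;
            return 0;
    #else
            return -ENODEV; /* support compiled out: caller sees an error */
    #endif
    }
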
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index c7b60839ac99..dea0cb4400e2 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -2606,6 +2606,9 @@ static int skge_up(struct net_device *dev)
2606 spin_unlock_irq(&hw->hw_lock); 2606 spin_unlock_irq(&hw->hw_lock);
2607 2607
2608 napi_enable(&skge->napi); 2608 napi_enable(&skge->napi);
2609
2610 skge_set_multicast(dev);
2611
2609 return 0; 2612 return 0;
2610 2613
2611 free_tx_ring: 2614 free_tx_ring:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 227997d775e8..5829e0b47e7e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -147,6 +147,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
147 mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); 147 mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
148 if (priv->mdev->dev->caps.comp_pool && cq->vector) 148 if (priv->mdev->dev->caps.comp_pool && cq->vector)
149 mlx4_release_eq(priv->mdev->dev, cq->vector); 149 mlx4_release_eq(priv->mdev->dev, cq->vector);
150 cq->vector = 0;
150 cq->buf_size = 0; 151 cq->buf_size = 0;
151 cq->buf = NULL; 152 cq->buf = NULL;
152} 153}
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 67bf07819992..c8f47f17186f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -477,7 +477,6 @@ enum rtl_register_content {
477 /* Config1 register p.24 */ 477 /* Config1 register p.24 */
478 LEDS1 = (1 << 7), 478 LEDS1 = (1 << 7),
479 LEDS0 = (1 << 6), 479 LEDS0 = (1 << 6),
480 MSIEnable = (1 << 5), /* Enable Message Signaled Interrupt */
481 Speed_down = (1 << 4), 480 Speed_down = (1 << 4),
482 MEMMAP = (1 << 3), 481 MEMMAP = (1 << 3),
483 IOMAP = (1 << 2), 482 IOMAP = (1 << 2),
@@ -485,6 +484,7 @@ enum rtl_register_content {
485 PMEnable = (1 << 0), /* Power Management Enable */ 484 PMEnable = (1 << 0), /* Power Management Enable */
486 485
487 /* Config2 register p. 25 */ 486 /* Config2 register p. 25 */
487 MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. */
488 PCI_Clock_66MHz = 0x01, 488 PCI_Clock_66MHz = 0x01,
489 PCI_Clock_33MHz = 0x00, 489 PCI_Clock_33MHz = 0x00,
490 490
@@ -3426,22 +3426,24 @@ static const struct rtl_cfg_info {
3426}; 3426};
3427 3427
3428/* Cfg9346_Unlock assumed. */ 3428/* Cfg9346_Unlock assumed. */
3429static unsigned rtl_try_msi(struct pci_dev *pdev, void __iomem *ioaddr, 3429static unsigned rtl_try_msi(struct rtl8169_private *tp,
3430 const struct rtl_cfg_info *cfg) 3430 const struct rtl_cfg_info *cfg)
3431{ 3431{
3432 void __iomem *ioaddr = tp->mmio_addr;
3432 unsigned msi = 0; 3433 unsigned msi = 0;
3433 u8 cfg2; 3434 u8 cfg2;
3434 3435
3435 cfg2 = RTL_R8(Config2) & ~MSIEnable; 3436 cfg2 = RTL_R8(Config2) & ~MSIEnable;
3436 if (cfg->features & RTL_FEATURE_MSI) { 3437 if (cfg->features & RTL_FEATURE_MSI) {
3437 if (pci_enable_msi(pdev)) { 3438 if (pci_enable_msi(tp->pci_dev)) {
3438 dev_info(&pdev->dev, "no MSI. Back to INTx.\n"); 3439 netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
3439 } else { 3440 } else {
3440 cfg2 |= MSIEnable; 3441 cfg2 |= MSIEnable;
3441 msi = RTL_FEATURE_MSI; 3442 msi = RTL_FEATURE_MSI;
3442 } 3443 }
3443 } 3444 }
3444 RTL_W8(Config2, cfg2); 3445 if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
3446 RTL_W8(Config2, cfg2);
3445 return msi; 3447 return msi;
3446} 3448}
3447 3449
@@ -4077,7 +4079,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
4077 tp->features |= RTL_FEATURE_WOL; 4079 tp->features |= RTL_FEATURE_WOL;
4078 if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0) 4080 if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
4079 tp->features |= RTL_FEATURE_WOL; 4081 tp->features |= RTL_FEATURE_WOL;
4080 tp->features |= rtl_try_msi(pdev, ioaddr, cfg); 4082 tp->features |= rtl_try_msi(tp, cfg);
4081 RTL_W8(Cfg9346, Cfg9346_Lock); 4083 RTL_W8(Cfg9346, Cfg9346_Lock);
4082 4084
4083 if (rtl_tbi_enabled(tp)) { 4085 if (rtl_tbi_enabled(tp)) {
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index dca9d3369cdd..c97d2f590855 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -836,11 +836,13 @@ int cpdma_chan_stop(struct cpdma_chan *chan)
836 chan_write(chan, cp, CPDMA_TEARDOWN_VALUE); 836 chan_write(chan, cp, CPDMA_TEARDOWN_VALUE);
837 837
838 /* handle completed packets */ 838 /* handle completed packets */
839 spin_unlock_irqrestore(&chan->lock, flags);
839 do { 840 do {
840 ret = __cpdma_chan_process(chan); 841 ret = __cpdma_chan_process(chan);
841 if (ret < 0) 842 if (ret < 0)
842 break; 843 break;
843 } while ((ret & CPDMA_DESC_TD_COMPLETE) == 0); 844 } while ((ret & CPDMA_DESC_TD_COMPLETE) == 0);
845 spin_lock_irqsave(&chan->lock, flags);
844 846
845 /* remaining packets haven't been tx/rx'ed, clean them up */ 847 /* remaining packets haven't been tx/rx'ed, clean them up */
846 while (chan->head) { 848 while (chan->head) {
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 89f829f5f725..f8a6853b692e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -423,10 +423,8 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
423 lock_sock(sk); 423 lock_sock(sk);
424 424
425 opt->src_addr = sp->sa_addr.pptp; 425 opt->src_addr = sp->sa_addr.pptp;
426 if (add_chan(po)) { 426 if (add_chan(po))
427 release_sock(sk);
428 error = -EBUSY; 427 error = -EBUSY;
429 }
430 428
431 release_sock(sk); 429 release_sock(sk);
432 return error; 430 return error;
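
The pptp fix removes a double unlock: the error branch called release_sock() and then fell through to the unconditional release_sock() at the end of the function. A hypothetical reduction of the safer structure, with exactly one unlock on every path:

    #include <net/sock.h>

    static int foo_bind(struct sock *sk, bool chan_busy)
    {
            int error = 0;

            lock_sock(sk);
            if (chan_busy)
                    error = -EBUSY; /* no release_sock() here ...          */
            release_sock(sk);       /* ... this single unlock covers all paths */
            return error;
    }
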
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index e6fed4d4cb77..e95f0e60a9bc 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -1655,6 +1655,10 @@ static const struct usb_device_id products [] = {
1655 // ASIX 88772a 1655 // ASIX 88772a
1656 USB_DEVICE(0x0db0, 0xa877), 1656 USB_DEVICE(0x0db0, 0xa877),
1657 .driver_info = (unsigned long) &ax88772_info, 1657 .driver_info = (unsigned long) &ax88772_info,
1658}, {
1659 // Asus USB Ethernet Adapter
1660 USB_DEVICE (0x0b95, 0x7e2b),
1661 .driver_info = (unsigned long) &ax88772_info,
1658}, 1662},
1659 { }, // END 1663 { }, // END
1660}; 1664};
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 93fbe6f40898..a9c5ae75277e 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -286,7 +286,7 @@ static bool ath_complete_reset(struct ath_softc *sc, bool start)
286 ath_start_ani(common); 286 ath_start_ani(common);
287 } 287 }
288 288
289 if (ath9k_hw_ops(ah)->antdiv_comb_conf_get && sc->ant_rx != 3) { 289 if ((ah->caps.hw_caps & ATH9K_HW_CAP_ANT_DIV_COMB) && sc->ant_rx != 3) {
290 struct ath_hw_antcomb_conf div_ant_conf; 290 struct ath_hw_antcomb_conf div_ant_conf;
291 u8 lna_conf; 291 u8 lna_conf;
292 292
@@ -1843,6 +1843,9 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw,
1843 struct ath_softc *sc = hw->priv; 1843 struct ath_softc *sc = hw->priv;
1844 struct ath_node *an = (struct ath_node *) sta->drv_priv; 1844 struct ath_node *an = (struct ath_node *) sta->drv_priv;
1845 1845
1846 if (!(sc->sc_flags & SC_OP_TXAGGR))
1847 return;
1848
1846 switch (cmd) { 1849 switch (cmd) {
1847 case STA_NOTIFY_SLEEP: 1850 case STA_NOTIFY_SLEEP:
1848 an->sleeping = true; 1851 an->sleeping = true;
diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c
index 888abc2be3a5..528d5f3e868c 100644
--- a/drivers/net/wireless/ath/ath9k/rc.c
+++ b/drivers/net/wireless/ath/ath9k/rc.c
@@ -1271,7 +1271,9 @@ static void ath_rc_init(struct ath_softc *sc,
1271 1271
1272 ath_rc_priv->max_valid_rate = k; 1272 ath_rc_priv->max_valid_rate = k;
1273 ath_rc_sort_validrates(rate_table, ath_rc_priv); 1273 ath_rc_sort_validrates(rate_table, ath_rc_priv);
1274 ath_rc_priv->rate_max_phy = ath_rc_priv->valid_rate_index[k-4]; 1274 ath_rc_priv->rate_max_phy = (k > 4) ?
1275 ath_rc_priv->valid_rate_index[k-4] :
1276 ath_rc_priv->valid_rate_index[k-1];
1275 ath_rc_priv->rate_table = rate_table; 1277 ath_rc_priv->rate_table = rate_table;
1276 1278
1277 ath_dbg(common, ATH_DBG_CONFIG, 1279 ath_dbg(common, ATH_DBG_CONFIG,
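
The ath9k rate-control fix guards an array underflow: rate_max_phy was always taken four entries from the top of the valid-rate list, so with fewer than five valid rates the old code indexed valid_rate_index[-1]. The arithmetic, reduced to a sketch:

    /* k = number of valid rates; pick the 4th-highest when possible,
     * otherwise the highest.  k = 3: old code -> index -1, new -> 2. */
    static int pick_max_phy_index(int k)
    {
            return (k > 4) ? k - 4 : k - 1;
    }
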
diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c
index fcff923b3c18..279a53eae4c5 100644
--- a/drivers/net/wireless/b43/pio.c
+++ b/drivers/net/wireless/b43/pio.c
@@ -617,9 +617,19 @@ static bool pio_rx_frame(struct b43_pio_rxqueue *q)
617 const char *err_msg = NULL; 617 const char *err_msg = NULL;
618 struct b43_rxhdr_fw4 *rxhdr = 618 struct b43_rxhdr_fw4 *rxhdr =
619 (struct b43_rxhdr_fw4 *)wl->pio_scratchspace; 619 (struct b43_rxhdr_fw4 *)wl->pio_scratchspace;
620 size_t rxhdr_size = sizeof(*rxhdr);
620 621
621 BUILD_BUG_ON(sizeof(wl->pio_scratchspace) < sizeof(*rxhdr)); 622 BUILD_BUG_ON(sizeof(wl->pio_scratchspace) < sizeof(*rxhdr));
622 memset(rxhdr, 0, sizeof(*rxhdr)); 623 switch (dev->fw.hdr_format) {
624 case B43_FW_HDR_410:
625 case B43_FW_HDR_351:
626 rxhdr_size -= sizeof(rxhdr->format_598) -
627 sizeof(rxhdr->format_351);
628 break;
629 case B43_FW_HDR_598:
630 break;
631 }
632 memset(rxhdr, 0, rxhdr_size);
623 633
624 /* Check if we have data and wait for it to get ready. */ 634 /* Check if we have data and wait for it to get ready. */
625 if (q->rev >= 8) { 635 if (q->rev >= 8) {
@@ -657,11 +667,11 @@ data_ready:
657 667
658 /* Get the preamble (RX header) */ 668 /* Get the preamble (RX header) */
659 if (q->rev >= 8) { 669 if (q->rev >= 8) {
660 b43_block_read(dev, rxhdr, sizeof(*rxhdr), 670 b43_block_read(dev, rxhdr, rxhdr_size,
661 q->mmio_base + B43_PIO8_RXDATA, 671 q->mmio_base + B43_PIO8_RXDATA,
662 sizeof(u32)); 672 sizeof(u32));
663 } else { 673 } else {
664 b43_block_read(dev, rxhdr, sizeof(*rxhdr), 674 b43_block_read(dev, rxhdr, rxhdr_size,
665 q->mmio_base + B43_PIO_RXDATA, 675 q->mmio_base + B43_PIO_RXDATA,
666 sizeof(u16)); 676 sizeof(u16));
667 } 677 }
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c b/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
index a7a6def40d05..5c7c17c7166a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
@@ -606,8 +606,8 @@ int iwlagn_mac_config(struct ieee80211_hw *hw, u32 changed)
606 if (ctx->ht.enabled) { 606 if (ctx->ht.enabled) {
607 /* if HT40 is used, it should not change 607 /* if HT40 is used, it should not change
608 * after associated except channel switch */ 608 * after associated except channel switch */
609 if (iwl_is_associated_ctx(ctx) && 609 if (!ctx->ht.is_40mhz ||
610 !ctx->ht.is_40mhz) 610 !iwl_is_associated_ctx(ctx))
611 iwlagn_config_ht40(conf, ctx); 611 iwlagn_config_ht40(conf, ctx);
612 } else 612 } else
613 ctx->ht.is_40mhz = false; 613 ctx->ht.is_40mhz = false;
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
index 35a6b71f358c..df1540ca6102 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c
@@ -91,7 +91,10 @@ static void iwlagn_tx_cmd_build_basic(struct iwl_priv *priv,
91 tx_cmd->tid_tspec = qc[0] & 0xf; 91 tx_cmd->tid_tspec = qc[0] & 0xf;
92 tx_flags &= ~TX_CMD_FLG_SEQ_CTL_MSK; 92 tx_flags &= ~TX_CMD_FLG_SEQ_CTL_MSK;
93 } else { 93 } else {
94 tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK; 94 if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ)
95 tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK;
96 else
97 tx_flags &= ~TX_CMD_FLG_SEQ_CTL_MSK;
95 } 98 }
96 99
97 iwlagn_tx_cmd_protection(priv, info, fc, &tx_flags); 100 iwlagn_tx_cmd_protection(priv, info, fc, &tx_flags);
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index bacc06c95e7a..e0e9a3dfbc00 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2850,6 +2850,9 @@ static int iwlagn_mac_tx_sync(struct ieee80211_hw *hw,
2850 int ret; 2850 int ret;
2851 u8 sta_id; 2851 u8 sta_id;
2852 2852
2853 if (ctx->ctxid != IWL_RXON_CTX_PAN)
2854 return 0;
2855
2853 IWL_DEBUG_MAC80211(priv, "enter\n"); 2856 IWL_DEBUG_MAC80211(priv, "enter\n");
2854 mutex_lock(&priv->shrd->mutex); 2857 mutex_lock(&priv->shrd->mutex);
2855 2858
@@ -2898,6 +2901,9 @@ static void iwlagn_mac_finish_tx_sync(struct ieee80211_hw *hw,
2898 struct iwl_vif_priv *vif_priv = (void *)vif->drv_priv; 2901 struct iwl_vif_priv *vif_priv = (void *)vif->drv_priv;
2899 struct iwl_rxon_context *ctx = vif_priv->ctx; 2902 struct iwl_rxon_context *ctx = vif_priv->ctx;
2900 2903
2904 if (ctx->ctxid != IWL_RXON_CTX_PAN)
2905 return;
2906
2901 IWL_DEBUG_MAC80211(priv, "enter\n"); 2907 IWL_DEBUG_MAC80211(priv, "enter\n");
2902 mutex_lock(&priv->shrd->mutex); 2908 mutex_lock(&priv->shrd->mutex);
2903 2909
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
index ce918980e977..5f17ab8e76ba 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-pcie.c
@@ -1197,9 +1197,7 @@ static int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
1197 iwl_print_hex_dump(trans, IWL_DL_TX, (u8 *)tx_cmd->hdr, hdr_len); 1197 iwl_print_hex_dump(trans, IWL_DL_TX, (u8 *)tx_cmd->hdr, hdr_len);
1198 1198
1199 /* Set up entry for this TFD in Tx byte-count array */ 1199 /* Set up entry for this TFD in Tx byte-count array */
1200 if (is_agg) 1200 iwl_trans_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
1201 iwl_trans_txq_update_byte_cnt_tbl(trans, txq,
1202 le16_to_cpu(tx_cmd->len));
1203 1201
1204 dma_sync_single_for_device(bus(trans)->dev, txcmd_phys, firstlen, 1202 dma_sync_single_for_device(bus(trans)->dev, txcmd_phys, firstlen,
1205 DMA_BIDIRECTIONAL); 1203 DMA_BIDIRECTIONAL);
diff --git a/drivers/net/wireless/mwifiex/cmdevt.c b/drivers/net/wireless/mwifiex/cmdevt.c
index ac278156d390..6e0a3eaecf70 100644
--- a/drivers/net/wireless/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/mwifiex/cmdevt.c
@@ -939,7 +939,6 @@ mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
939{ 939{
940 struct cmd_ctrl_node *cmd_node = NULL, *tmp_node = NULL; 940 struct cmd_ctrl_node *cmd_node = NULL, *tmp_node = NULL;
941 unsigned long cmd_flags; 941 unsigned long cmd_flags;
942 unsigned long cmd_pending_q_flags;
943 unsigned long scan_pending_q_flags; 942 unsigned long scan_pending_q_flags;
944 uint16_t cancel_scan_cmd = false; 943 uint16_t cancel_scan_cmd = false;
945 944
@@ -949,12 +948,9 @@ mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
949 cmd_node = adapter->curr_cmd; 948 cmd_node = adapter->curr_cmd;
950 cmd_node->wait_q_enabled = false; 949 cmd_node->wait_q_enabled = false;
951 cmd_node->cmd_flag |= CMD_F_CANCELED; 950 cmd_node->cmd_flag |= CMD_F_CANCELED;
952 spin_lock_irqsave(&adapter->cmd_pending_q_lock,
953 cmd_pending_q_flags);
954 list_del(&cmd_node->list);
955 spin_unlock_irqrestore(&adapter->cmd_pending_q_lock,
956 cmd_pending_q_flags);
957 mwifiex_insert_cmd_to_free_q(adapter, cmd_node); 951 mwifiex_insert_cmd_to_free_q(adapter, cmd_node);
952 mwifiex_complete_cmd(adapter, adapter->curr_cmd);
953 adapter->curr_cmd = NULL;
958 spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, cmd_flags); 954 spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, cmd_flags);
959 } 955 }
960 956
@@ -981,7 +977,6 @@ mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
981 spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, cmd_flags); 977 spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, cmd_flags);
982 } 978 }
983 adapter->cmd_wait_q.status = -1; 979 adapter->cmd_wait_q.status = -1;
984 mwifiex_complete_cmd(adapter, adapter->curr_cmd);
985} 980}
986 981
987/* 982/*
diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c
index ea4a29b7e331..1679c2593b7b 100644
--- a/drivers/net/wireless/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/mwifiex/sta_ioctl.c
@@ -55,9 +55,14 @@ int mwifiex_wait_queue_complete(struct mwifiex_adapter *adapter)
55{ 55{
56 bool cancel_flag = false; 56 bool cancel_flag = false;
57 int status = adapter->cmd_wait_q.status; 57 int status = adapter->cmd_wait_q.status;
58 struct cmd_ctrl_node *cmd_queued = adapter->cmd_queued; 58 struct cmd_ctrl_node *cmd_queued;
59 59
60 if (!adapter->cmd_queued)
61 return 0;
62
63 cmd_queued = adapter->cmd_queued;
60 adapter->cmd_queued = NULL; 64 adapter->cmd_queued = NULL;
65
61 dev_dbg(adapter->dev, "cmd pending\n"); 66 dev_dbg(adapter->dev, "cmd pending\n");
62 atomic_inc(&adapter->cmd_pending); 67 atomic_inc(&adapter->cmd_pending);
63 68
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c b/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c
index 592a10ac5929..3b585aadabfc 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c
@@ -569,7 +569,7 @@ static bool _rtl92ce_phy_set_rf_power_state(struct ieee80211_hw *hw,
569 } 569 }
570 case ERFSLEEP:{ 570 case ERFSLEEP:{
571 if (ppsc->rfpwr_state == ERFOFF) 571 if (ppsc->rfpwr_state == ERFOFF)
572 break; 572 return false;
573 for (queue_id = 0, i = 0; 573 for (queue_id = 0, i = 0;
574 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) { 574 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) {
575 ring = &pcipriv->dev.tx_ring[queue_id]; 575 ring = &pcipriv->dev.tx_ring[queue_id];
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/phy.c b/drivers/net/wireless/rtlwifi/rtl8192cu/phy.c
index 72852900df84..e49cf2244c75 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/phy.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/phy.c
@@ -548,7 +548,7 @@ static bool _rtl92cu_phy_set_rf_power_state(struct ieee80211_hw *hw,
548 break; 548 break;
549 case ERFSLEEP: 549 case ERFSLEEP:
550 if (ppsc->rfpwr_state == ERFOFF) 550 if (ppsc->rfpwr_state == ERFOFF)
551 break; 551 return false;
552 for (queue_id = 0, i = 0; 552 for (queue_id = 0, i = 0;
553 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) { 553 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) {
554 ring = &pcipriv->dev.tx_ring[queue_id]; 554 ring = &pcipriv->dev.tx_ring[queue_id];
diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/rtlwifi/rtl8192de/phy.c
index 3ac7af1c5509..0883349e1c83 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192de/phy.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192de/phy.c
@@ -3374,7 +3374,7 @@ bool rtl92d_phy_set_rf_power_state(struct ieee80211_hw *hw,
3374 break; 3374 break;
3375 case ERFSLEEP: 3375 case ERFSLEEP:
3376 if (ppsc->rfpwr_state == ERFOFF) 3376 if (ppsc->rfpwr_state == ERFOFF)
3377 break; 3377 return false;
3378 3378
3379 for (queue_id = 0, i = 0; 3379 for (queue_id = 0, i = 0;
3380 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) { 3380 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) {
diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/rtlwifi/rtl8192se/phy.c
index f27171af979c..f10ac1ad9087 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/phy.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/phy.c
@@ -602,7 +602,7 @@ bool rtl92s_phy_set_rf_power_state(struct ieee80211_hw *hw,
602 } 602 }
603 case ERFSLEEP: 603 case ERFSLEEP:
604 if (ppsc->rfpwr_state == ERFOFF) 604 if (ppsc->rfpwr_state == ERFOFF)
605 break; 605 return false;
606 606
607 for (queue_id = 0, i = 0; 607 for (queue_id = 0, i = 0;
608 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) { 608 queue_id < RTL_PCI_MAX_TX_QUEUE_COUNT;) {
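
The same one-line fix repeats across the rtl8192ce/cu/de/se PHY code: requesting ERFSLEEP while the radio is already ERFOFF is an invalid transition, and "break" made the function fall out of the switch and report success. A hypothetical reduction of the shape:

    #include <linux/types.h>

    enum rf_pwr_state { ERFON, ERFOFF, ERFSLEEP };

    static bool set_rf_power_state(enum rf_pwr_state cur,
                                   enum rf_pwr_state req)
    {
            switch (req) {
            case ERFSLEEP:
                    if (cur == ERFOFF)
                            return false; /* was "break": claimed success */
                    /* drain TX rings, then program the sleep state ... */
                    return true;
            default:
                    return true;
            }
    }
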
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index cbd5d701c7e0..63b3ec48c203 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -314,7 +314,7 @@ static const struct of_dev_auxdata *of_dev_lookup(const struct of_dev_auxdata *l
314 if (!lookup) 314 if (!lookup)
315 return NULL; 315 return NULL;
316 316
317 for(; lookup->name != NULL; lookup++) { 317 for(; lookup->compatible != NULL; lookup++) {
318 if (!of_device_is_compatible(np, lookup->compatible)) 318 if (!of_device_is_compatible(np, lookup->compatible))
319 continue; 319 continue;
320 if (of_address_to_resource(np, 0, &res)) 320 if (of_address_to_resource(np, 0, &res))
diff --git a/drivers/oprofile/nmi_timer_int.c b/drivers/oprofile/nmi_timer_int.c
new file mode 100644
index 000000000000..76f1c9357f39
--- /dev/null
+++ b/drivers/oprofile/nmi_timer_int.c
@@ -0,0 +1,173 @@
1/**
2 * @file nmi_timer_int.c
3 *
4 * @remark Copyright 2011 Advanced Micro Devices, Inc.
5 *
6 * @author Robert Richter <robert.richter@amd.com>
7 */
8
9#include <linux/init.h>
10#include <linux/smp.h>
11#include <linux/errno.h>
12#include <linux/oprofile.h>
13#include <linux/perf_event.h>
14
15#ifdef CONFIG_OPROFILE_NMI_TIMER
16
17static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events);
18static int ctr_running;
19
20static struct perf_event_attr nmi_timer_attr = {
21 .type = PERF_TYPE_HARDWARE,
22 .config = PERF_COUNT_HW_CPU_CYCLES,
23 .size = sizeof(struct perf_event_attr),
24 .pinned = 1,
25 .disabled = 1,
26};
27
28static void nmi_timer_callback(struct perf_event *event,
29 struct perf_sample_data *data,
30 struct pt_regs *regs)
31{
32 event->hw.interrupts = 0; /* don't throttle interrupts */
33 oprofile_add_sample(regs, 0);
34}
35
36static int nmi_timer_start_cpu(int cpu)
37{
38 struct perf_event *event = per_cpu(nmi_timer_events, cpu);
39
40 if (!event) {
41 event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL,
42 nmi_timer_callback, NULL);
43 if (IS_ERR(event))
44 return PTR_ERR(event);
45 per_cpu(nmi_timer_events, cpu) = event;
46 }
47
48 if (event && ctr_running)
49 perf_event_enable(event);
50
51 return 0;
52}
53
54static void nmi_timer_stop_cpu(int cpu)
55{
56 struct perf_event *event = per_cpu(nmi_timer_events, cpu);
57
58 if (event && ctr_running)
59 perf_event_disable(event);
60}
61
62static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action,
63 void *data)
64{
65 int cpu = (unsigned long)data;
66 switch (action) {
67 case CPU_DOWN_FAILED:
68 case CPU_ONLINE:
69 nmi_timer_start_cpu(cpu);
70 break;
71 case CPU_DOWN_PREPARE:
72 nmi_timer_stop_cpu(cpu);
73 break;
74 }
75 return NOTIFY_DONE;
76}
77
78static struct notifier_block nmi_timer_cpu_nb = {
79 .notifier_call = nmi_timer_cpu_notifier
80};
81
82static int nmi_timer_start(void)
83{
84 int cpu;
85
86 get_online_cpus();
87 ctr_running = 1;
88 for_each_online_cpu(cpu)
89 nmi_timer_start_cpu(cpu);
90 put_online_cpus();
91
92 return 0;
93}
94
95static void nmi_timer_stop(void)
96{
97 int cpu;
98
99 get_online_cpus();
100 for_each_online_cpu(cpu)
101 nmi_timer_stop_cpu(cpu);
102 ctr_running = 0;
103 put_online_cpus();
104}
105
106static void nmi_timer_shutdown(void)
107{
108 struct perf_event *event;
109 int cpu;
110
111 get_online_cpus();
112 unregister_cpu_notifier(&nmi_timer_cpu_nb);
113 for_each_possible_cpu(cpu) {
114 event = per_cpu(nmi_timer_events, cpu);
115 if (!event)
116 continue;
117 perf_event_disable(event);
118 per_cpu(nmi_timer_events, cpu) = NULL;
119 perf_event_release_kernel(event);
120 }
121
122 put_online_cpus();
123}
124
125static int nmi_timer_setup(void)
126{
127 int cpu, err;
128 u64 period;
129
130 /* clock cycles per tick: */
131 period = (u64)cpu_khz * 1000;
132 do_div(period, HZ);
133 nmi_timer_attr.sample_period = period;
134
135 get_online_cpus();
136 err = register_cpu_notifier(&nmi_timer_cpu_nb);
137 if (err)
138 goto out;
139 /* can't attach events to offline cpus: */
140 for_each_online_cpu(cpu) {
141 err = nmi_timer_start_cpu(cpu);
142 if (err)
143 break;
144 }
145 if (err)
146 nmi_timer_shutdown();
147out:
148 put_online_cpus();
149 return err;
150}
151
152int __init op_nmi_timer_init(struct oprofile_operations *ops)
153{
154 int err = 0;
155
156 err = nmi_timer_setup();
157 if (err)
158 return err;
159 nmi_timer_shutdown(); /* only check, don't alloc */
160
161 ops->create_files = NULL;
162 ops->setup = nmi_timer_setup;
163 ops->shutdown = nmi_timer_shutdown;
164 ops->start = nmi_timer_start;
165 ops->stop = nmi_timer_stop;
166 ops->cpu_type = "timer";
167
168 printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
169
170 return 0;
171}
172
173#endif
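
nmi_timer_setup() above sizes the perf sample period so that one counter overflow, and hence one NMI and one oprofile sample, lands per timer tick (about HZ samples per second per CPU). Worked through for a hypothetical 2 GHz CPU with HZ=1000:

    #include <linux/types.h>
    #include <asm/div64.h>

    static u64 cycles_per_tick(u64 cpu_khz, unsigned int hz)
    {
            u64 period = cpu_khz * 1000;    /* 2,000,000 kHz -> 2e9 cycles/s */

            do_div(period, hz);             /* HZ=1000 -> 2,000,000 cycles/tick */
            return period;
    }
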
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
index f8c752e408a6..ed2c3ec07024 100644
--- a/drivers/oprofile/oprof.c
+++ b/drivers/oprofile/oprof.c
@@ -246,37 +246,31 @@ static int __init oprofile_init(void)
246 int err; 246 int err;
247 247
248 /* always init architecture to setup backtrace support */ 248 /* always init architecture to setup backtrace support */
249 timer_mode = 0;
249 err = oprofile_arch_init(&oprofile_ops); 250 err = oprofile_arch_init(&oprofile_ops);
251 if (!err) {
252 if (!timer && !oprofilefs_register())
253 return 0;
254 oprofile_arch_exit();
255 }
250 256
251 timer_mode = err || timer; /* fall back to timer mode on errors */ 257 /* setup timer mode: */
252 if (timer_mode) { 258 timer_mode = 1;
253 if (!err) 259 /* no nmi timer mode if oprofile.timer is set */
254 oprofile_arch_exit(); 260 if (timer || op_nmi_timer_init(&oprofile_ops)) {
255 err = oprofile_timer_init(&oprofile_ops); 261 err = oprofile_timer_init(&oprofile_ops);
256 if (err) 262 if (err)
257 return err; 263 return err;
258 } 264 }
259 265
260 err = oprofilefs_register(); 266 return oprofilefs_register();
261 if (!err)
262 return 0;
263
264 /* failed */
265 if (timer_mode)
266 oprofile_timer_exit();
267 else
268 oprofile_arch_exit();
269
270 return err;
271} 267}
272 268
273 269
274static void __exit oprofile_exit(void) 270static void __exit oprofile_exit(void)
275{ 271{
276 oprofilefs_unregister(); 272 oprofilefs_unregister();
277 if (timer_mode) 273 if (!timer_mode)
278 oprofile_timer_exit();
279 else
280 oprofile_arch_exit(); 274 oprofile_arch_exit();
281} 275}
282 276
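
After this rework oprofile_init() tries the profiling backends strictly in order of preference, falling back one level at a time instead of juggling intertwined error paths. A condensed restatement of the new function, for readability of the hunk above (not standalone; it uses the file-local oprofile_ops, timer and timer_mode):

    static int __init oprofile_init_sketch(void)
    {
            int err = oprofile_arch_init(&oprofile_ops); /* 1. HW counters */

            if (!err && !timer && !oprofilefs_register())
                    return 0;                    /* arch backend in use */
            if (!err)
                    oprofile_arch_exit();

            timer_mode = 1;
            if (timer || op_nmi_timer_init(&oprofile_ops)) /* 2. NMI timer */
                    err = oprofile_timer_init(&oprofile_ops); /* 3. hrtimer */

            return err ? err : oprofilefs_register();
    }
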
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
index 177b73de5e5f..d32ef816337c 100644
--- a/drivers/oprofile/oprof.h
+++ b/drivers/oprofile/oprof.h
@@ -35,7 +35,15 @@ struct dentry;
35 35
36void oprofile_create_files(struct super_block *sb, struct dentry *root); 36void oprofile_create_files(struct super_block *sb, struct dentry *root);
37int oprofile_timer_init(struct oprofile_operations *ops); 37int oprofile_timer_init(struct oprofile_operations *ops);
38void oprofile_timer_exit(void); 38#ifdef CONFIG_OPROFILE_NMI_TIMER
39int op_nmi_timer_init(struct oprofile_operations *ops);
40#else
41static inline int op_nmi_timer_init(struct oprofile_operations *ops)
42{
43 return -ENODEV;
44}
45#endif
46
39 47
40int oprofile_set_ulong(unsigned long *addr, unsigned long val); 48int oprofile_set_ulong(unsigned long *addr, unsigned long val);
41int oprofile_set_timeout(unsigned long time); 49int oprofile_set_timeout(unsigned long time);
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
index 89f63456646f..84a208dbed93 100644
--- a/drivers/oprofile/oprofile_files.c
+++ b/drivers/oprofile/oprofile_files.c
@@ -45,7 +45,7 @@ static ssize_t timeout_write(struct file *file, char const __user *buf,
45 return -EINVAL; 45 return -EINVAL;
46 46
47 retval = oprofilefs_ulong_from_user(&val, buf, count); 47 retval = oprofilefs_ulong_from_user(&val, buf, count);
48 if (retval) 48 if (retval <= 0)
49 return retval; 49 return retval;
50 50
51 retval = oprofile_set_timeout(val); 51 retval = oprofile_set_timeout(val);
@@ -84,7 +84,7 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou
84 return -EINVAL; 84 return -EINVAL;
85 85
86 retval = oprofilefs_ulong_from_user(&val, buf, count); 86 retval = oprofilefs_ulong_from_user(&val, buf, count);
87 if (retval) 87 if (retval <= 0)
88 return retval; 88 return retval;
89 89
90 retval = oprofile_set_ulong(&oprofile_backtrace_depth, val); 90 retval = oprofile_set_ulong(&oprofile_backtrace_depth, val);
@@ -141,9 +141,10 @@ static ssize_t enable_write(struct file *file, char const __user *buf, size_t co
141 return -EINVAL; 141 return -EINVAL;
142 142
143 retval = oprofilefs_ulong_from_user(&val, buf, count); 143 retval = oprofilefs_ulong_from_user(&val, buf, count);
144 if (retval) 144 if (retval <= 0)
145 return retval; 145 return retval;
146 146
147 retval = 0;
147 if (val) 148 if (val)
148 retval = oprofile_start(); 149 retval = oprofile_start();
149 else 150 else
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index d0de6cc2d7a5..2f0aa0f700e6 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -60,6 +60,13 @@ ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user *buf, size_t cou
60} 60}
61 61
62 62
63/*
64 * Note: If oprofilefs_ulong_from_user() returns 0, then *val remains
65 * unchanged and might be uninitialized. This follows write syscall
66 * implementation when count is zero: "If count is zero ... [and if]
67 * no errors are detected, 0 will be returned without causing any
68 * other effect." (man 2 write)
69 */
63int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count) 70int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count)
64{ 71{
65 char tmpbuf[TMPBUFSIZE]; 72 char tmpbuf[TMPBUFSIZE];
@@ -79,7 +86,7 @@ int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_
79 raw_spin_lock_irqsave(&oprofilefs_lock, flags); 86 raw_spin_lock_irqsave(&oprofilefs_lock, flags);
80 *val = simple_strtoul(tmpbuf, NULL, 0); 87 *val = simple_strtoul(tmpbuf, NULL, 0);
81 raw_spin_unlock_irqrestore(&oprofilefs_lock, flags); 88 raw_spin_unlock_irqrestore(&oprofilefs_lock, flags);
82 return 0; 89 return count;
83} 90}
84 91
85 92
@@ -99,7 +106,7 @@ static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_
99 return -EINVAL; 106 return -EINVAL;
100 107
101 retval = oprofilefs_ulong_from_user(&value, buf, count); 108 retval = oprofilefs_ulong_from_user(&value, buf, count);
102 if (retval) 109 if (retval <= 0)
103 return retval; 110 return retval;
104 111
105 retval = oprofile_set_ulong(file->private_data, value); 112 retval = oprofile_set_ulong(file->private_data, value);
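
oprofilefs_ulong_from_user() now follows write(2) semantics: negative on error, 0 for a zero-length write (leaving *val untouched), and count on success. Callers therefore propagate anything <= 0 and only act on a positive return, as every hunk above does; a minimal sketch with a hypothetical foo_write:

    #include <linux/fs.h>
    #include <linux/oprofile.h>

    static ssize_t foo_write(struct file *file, char const __user *buf,
                             size_t count, loff_t *offset)
    {
            unsigned long val;
            int retval;

            retval = oprofilefs_ulong_from_user(&val, buf, count);
            if (retval <= 0)        /* error, or count == 0: val not set */
                    return retval;

            /* retval == count: val is valid, consume it here */
            return count;
    }
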
diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c
index 878fba126582..93404f72dfa8 100644
--- a/drivers/oprofile/timer_int.c
+++ b/drivers/oprofile/timer_int.c
@@ -97,24 +97,24 @@ static struct notifier_block __refdata oprofile_cpu_notifier = {
97 .notifier_call = oprofile_cpu_notify, 97 .notifier_call = oprofile_cpu_notify,
98}; 98};
99 99
100int oprofile_timer_init(struct oprofile_operations *ops) 100static int oprofile_hrtimer_setup(void)
101{ 101{
102 int rc; 102 return register_hotcpu_notifier(&oprofile_cpu_notifier);
103
104 rc = register_hotcpu_notifier(&oprofile_cpu_notifier);
105 if (rc)
106 return rc;
107 ops->create_files = NULL;
108 ops->setup = NULL;
109 ops->shutdown = NULL;
110 ops->start = oprofile_hrtimer_start;
111 ops->stop = oprofile_hrtimer_stop;
112 ops->cpu_type = "timer";
113 printk(KERN_INFO "oprofile: using timer interrupt.\n");
114 return 0;
115} 103}
116 104
117void oprofile_timer_exit(void) 105static void oprofile_hrtimer_shutdown(void)
118{ 106{
119 unregister_hotcpu_notifier(&oprofile_cpu_notifier); 107 unregister_hotcpu_notifier(&oprofile_cpu_notifier);
120} 108}
109
110int oprofile_timer_init(struct oprofile_operations *ops)
111{
112 ops->create_files = NULL;
113 ops->setup = oprofile_hrtimer_setup;
114 ops->shutdown = oprofile_hrtimer_shutdown;
115 ops->start = oprofile_hrtimer_start;
116 ops->stop = oprofile_hrtimer_stop;
117 ops->cpu_type = "timer";
118 printk(KERN_INFO "oprofile: using timer interrupt.\n");
119 return 0;
120}
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 7ec56fb0bd78..b0dd08e6a9da 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -13,6 +13,7 @@
13#include <linux/export.h> 13#include <linux/export.h>
14#include <linux/pci-ats.h> 14#include <linux/pci-ats.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/slab.h>
16 17
17#include "pci.h" 18#include "pci.h"
18 19
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index fce1c54a0c8d..9ddf69e3bbef 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -132,6 +132,18 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
132 if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle)) 132 if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle))
133 return AE_OK; 133 return AE_OK;
134 134
135 pdev = pbus->self;
136 if (pdev && pci_is_pcie(pdev)) {
137 tmp = acpi_find_root_bridge_handle(pdev);
138 if (tmp) {
139 struct acpi_pci_root *root = acpi_pci_find_root(tmp);
140
141 if (root && (root->osc_control_set &
142 OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
143 return AE_OK;
144 }
145 }
146
135 acpi_evaluate_integer(handle, "_ADR", NULL, &adr); 147 acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
136 device = (adr >> 16) & 0xffff; 148 device = (adr >> 16) & 0xffff;
137 function = adr & 0xffff; 149 function = adr & 0xffff;
@@ -213,7 +225,6 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
213 225
214 pdev = pci_get_slot(pbus, PCI_DEVFN(device, function)); 226 pdev = pci_get_slot(pbus, PCI_DEVFN(device, function));
215 if (pdev) { 227 if (pdev) {
216 pdev->current_state = PCI_D0;
217 slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON); 228 slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON);
218 pci_dev_put(pdev); 229 pci_dev_put(pdev);
219 } 230 }
@@ -459,17 +470,8 @@ static int add_bridge(acpi_handle handle)
459{ 470{
460 acpi_status status; 471 acpi_status status;
461 unsigned long long tmp; 472 unsigned long long tmp;
462 struct acpi_pci_root *root;
463 acpi_handle dummy_handle; 473 acpi_handle dummy_handle;
464 474
465 /*
466 * We shouldn't use this bridge if PCIe native hotplug control has been
467 * granted by the BIOS for it.
468 */
469 root = acpi_pci_find_root(handle);
470 if (root && (root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
471 return -ENODEV;
472
473 /* if the bridge doesn't have _STA, we assume it is always there */ 475 /* if the bridge doesn't have _STA, we assume it is always there */
474 status = acpi_get_handle(handle, "_STA", &dummy_handle); 476 status = acpi_get_handle(handle, "_STA", &dummy_handle);
475 if (ACPI_SUCCESS(status)) { 477 if (ACPI_SUCCESS(status)) {
@@ -1385,19 +1387,11 @@ static void handle_hotplug_event_func(acpi_handle handle, u32 type,
1385static acpi_status 1387static acpi_status
1386find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv) 1388find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
1387{ 1389{
1388 struct acpi_pci_root *root;
1389 int *count = (int *)context; 1390 int *count = (int *)context;
1390 1391
1391 if (!acpi_is_root_bridge(handle)) 1392 if (!acpi_is_root_bridge(handle))
1392 return AE_OK; 1393 return AE_OK;
1393 1394
1394 root = acpi_pci_find_root(handle);
1395 if (!root)
1396 return AE_OK;
1397
1398 if (root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL)
1399 return AE_OK;
1400
1401 (*count)++; 1395 (*count)++;
1402 acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY, 1396 acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
1403 handle_hotplug_event_bridge, NULL); 1397 handle_hotplug_event_bridge, NULL);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index b82c155d7b37..1969a3ee3058 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -283,6 +283,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
283 struct resource *res; 283 struct resource *res;
284 struct pci_dev *pdev; 284 struct pci_dev *pdev;
285 struct pci_sriov *iov = dev->sriov; 285 struct pci_sriov *iov = dev->sriov;
286 int bars = 0;
286 287
287 if (!nr_virtfn) 288 if (!nr_virtfn)
288 return 0; 289 return 0;
@@ -307,6 +308,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
307 308
308 nres = 0; 309 nres = 0;
309 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 310 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
311 bars |= (1 << (i + PCI_IOV_RESOURCES));
310 res = dev->resource + PCI_IOV_RESOURCES + i; 312 res = dev->resource + PCI_IOV_RESOURCES + i;
311 if (res->parent) 313 if (res->parent)
312 nres++; 314 nres++;
@@ -324,6 +326,11 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
324 return -ENOMEM; 326 return -ENOMEM;
325 } 327 }
326 328
329 if (pci_enable_resources(dev, bars)) {
330 dev_err(&dev->dev, "SR-IOV: IOV BARS not allocated\n");
331 return -ENOMEM;
332 }
333
327 if (iov->link != dev->devfn) { 334 if (iov->link != dev->devfn) {
328 pdev = pci_get_slot(dev->bus, iov->link); 335 pdev = pci_get_slot(dev->bus, iov->link);
329 if (!pdev) 336 if (!pdev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6f45a73c6e9f..6d4a5319148d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -664,6 +664,9 @@ static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state)
664 error = platform_pci_set_power_state(dev, state); 664 error = platform_pci_set_power_state(dev, state);
665 if (!error) 665 if (!error)
666 pci_update_current_state(dev, state); 666 pci_update_current_state(dev, state);
667 /* Fall back to PCI_D0 if native PM is not supported */
668 if (!dev->pm_cap)
669 dev->current_state = PCI_D0;
667 } else { 670 } else {
668 error = -ENODEV; 671 error = -ENODEV;
669 /* Fall back to PCI_D0 if native PM is not supported */ 672 /* Fall back to PCI_D0 if native PM is not supported */
@@ -1126,7 +1129,11 @@ static int __pci_enable_device_flags(struct pci_dev *dev,
1126 if (atomic_add_return(1, &dev->enable_cnt) > 1) 1129 if (atomic_add_return(1, &dev->enable_cnt) > 1)
1127 return 0; /* already enabled */ 1130 return 0; /* already enabled */
1128 1131
1129 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) 1132 /* only skip sriov related */
1133 for (i = 0; i <= PCI_ROM_RESOURCE; i++)
1134 if (dev->resource[i].flags & flags)
1135 bars |= (1 << i);
1136 for (i = PCI_BRIDGE_RESOURCES; i < DEVICE_COUNT_RESOURCE; i++)
1130 if (dev->resource[i].flags & flags) 1137 if (dev->resource[i].flags & flags)
1131 bars |= (1 << i); 1138 bars |= (1 << i);
1132 1139
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index fa4d9f324189..8e286259a007 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -319,20 +319,6 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
319} 319}
320EXPORT_SYMBOL_GPL(rtc_read_alarm); 320EXPORT_SYMBOL_GPL(rtc_read_alarm);
321 321
322static int ___rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
323{
324 int err;
325
326 if (!rtc->ops)
327 err = -ENODEV;
328 else if (!rtc->ops->set_alarm)
329 err = -EINVAL;
330 else
331 err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
332
333 return err;
334}
335
336static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) 322static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
337{ 323{
338 struct rtc_time tm; 324 struct rtc_time tm;
@@ -356,7 +342,14 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
356 * over right here, before we set the alarm. 342 * over right here, before we set the alarm.
357 */ 343 */
358 344
359 return ___rtc_set_alarm(rtc, alarm); 345 if (!rtc->ops)
346 err = -ENODEV;
347 else if (!rtc->ops->set_alarm)
348 err = -EINVAL;
349 else
350 err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
351
352 return err;
360} 353}
361 354
362int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) 355int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
@@ -770,20 +763,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
770 return 0; 763 return 0;
771} 764}
772 765
773static void rtc_alarm_disable(struct rtc_device *rtc)
774{
775 struct rtc_wkalrm alarm;
776 struct rtc_time tm;
777
778 __rtc_read_time(rtc, &tm);
779
780 alarm.time = rtc_ktime_to_tm(ktime_add(rtc_tm_to_ktime(tm),
781 ktime_set(300, 0)));
782 alarm.enabled = 0;
783
784 ___rtc_set_alarm(rtc, &alarm);
785}
786
787/** 766/**
788 * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue 767 * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue
789 * @rtc rtc device 768 * @rtc rtc device
@@ -805,10 +784,8 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
805 struct rtc_wkalrm alarm; 784 struct rtc_wkalrm alarm;
806 int err; 785 int err;
807 next = timerqueue_getnext(&rtc->timerqueue); 786 next = timerqueue_getnext(&rtc->timerqueue);
808 if (!next) { 787 if (!next)
809 rtc_alarm_disable(rtc);
810 return; 788 return;
811 }
812 alarm.time = rtc_ktime_to_tm(next->expires); 789 alarm.time = rtc_ktime_to_tm(next->expires);
813 alarm.enabled = 1; 790 alarm.enabled = 1;
814 err = __rtc_set_alarm(rtc, &alarm); 791 err = __rtc_set_alarm(rtc, &alarm);
@@ -870,8 +847,7 @@ again:
870 err = __rtc_set_alarm(rtc, &alarm); 847 err = __rtc_set_alarm(rtc, &alarm);
871 if (err == -ETIME) 848 if (err == -ETIME)
872 goto again; 849 goto again;
873 } else 850 }
874 rtc_alarm_disable(rtc);
875 851
876 mutex_unlock(&rtc->ops_lock); 852 mutex_unlock(&rtc->ops_lock);
877} 853}
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index eda128fc1d38..64aedd8cc095 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -357,10 +357,19 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 static struct rtc_class_ops m41t80_rtc_ops = {
     .read_time = m41t80_rtc_read_time,
     .set_time = m41t80_rtc_set_time,
+    /*
+     * XXX - m41t80 alarm functionality is reported broken.
+     * until it is fixed, don't register alarm functions.
+     *
     .read_alarm = m41t80_rtc_read_alarm,
     .set_alarm = m41t80_rtc_set_alarm,
+     */
     .proc = m41t80_rtc_proc,
+    /*
+     * See above comment on broken alarm
+     *
     .alarm_irq_enable = m41t80_rtc_alarm_irq_enable,
+     */
 };
 
 #if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE)
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 11f07f888223..b79576b64f45 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -55,6 +55,10 @@ static void zfcp_scsi_slave_destroy(struct scsi_device *sdev)
 {
     struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
+    /* if previous slave_alloc returned early, there is nothing to do */
+    if (!zfcp_sdev->port)
+        return;
+
     zfcp_erp_lun_shutdown_wait(sdev, "scssd_1");
     put_device(&zfcp_sdev->port->dev);
 }
diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c
index 5f94d22c491e..542668292900 100644
--- a/drivers/sbus/char/bbc_i2c.c
+++ b/drivers/sbus/char/bbc_i2c.c
@@ -233,13 +233,9 @@ int bbc_i2c_write_buf(struct bbc_i2c_client *client,
     int ret = 0;
 
     while (len > 0) {
-        int err = bbc_i2c_writeb(client, *buf, off);
-
-        if (err < 0) {
-            ret = err;
+        ret = bbc_i2c_writeb(client, *buf, off);
+        if (ret < 0)
             break;
-        }
-
         len--;
         buf++;
         off++;
@@ -253,11 +249,9 @@ int bbc_i2c_read_buf(struct bbc_i2c_client *client,
     int ret = 0;
 
     while (len > 0) {
-        int err = bbc_i2c_readb(client, buf, off);
-        if (err < 0) {
-            ret = err;
+        ret = bbc_i2c_readb(client, buf, off);
+        if (ret < 0)
             break;
-        }
         len--;
         buf++;
         off++;
@@ -422,17 +416,6 @@ static struct platform_driver bbc_i2c_driver = {
     .remove		= __devexit_p(bbc_i2c_remove),
 };
 
-static int __init bbc_i2c_init(void)
-{
-    return platform_driver_register(&bbc_i2c_driver);
-}
-
-static void __exit bbc_i2c_exit(void)
-{
-    platform_driver_unregister(&bbc_i2c_driver);
-}
-
-module_init(bbc_i2c_init);
-module_exit(bbc_i2c_exit);
+module_platform_driver(bbc_i2c_driver);
 
 MODULE_LICENSE("GPL");
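
This and the next four sbus char drivers (display7seg, envctrl, flash, uctrl) are the same mechanical conversion: the hand-written init/exit pair is replaced by module_platform_driver(). Roughly, and eliding the module_driver() indirection in include/linux/device.h, the macro from include/linux/platform_device.h of this era expands to exactly the boilerplate it deletes:

/* Approximate expansion of module_platform_driver(__driver); the real
 * macro goes through module_driver(). */
#define module_platform_driver(__driver) \
static int __init __driver##_init(void) \
{ \
    return platform_driver_register(&(__driver)); \
} \
module_init(__driver##_init); \
static void __exit __driver##_exit(void) \
{ \
    platform_driver_unregister(&(__driver)); \
} \
module_exit(__driver##_exit);
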
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 965a1fccd66a..4b9939726c34 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -275,15 +275,4 @@ static struct platform_driver d7s_driver = {
     .remove		= __devexit_p(d7s_remove),
 };
 
-static int __init d7s_init(void)
-{
-    return platform_driver_register(&d7s_driver);
-}
-
-static void __exit d7s_exit(void)
-{
-    platform_driver_unregister(&d7s_driver);
-}
-
-module_init(d7s_init);
-module_exit(d7s_exit);
+module_platform_driver(d7s_driver);
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index be7b4e56154f..339fd6f65eda 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -1138,16 +1138,6 @@ static struct platform_driver envctrl_driver = {
     .remove		= __devexit_p(envctrl_remove),
 };
 
-static int __init envctrl_init(void)
-{
-    return platform_driver_register(&envctrl_driver);
-}
-
-static void __exit envctrl_exit(void)
-{
-    platform_driver_unregister(&envctrl_driver);
-}
+module_platform_driver(envctrl_driver);
 
-module_init(envctrl_init);
-module_exit(envctrl_exit);
 MODULE_LICENSE("GPL");
diff --git a/drivers/sbus/char/flash.c b/drivers/sbus/char/flash.c
index 73dd4e7afaaa..826157f38694 100644
--- a/drivers/sbus/char/flash.c
+++ b/drivers/sbus/char/flash.c
@@ -216,16 +216,6 @@ static struct platform_driver flash_driver = {
     .remove		= __devexit_p(flash_remove),
 };
 
-static int __init flash_init(void)
-{
-    return platform_driver_register(&flash_driver);
-}
-
-static void __exit flash_cleanup(void)
-{
-    platform_driver_unregister(&flash_driver);
-}
+module_platform_driver(flash_driver);
 
-module_init(flash_init);
-module_exit(flash_cleanup);
 MODULE_LICENSE("GPL");
diff --git a/drivers/sbus/char/uctrl.c b/drivers/sbus/char/uctrl.c
index ebce9639a26a..0b31658ccde5 100644
--- a/drivers/sbus/char/uctrl.c
+++ b/drivers/sbus/char/uctrl.c
@@ -435,16 +435,6 @@ static struct platform_driver uctrl_driver = {
 };
 
 
-static int __init uctrl_init(void)
-{
-    return platform_driver_register(&uctrl_driver);
-}
-
-static void __exit uctrl_exit(void)
-{
-    platform_driver_unregister(&uctrl_driver);
-}
+module_platform_driver(uctrl_driver);
 
-module_init(uctrl_init);
-module_exit(uctrl_exit);
 MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c
index dba72a4e6a1c..1ad0b8225560 100644
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -1906,18 +1906,19 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
     spin_lock(&session->lock);
     task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data,
                  cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
-    if (!task) {
+    if (!task || !task->sc) {
         spin_unlock(&session->lock);
         return -EINVAL;
     }
     sc = task->sc;
-    spin_unlock(&session->lock);
 
     if (!blk_rq_cpu_valid(sc->request))
         cpu = smp_processor_id();
     else
         cpu = sc->request->cpu;
 
+    spin_unlock(&session->lock);
+
     p = &per_cpu(bnx2i_percpu, cpu);
     spin_lock(&p->p_work_lock);
     if (unlikely(!p->iothread)) {
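
The bnx2i hunk is a lock-scope fix: task->sc was loaded under session->lock, but sc->request was dereferenced after the unlock, and the NULL check did not cover task->sc. A compilable sketch of the corrected shape, with hypothetical stand-in types and lock helpers (this is not the driver code):

/* Hypothetical stand-ins for the driver's types and for
 * spin_lock()/spin_unlock(). */
struct request { int cpu; };
struct scsi_cmd { struct request *request; };
struct io_task { struct scsi_cmd *sc; };

void lock(void);
void unlock(void);

int pick_cpu(struct io_task *task, int fallback_cpu)
{
    struct scsi_cmd *sc;
    int cpu;

    lock();
    if (!task || !task->sc) {   /* validate everything touched below */
        unlock();
        return -1;
    }
    sc = task->sc;
    if (sc->request->cpu >= 0)  /* stands in for blk_rq_cpu_valid() */
        cpu = sc->request->cpu;
    else
        cpu = fallback_cpu;
    unlock();   /* drop the lock only after the last dereference */
    return cpu;
}
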
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index cefbe44bb84a..8d67467dd9ce 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -31,6 +31,8 @@
 #include <linux/sysfs.h>
 #include <linux/ctype.h>
 #include <linux/workqueue.h>
+#include <net/dcbnl.h>
+#include <net/dcbevent.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsicam.h>
 #include <scsi/scsi_transport.h>
@@ -101,6 +103,8 @@ static int fcoe_ddp_done(struct fc_lport *, u16);
 static int fcoe_ddp_target(struct fc_lport *, u16, struct scatterlist *,
                unsigned int);
 static int fcoe_cpu_callback(struct notifier_block *, unsigned long, void *);
+static int fcoe_dcb_app_notification(struct notifier_block *notifier,
+                     ulong event, void *ptr);
 
 static bool fcoe_match(struct net_device *netdev);
 static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode);
@@ -129,6 +133,11 @@ static struct notifier_block fcoe_cpu_notifier = {
     .notifier_call = fcoe_cpu_callback,
 };
 
+/* notification function for DCB events */
+static struct notifier_block dcb_notifier = {
+    .notifier_call = fcoe_dcb_app_notification,
+};
+
 static struct scsi_transport_template *fcoe_nport_scsi_transport;
 static struct scsi_transport_template *fcoe_vport_scsi_transport;
 
@@ -1522,6 +1531,8 @@ int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp)
     skb_reset_network_header(skb);
     skb->mac_len = elen;
     skb->protocol = htons(ETH_P_FCOE);
+    skb->priority = port->priority;
+
     if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN &&
         fcoe->realdev->features & NETIF_F_HW_VLAN_TX) {
         skb->vlan_tci = VLAN_TAG_PRESENT |
@@ -1624,6 +1635,7 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
     stats->InvalidCRCCount++;
     if (stats->InvalidCRCCount < 5)
         printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
+    put_cpu();
     return -EINVAL;
 }
 
@@ -1746,6 +1758,7 @@ int fcoe_percpu_receive_thread(void *arg)
  */
 static void fcoe_dev_setup(void)
 {
+    register_dcbevent_notifier(&dcb_notifier);
     register_netdevice_notifier(&fcoe_notifier);
 }
 
@@ -1754,9 +1767,69 @@ static void fcoe_dev_setup(void)
  */
 static void fcoe_dev_cleanup(void)
 {
+    unregister_dcbevent_notifier(&dcb_notifier);
     unregister_netdevice_notifier(&fcoe_notifier);
 }
 
+static struct fcoe_interface *
+fcoe_hostlist_lookup_realdev_port(struct net_device *netdev)
+{
+    struct fcoe_interface *fcoe;
+    struct net_device *real_dev;
+
+    list_for_each_entry(fcoe, &fcoe_hostlist, list) {
+        if (fcoe->netdev->priv_flags & IFF_802_1Q_VLAN)
+            real_dev = vlan_dev_real_dev(fcoe->netdev);
+        else
+            real_dev = fcoe->netdev;
+
+        if (netdev == real_dev)
+            return fcoe;
+    }
+    return NULL;
+}
+
+static int fcoe_dcb_app_notification(struct notifier_block *notifier,
+                     ulong event, void *ptr)
+{
+    struct dcb_app_type *entry = ptr;
+    struct fcoe_interface *fcoe;
+    struct net_device *netdev;
+    struct fcoe_port *port;
+    int prio;
+
+    if (entry->app.selector != DCB_APP_IDTYPE_ETHTYPE)
+        return NOTIFY_OK;
+
+    netdev = dev_get_by_index(&init_net, entry->ifindex);
+    if (!netdev)
+        return NOTIFY_OK;
+
+    fcoe = fcoe_hostlist_lookup_realdev_port(netdev);
+    dev_put(netdev);
+    if (!fcoe)
+        return NOTIFY_OK;
+
+    if (entry->dcbx & DCB_CAP_DCBX_VER_CEE)
+        prio = ffs(entry->app.priority) - 1;
+    else
+        prio = entry->app.priority;
+
+    if (prio < 0)
+        return NOTIFY_OK;
+
+    if (entry->app.protocol == ETH_P_FIP ||
+        entry->app.protocol == ETH_P_FCOE)
+        fcoe->ctlr.priority = prio;
+
+    if (entry->app.protocol == ETH_P_FCOE) {
+        port = lport_priv(fcoe->ctlr.lp);
+        port->priority = prio;
+    }
+
+    return NOTIFY_OK;
+}
+
 /**
  * fcoe_device_notification() - Handler for net device events
  * @notifier: The context of the notification
@@ -1965,6 +2038,46 @@ static bool fcoe_match(struct net_device *netdev)
 }
 
 /**
+ * fcoe_dcb_create() - Initialize DCB attributes and hooks
+ * @netdev: The net_device object of the L2 link that should be queried
+ * @port: The fcoe_port to bind FCoE APP priority with
+ * @
+ */
+static void fcoe_dcb_create(struct fcoe_interface *fcoe)
+{
+#ifdef CONFIG_DCB
+    int dcbx;
+    u8 fup, up;
+    struct net_device *netdev = fcoe->realdev;
+    struct fcoe_port *port = lport_priv(fcoe->ctlr.lp);
+    struct dcb_app app = {
+                .priority = 0,
+                .protocol = ETH_P_FCOE
+             };
+
+    /* setup DCB priority attributes. */
+    if (netdev && netdev->dcbnl_ops && netdev->dcbnl_ops->getdcbx) {
+        dcbx = netdev->dcbnl_ops->getdcbx(netdev);
+
+        if (dcbx & DCB_CAP_DCBX_VER_IEEE) {
+            app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+            up = dcb_ieee_getapp_mask(netdev, &app);
+            app.protocol = ETH_P_FIP;
+            fup = dcb_ieee_getapp_mask(netdev, &app);
+        } else {
+            app.selector = DCB_APP_IDTYPE_ETHTYPE;
+            up = dcb_getapp(netdev, &app);
+            app.protocol = ETH_P_FIP;
+            fup = dcb_getapp(netdev, &app);
+        }
+
+        port->priority = ffs(up) ? ffs(up) - 1 : 0;
+        fcoe->ctlr.priority = ffs(fup) ? ffs(fup) - 1 : port->priority;
+    }
+#endif
+}
+
+/**
  * fcoe_create() - Create a fcoe interface
  * @netdev  : The net_device object the Ethernet interface to create on
  * @fip_mode: The FIP mode for this creation
@@ -2007,6 +2120,9 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
     /* Make this the "master" N_Port */
     fcoe->ctlr.lp = lport;
 
+    /* setup DCB priority attributes. */
+    fcoe_dcb_create(fcoe);
+
     /* add to lports list */
     fcoe_hostlist_add(lport);
 
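
Both fcoe_dcb_create() and fcoe_dcb_app_notification() above convert a CEE APP-table result, which is an 802.1p priority *bitmap*, into the numeric priority that skb->priority carries, while IEEE DCBX already reports a plain number. A small runnable userspace illustration of the ffs() idiom they share (POSIX ffs() from strings.h, same semantics as the kernel's):

/* CEE bitmap -> numeric priority: lowest set bit, 1-based, hence -1. */
#include <stdio.h>
#include <strings.h>	/* ffs() */

static int cee_bitmap_to_prio(unsigned int up)
{
    return ffs(up) ? ffs(up) - 1 : 0;	/* same idiom as fcoe_dcb_create() */
}

int main(void)
{
    printf("%d\n", cee_bitmap_to_prio(0x08)); /* bit 3 set -> priority 3 */
    printf("%d\n", cee_bitmap_to_prio(0x00)); /* empty map -> fall back to 0 */
    return 0;
}
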
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index c74c4b8e71ef..e7522dcc296e 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -320,6 +320,7 @@ static void fcoe_ctlr_solicit(struct fcoe_ctlr *fip, struct fcoe_fcf *fcf)
 
     skb_put(skb, sizeof(*sol));
     skb->protocol = htons(ETH_P_FIP);
+    skb->priority = fip->priority;
     skb_reset_mac_header(skb);
     skb_reset_network_header(skb);
     fip->send(fip, skb);
@@ -474,6 +475,7 @@ static void fcoe_ctlr_send_keep_alive(struct fcoe_ctlr *fip,
     }
     skb_put(skb, len);
     skb->protocol = htons(ETH_P_FIP);
+    skb->priority = fip->priority;
     skb_reset_mac_header(skb);
     skb_reset_network_header(skb);
     fip->send(fip, skb);
@@ -566,6 +568,7 @@ static int fcoe_ctlr_encaps(struct fcoe_ctlr *fip, struct fc_lport *lport,
     cap->fip.fip_dl_len = htons(dlen / FIP_BPW);
 
     skb->protocol = htons(ETH_P_FIP);
+    skb->priority = fip->priority;
     skb_reset_mac_header(skb);
     skb_reset_network_header(skb);
     return 0;
@@ -1911,6 +1914,7 @@ static void fcoe_ctlr_vn_send(struct fcoe_ctlr *fip,
 
     skb_put(skb, len);
     skb->protocol = htons(ETH_P_FIP);
+    skb->priority = fip->priority;
     skb_reset_mac_header(skb);
     skb_reset_network_header(skb);
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 4e041f6d808c..d570573b7963 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -4335,7 +4335,7 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle)
     /* insert into event log */
     sz = offsetof(Mpi2EventNotificationReply_t, EventData) +
         sizeof(Mpi2EventDataSasDeviceStatusChange_t);
-    event_reply = kzalloc(sz, GFP_KERNEL);
+    event_reply = kzalloc(sz, GFP_ATOMIC);
     if (!event_reply) {
         printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
             ioc->name, __FILE__, __LINE__, __func__);
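
The mpt2sas one-liner is an allocation-context fix: this event-log path can run in atomic context, where a GFP_KERNEL allocation might sleep. A kernel-context sketch of the rule being applied (not a standalone program; event_buf_alloc is a hypothetical helper name):

#include <linux/slab.h>		/* kzalloc(), GFP_KERNEL, GFP_ATOMIC */
#include <linux/types.h>

/* GFP_KERNEL may block for memory reclaim, so it is only safe where
 * sleeping is allowed; IRQ handlers and spinlocked regions must use
 * GFP_ATOMIC and handle NULL, as the caller above already does. */
static void *event_buf_alloc(size_t sz, bool can_sleep)
{
    return kzalloc(sz, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
}
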
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index ac326c41e931..6465dae5883a 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1762,12 +1762,31 @@ qla2x00_get_host_port_state(struct Scsi_Host *shost)
     scsi_qla_host_t *vha = shost_priv(shost);
     struct scsi_qla_host *base_vha = pci_get_drvdata(vha->hw->pdev);
 
-    if (!base_vha->flags.online)
+    if (!base_vha->flags.online) {
         fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
-    else if (atomic_read(&base_vha->loop_state) == LOOP_TIMEOUT)
-        fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
-    else
+        return;
+    }
+
+    switch (atomic_read(&base_vha->loop_state)) {
+    case LOOP_UPDATE:
+        fc_host_port_state(shost) = FC_PORTSTATE_DIAGNOSTICS;
+        break;
+    case LOOP_DOWN:
+        if (test_bit(LOOP_RESYNC_NEEDED, &base_vha->dpc_flags))
+            fc_host_port_state(shost) = FC_PORTSTATE_DIAGNOSTICS;
+        else
+            fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+        break;
+    case LOOP_DEAD:
+        fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+        break;
+    case LOOP_READY:
         fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+        break;
+    default:
+        fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
+        break;
+    }
 }
 
 static int
1772 1791
1773static int 1792static int
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 9df4787715c0..f3cddd5800c3 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -12,17 +12,17 @@
  * | Level                        | Last Value Used | Holes          |
  * ----------------------------------------------------------------------
  * | Module Init and Probe        | 0x0116          |                |
- * | Mailbox commands             | 0x1129          |                |
+ * | Mailbox commands             | 0x112b          |                |
  * | Device Discovery             | 0x2083          |                |
  * | Queue Command and IO tracing | 0x302e          | 0x3008         |
  * | DPC Thread                   | 0x401c          |                |
  * | Async Events                 | 0x5059          |                |
- * | Timer Routines               | 0x600d          |                |
+ * | Timer Routines               | 0x6010          | 0x600e,0x600f  |
  * | User Space Interactions      | 0x709d          |                |
- * | Task Management              | 0x8041          |                |
+ * | Task Management              | 0x8041          | 0x800b         |
  * | AER/EEH                      | 0x900f          |                |
 * | Virtual Port                 | 0xa007          |                |
- * | ISP82XX Specific             | 0xb051          |                |
+ * | ISP82XX Specific             | 0xb052          |                |
 * | MultiQ                       | 0xc00b          |                |
 * | Misc                         | 0xd00b          |                |
 * ----------------------------------------------------------------------
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index ce32d8135c9e..c0c11afb685c 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -578,6 +578,7 @@ extern int qla82xx_check_md_needed(scsi_qla_host_t *);
 extern void qla82xx_chip_reset_cleanup(scsi_qla_host_t *);
 extern int qla82xx_mbx_beacon_ctl(scsi_qla_host_t *, int);
 extern char *qdev_state(uint32_t);
+extern void qla82xx_clear_pending_mbx(scsi_qla_host_t *);
 
 /* BSG related functions */
 extern int qla24xx_bsg_request(struct fc_bsg_job *);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index f03e915f1877..54ea68cec4c5 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1509,7 +1509,8 @@ enable_82xx_npiv:
             &ha->fw_xcb_count, NULL, NULL,
             &ha->max_npiv_vports, NULL);
 
-        if (!fw_major_version && ql2xallocfwdump)
+        if (!fw_major_version && ql2xallocfwdump
+            && !IS_QLA82XX(ha))
             qla2x00_alloc_fw_dump(vha);
         }
     } else {
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index dbec89622a0f..a4b267e60a35 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -120,11 +120,10 @@ qla2x00_prep_cont_type0_iocb(struct scsi_qla_host *vha)
  * Returns a pointer to the continuation type 1 IOCB packet.
  */
 static inline cont_a64_entry_t *
-qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha)
+qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *vha, struct req_que *req)
 {
     cont_a64_entry_t *cont_pkt;
 
-    struct req_que *req = vha->req;
     /* Adjust ring index. */
     req->ring_index++;
     if (req->ring_index == req->length) {
@@ -292,7 +291,7 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt,
              * Five DSDs are available in the Continuation
              * Type 1 IOCB.
              */
-            cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
+            cont_pkt = qla2x00_prep_cont_type1_iocb(vha, vha->req);
             cur_dsd = (uint32_t *)cont_pkt->dseg_0_address;
             avail_dsds = 5;
         }
@@ -684,7 +683,7 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt,
              * Five DSDs are available in the Continuation
              * Type 1 IOCB.
              */
-            cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
+            cont_pkt = qla2x00_prep_cont_type1_iocb(vha, vha->req);
             cur_dsd = (uint32_t *)cont_pkt->dseg_0_address;
             avail_dsds = 5;
         }
@@ -2070,7 +2069,8 @@ qla2x00_ct_iocb(srb_t *sp, ms_iocb_entry_t *ct_iocb)
              * Five DSDs are available in the Cont.
              * Type 1 IOCB.
              */
-            cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
+            cont_pkt = qla2x00_prep_cont_type1_iocb(vha,
+                vha->hw->req_q_map[0]);
             cur_dsd = (uint32_t *) cont_pkt->dseg_0_address;
             avail_dsds = 5;
             cont_iocb_prsnt = 1;
@@ -2096,6 +2096,7 @@ qla24xx_ct_iocb(srb_t *sp, struct ct_entry_24xx *ct_iocb)
     int index;
     uint16_t tot_dsds;
     scsi_qla_host_t *vha = sp->fcport->vha;
+    struct qla_hw_data *ha = vha->hw;
     struct fc_bsg_job *bsg_job = ((struct srb_ctx *)sp->ctx)->u.bsg_job;
     int loop_iterartion = 0;
     int cont_iocb_prsnt = 0;
@@ -2141,7 +2142,8 @@ qla24xx_ct_iocb(srb_t *sp, struct ct_entry_24xx *ct_iocb)
              * Five DSDs are available in the Cont.
              * Type 1 IOCB.
              */
-            cont_pkt = qla2x00_prep_cont_type1_iocb(vha);
+            cont_pkt = qla2x00_prep_cont_type1_iocb(vha,
+                ha->req_q_map[0]);
             cur_dsd = (uint32_t *) cont_pkt->dseg_0_address;
             avail_dsds = 5;
             cont_iocb_prsnt = 1;
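
The qla_iocb.c refactor threads the request queue through qla2x00_prep_cont_type1_iocb() instead of letting the helper re-derive it from vha->req, which is ambiguous once a host owns several queues; the CT-IOCB callers can then name req_q_map[0] explicitly. A toy sketch (hypothetical types) of the before/after shape:

/* Hypothetical ring types; the point is only the signature change. */
struct req_que { int ring_index; int length; };
struct host { struct req_que *req; /* "default" queue */ };

/* before: the helper guesses the queue from the host object */
static void advance_old(struct host *h)
{
    struct req_que *req = h->req;

    if (++req->ring_index == req->length)
        req->ring_index = 0;	/* wrap the ring */
}

/* after: the caller names the queue it is filling, e.g. req_q_map[0] */
static void advance_new(struct req_que *req)
{
    if (++req->ring_index == req->length)
        req->ring_index = 0;
}
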
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 2516adf1aeea..7b91b290ffd6 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1741,7 +1741,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
                     resid, scsi_bufflen(cp));
 
                 cp->result = DID_ERROR << 16 | lscsi_status;
-                break;
+                goto check_scsi_status;
             }
 
             if (!lscsi_status &&
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 3b3cec9f6ac2..82a33533ed26 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -79,8 +79,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
         mcp->mb[0] = MBS_LINK_DOWN_ERROR;
         ql_log(ql_log_warn, base_vha, 0x1004,
             "FW hung = %d.\n", ha->flags.isp82xx_fw_hung);
-        rval = QLA_FUNCTION_FAILED;
-        goto premature_exit;
+        return QLA_FUNCTION_TIMEOUT;
     }
 
     /*
@@ -163,6 +162,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                 HINT_MBX_INT_PENDING) {
                 spin_unlock_irqrestore(&ha->hardware_lock,
                     flags);
+                ha->flags.mbox_busy = 0;
                 ql_dbg(ql_dbg_mbx, base_vha, 0x1010,
                     "Pending mailbox timeout, exiting.\n");
                 rval = QLA_FUNCTION_TIMEOUT;
@@ -188,6 +188,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                 HINT_MBX_INT_PENDING) {
                 spin_unlock_irqrestore(&ha->hardware_lock,
                     flags);
+                ha->flags.mbox_busy = 0;
                 ql_dbg(ql_dbg_mbx, base_vha, 0x1012,
                     "Pending mailbox timeout, exiting.\n");
                 rval = QLA_FUNCTION_TIMEOUT;
@@ -302,7 +303,15 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
         if (!test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) &&
             !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
             !test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
-
+            if (IS_QLA82XX(ha)) {
+                ql_dbg(ql_dbg_mbx, vha, 0x112a,
+                    "disabling pause transmit on port "
+                    "0 & 1.\n");
+                qla82xx_wr_32(ha,
+                    QLA82XX_CRB_NIU + 0x98,
+                    CRB_NIU_XG_PAUSE_CTL_P0|
+                    CRB_NIU_XG_PAUSE_CTL_P1);
+            }
             ql_log(ql_log_info, base_vha, 0x101c,
                 "Mailbox cmd timeout occured. "
                 "Scheduling ISP abort eeh_busy=0x%x.\n",
@@ -318,7 +327,15 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
         if (!test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) &&
             !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
             !test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
-
+            if (IS_QLA82XX(ha)) {
+                ql_dbg(ql_dbg_mbx, vha, 0x112b,
+                    "disabling pause transmit on port "
+                    "0 & 1.\n");
+                qla82xx_wr_32(ha,
+                    QLA82XX_CRB_NIU + 0x98,
+                    CRB_NIU_XG_PAUSE_CTL_P0|
+                    CRB_NIU_XG_PAUSE_CTL_P1);
+            }
             ql_log(ql_log_info, base_vha, 0x101e,
                 "Mailbox cmd timeout occured. "
                 "Scheduling ISP abort.\n");
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 94bded5ddce4..03554934b0a5 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -3817,6 +3817,20 @@ exit:
3817 return rval; 3817 return rval;
3818} 3818}
3819 3819
3820void qla82xx_clear_pending_mbx(scsi_qla_host_t *vha)
3821{
3822 struct qla_hw_data *ha = vha->hw;
3823
3824 if (ha->flags.mbox_busy) {
3825 ha->flags.mbox_int = 1;
3826 ha->flags.mbox_busy = 0;
3827 ql_log(ql_log_warn, vha, 0x6010,
3828 "Doing premature completion of mbx command.\n");
3829 if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags))
3830 complete(&ha->mbx_intr_comp);
3831 }
3832}
3833
3820void qla82xx_watchdog(scsi_qla_host_t *vha) 3834void qla82xx_watchdog(scsi_qla_host_t *vha)
3821{ 3835{
3822 uint32_t dev_state, halt_status; 3836 uint32_t dev_state, halt_status;
@@ -3839,9 +3853,13 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
3839 qla2xxx_wake_dpc(vha); 3853 qla2xxx_wake_dpc(vha);
3840 } else { 3854 } else {
3841 if (qla82xx_check_fw_alive(vha)) { 3855 if (qla82xx_check_fw_alive(vha)) {
3856 ql_dbg(ql_dbg_timer, vha, 0x6011,
3857 "disabling pause transmit on port 0 & 1.\n");
3858 qla82xx_wr_32(ha, QLA82XX_CRB_NIU + 0x98,
3859 CRB_NIU_XG_PAUSE_CTL_P0|CRB_NIU_XG_PAUSE_CTL_P1);
3842 halt_status = qla82xx_rd_32(ha, 3860 halt_status = qla82xx_rd_32(ha,
3843 QLA82XX_PEG_HALT_STATUS1); 3861 QLA82XX_PEG_HALT_STATUS1);
3844 ql_dbg(ql_dbg_timer, vha, 0x6005, 3862 ql_log(ql_log_info, vha, 0x6005,
3845 "dumping hw/fw registers:.\n " 3863 "dumping hw/fw registers:.\n "
3846 " PEG_HALT_STATUS1: 0x%x, PEG_HALT_STATUS2: 0x%x,.\n " 3864 " PEG_HALT_STATUS1: 0x%x, PEG_HALT_STATUS2: 0x%x,.\n "
3847 " PEG_NET_0_PC: 0x%x, PEG_NET_1_PC: 0x%x,.\n " 3865 " PEG_NET_0_PC: 0x%x, PEG_NET_1_PC: 0x%x,.\n "
@@ -3858,6 +3876,11 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
3858 QLA82XX_CRB_PEG_NET_3 + 0x3c), 3876 QLA82XX_CRB_PEG_NET_3 + 0x3c),
3859 qla82xx_rd_32(ha, 3877 qla82xx_rd_32(ha,
3860 QLA82XX_CRB_PEG_NET_4 + 0x3c)); 3878 QLA82XX_CRB_PEG_NET_4 + 0x3c));
3879 if (LSW(MSB(halt_status)) == 0x67)
3880 ql_log(ql_log_warn, vha, 0xb052,
3881 "Firmware aborted with "
3882 "error code 0x00006700. Device is "
3883 "being reset.\n");
3861 if (halt_status & HALT_STATUS_UNRECOVERABLE) { 3884 if (halt_status & HALT_STATUS_UNRECOVERABLE) {
3862 set_bit(ISP_UNRECOVERABLE, 3885 set_bit(ISP_UNRECOVERABLE,
3863 &vha->dpc_flags); 3886 &vha->dpc_flags);
@@ -3869,16 +3892,8 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
3869 } 3892 }
3870 qla2xxx_wake_dpc(vha); 3893 qla2xxx_wake_dpc(vha);
3871 ha->flags.isp82xx_fw_hung = 1; 3894 ha->flags.isp82xx_fw_hung = 1;
3872 if (ha->flags.mbox_busy) { 3895 ql_log(ql_log_warn, vha, 0x6007, "Firmware hung.\n");
3873 ha->flags.mbox_int = 1; 3896 qla82xx_clear_pending_mbx(vha);
3874 ql_log(ql_log_warn, vha, 0x6007,
3875 "Due to FW hung, doing "
3876 "premature completion of mbx "
3877 "command.\n");
3878 if (test_bit(MBX_INTR_WAIT,
3879 &ha->mbx_cmd_flags))
3880 complete(&ha->mbx_intr_comp);
3881 }
3882 } 3897 }
3883 } 3898 }
3884 } 3899 }
@@ -4073,10 +4088,7 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha)
4073 msleep(1000); 4088 msleep(1000);
4074 if (qla82xx_check_fw_alive(vha)) { 4089 if (qla82xx_check_fw_alive(vha)) {
4075 ha->flags.isp82xx_fw_hung = 1; 4090 ha->flags.isp82xx_fw_hung = 1;
4076 if (ha->flags.mbox_busy) { 4091 qla82xx_clear_pending_mbx(vha);
4077 ha->flags.mbox_int = 1;
4078 complete(&ha->mbx_intr_comp);
4079 }
4080 break; 4092 break;
4081 } 4093 }
4082 } 4094 }
diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h
index 57820c199bc2..57a226be339a 100644
--- a/drivers/scsi/qla2xxx/qla_nx.h
+++ b/drivers/scsi/qla2xxx/qla_nx.h
@@ -1173,4 +1173,8 @@ struct qla82xx_md_entry_queue {
 
 static const int MD_MIU_TEST_AGT_RDDATA[] = { 0x410000A8, 0x410000AC,
     0x410000B8, 0x410000BC };
+
+#define CRB_NIU_XG_PAUSE_CTL_P0	0x1
+#define CRB_NIU_XG_PAUSE_CTL_P1	0x8
+
 #endif
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index fd14c7bfc626..f9e5b85e84d8 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -201,12 +201,12 @@ MODULE_PARM_DESC(ql2xmdcapmask,
201 "Set the Minidump driver capture mask level. " 201 "Set the Minidump driver capture mask level. "
202 "Default is 0x7F - Can be set to 0x3, 0x7, 0xF, 0x1F, 0x7F."); 202 "Default is 0x7F - Can be set to 0x3, 0x7, 0xF, 0x1F, 0x7F.");
203 203
204int ql2xmdenable; 204int ql2xmdenable = 1;
205module_param(ql2xmdenable, int, S_IRUGO); 205module_param(ql2xmdenable, int, S_IRUGO);
206MODULE_PARM_DESC(ql2xmdenable, 206MODULE_PARM_DESC(ql2xmdenable,
207 "Enable/disable MiniDump. " 207 "Enable/disable MiniDump. "
208 "0 (Default) - MiniDump disabled. " 208 "0 - MiniDump disabled. "
209 "1 - MiniDump enabled."); 209 "1 (Default) - MiniDump enabled.");
210 210
211/* 211/*
212 * SCSI host template entry points 212 * SCSI host template entry points
@@ -423,6 +423,7 @@ fail2:
423 qla25xx_delete_queues(vha); 423 qla25xx_delete_queues(vha);
424 destroy_workqueue(ha->wq); 424 destroy_workqueue(ha->wq);
425 ha->wq = NULL; 425 ha->wq = NULL;
426 vha->req = ha->req_q_map[0];
426fail: 427fail:
427 ha->mqenable = 0; 428 ha->mqenable = 0;
428 kfree(ha->req_q_map); 429 kfree(ha->req_q_map);
@@ -814,49 +815,6 @@ qla2x00_wait_for_chip_reset(scsi_qla_host_t *vha)
814 return return_status; 815 return return_status;
815} 816}
816 817
817/*
818 * qla2x00_wait_for_loop_ready
819 * Wait for MAX_LOOP_TIMEOUT(5 min) value for loop
820 * to be in LOOP_READY state.
821 * Input:
822 * ha - pointer to host adapter structure
823 *
824 * Note:
825 * Does context switching-Release SPIN_LOCK
826 * (if any) before calling this routine.
827 *
828 *
829 * Return:
830 * Success (LOOP_READY) : 0
831 * Failed (LOOP_NOT_READY) : 1
832 */
833static inline int
834qla2x00_wait_for_loop_ready(scsi_qla_host_t *vha)
835{
836 int return_status = QLA_SUCCESS;
837 unsigned long loop_timeout ;
838 struct qla_hw_data *ha = vha->hw;
839 scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
840
841 /* wait for 5 min at the max for loop to be ready */
842 loop_timeout = jiffies + (MAX_LOOP_TIMEOUT * HZ);
843
844 while ((!atomic_read(&base_vha->loop_down_timer) &&
845 atomic_read(&base_vha->loop_state) == LOOP_DOWN) ||
846 atomic_read(&base_vha->loop_state) != LOOP_READY) {
847 if (atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
848 return_status = QLA_FUNCTION_FAILED;
849 break;
850 }
851 msleep(1000);
852 if (time_after_eq(jiffies, loop_timeout)) {
853 return_status = QLA_FUNCTION_FAILED;
854 break;
855 }
856 }
857 return (return_status);
858}
859
860static void 818static void
861sp_get(struct srb *sp) 819sp_get(struct srb *sp)
862{ 820{
@@ -1035,12 +993,6 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type,
1035 "Wait for hba online failed for cmd=%p.\n", cmd); 993 "Wait for hba online failed for cmd=%p.\n", cmd);
1036 goto eh_reset_failed; 994 goto eh_reset_failed;
1037 } 995 }
1038 err = 1;
1039 if (qla2x00_wait_for_loop_ready(vha) != QLA_SUCCESS) {
1040 ql_log(ql_log_warn, vha, 0x800b,
1041 "Wait for loop ready failed for cmd=%p.\n", cmd);
1042 goto eh_reset_failed;
1043 }
1044 err = 2; 996 err = 2;
1045 if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1) 997 if (do_reset(fcport, cmd->device->lun, cmd->request->cpu + 1)
1046 != QLA_SUCCESS) { 998 != QLA_SUCCESS) {
@@ -1137,10 +1089,9 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd)
1137 goto eh_bus_reset_done; 1089 goto eh_bus_reset_done;
1138 } 1090 }
1139 1091
1140 if (qla2x00_wait_for_loop_ready(vha) == QLA_SUCCESS) { 1092 if (qla2x00_loop_reset(vha) == QLA_SUCCESS)
1141 if (qla2x00_loop_reset(vha) == QLA_SUCCESS) 1093 ret = SUCCESS;
1142 ret = SUCCESS; 1094
1143 }
1144 if (ret == FAILED) 1095 if (ret == FAILED)
1145 goto eh_bus_reset_done; 1096 goto eh_bus_reset_done;
1146 1097
@@ -1206,15 +1157,6 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd)
1206 if (qla2x00_wait_for_reset_ready(vha) != QLA_SUCCESS) 1157 if (qla2x00_wait_for_reset_ready(vha) != QLA_SUCCESS)
1207 goto eh_host_reset_lock; 1158 goto eh_host_reset_lock;
1208 1159
1209 /*
1210 * Fixme-may be dpc thread is active and processing
1211 * loop_resync,so wait a while for it to
1212 * be completed and then issue big hammer.Otherwise
1213 * it may cause I/O failure as big hammer marks the
1214 * devices as lost kicking of the port_down_timer
1215 * while dpc is stuck for the mailbox to complete.
1216 */
1217 qla2x00_wait_for_loop_ready(vha);
1218 if (vha != base_vha) { 1160 if (vha != base_vha) {
1219 if (qla2x00_vp_abort_isp(vha)) 1161 if (qla2x00_vp_abort_isp(vha))
1220 goto eh_host_reset_lock; 1162 goto eh_host_reset_lock;
@@ -1297,16 +1239,13 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
1297 atomic_set(&vha->loop_state, LOOP_DOWN); 1239 atomic_set(&vha->loop_state, LOOP_DOWN);
1298 atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); 1240 atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
1299 qla2x00_mark_all_devices_lost(vha, 0); 1241 qla2x00_mark_all_devices_lost(vha, 0);
1300 qla2x00_wait_for_loop_ready(vha);
1301 } 1242 }
1302 1243
1303 if (ha->flags.enable_lip_reset) { 1244 if (ha->flags.enable_lip_reset) {
1304 ret = qla2x00_lip_reset(vha); 1245 ret = qla2x00_lip_reset(vha);
1305 if (ret != QLA_SUCCESS) { 1246 if (ret != QLA_SUCCESS)
1306 ql_dbg(ql_dbg_taskm, vha, 0x802e, 1247 ql_dbg(ql_dbg_taskm, vha, 0x802e,
1307 "lip_reset failed (%d).\n", ret); 1248 "lip_reset failed (%d).\n", ret);
1308 } else
1309 qla2x00_wait_for_loop_ready(vha);
1310 } 1249 }
1311 1250
1312 /* Issue marker command only when we are going to start the I/O */ 1251 /* Issue marker command only when we are going to start the I/O */
@@ -4070,13 +4009,8 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
4070 /* For ISP82XX complete any pending mailbox cmd */ 4009 /* For ISP82XX complete any pending mailbox cmd */
4071 if (IS_QLA82XX(ha)) { 4010 if (IS_QLA82XX(ha)) {
4072 ha->flags.isp82xx_fw_hung = 1; 4011 ha->flags.isp82xx_fw_hung = 1;
4073 if (ha->flags.mbox_busy) { 4012 ql_dbg(ql_dbg_aer, vha, 0x9001, "Pci channel io frozen\n");
4074 ha->flags.mbox_int = 1; 4013 qla82xx_clear_pending_mbx(vha);
4075 ql_dbg(ql_dbg_aer, vha, 0x9001,
4076 "Due to pci channel io frozen, doing premature "
4077 "completion of mbx command.\n");
4078 complete(&ha->mbx_intr_comp);
4079 }
4080 } 4014 }
4081 qla2x00_free_irqs(vha); 4015 qla2x00_free_irqs(vha);
4082 pci_disable_device(pdev); 4016 pci_disable_device(pdev);
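
Most of this qla_os.c diff deletes qla2x00_wait_for_loop_ready(), a five-minute msleep() poll, together with its call sites in the error-handler paths. The deadline test it relied on is the standard wrap-safe jiffies comparison; here is a runnable demonstration of the simplified macro (the real time_after_eq() in linux/jiffies.h additionally wraps the operands in typecheck()):

/* Wrap-safe "a has reached b" for a free-running unsigned counter:
 * signed subtraction keeps the comparison correct across wraparound. */
#include <stdio.h>

#define time_after_eq(a, b) ((long)((a) - (b)) >= 0)

int main(void)
{
    unsigned long jiffies = (unsigned long)-5;  /* about to wrap */
    unsigned long deadline = jiffies + 10;      /* wraps past zero */

    printf("%d\n", time_after_eq(jiffies, deadline)); /* 0: not expired */
    jiffies += 12;                                    /* past deadline */
    printf("%d\n", time_after_eq(jiffies, deadline)); /* 1: expired */
    return 0;
}
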
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 13b6357c1fa2..23f33a6d52d7 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.03.07.07-k"
+#define QLA2XXX_VERSION      "8.03.07.12-k"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	3
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index ace637bf254e..fd5edc6e166d 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h
@@ -147,7 +147,7 @@
 #define ISCSI_ALIAS_SIZE		32	/* ISCSI Alias name size */
 #define ISCSI_NAME_SIZE			0xE0	/* ISCSI Name size */
 
-#define QL4_SESS_RECOVERY_TMO		30	/* iSCSI session */
+#define QL4_SESS_RECOVERY_TMO		120	/* iSCSI session */
 						/* recovery timeout */
 
 #define LSDW(x) ((u32)((u64)(x)))
@@ -173,6 +173,8 @@
 #define ISNS_DEREG_TOV			5
 #define HBA_ONLINE_TOV			30
 #define DISABLE_ACB_TOV			30
+#define IP_CONFIG_TOV			30
+#define LOGIN_TOV			12
 
 #define MAX_RESET_HA_RETRIES		2
 
@@ -240,6 +242,45 @@ struct ddb_entry {
 
     uint16_t fw_ddb_index;	/* DDB firmware index */
     uint32_t fw_ddb_device_state; /* F/W Device State  -- see ql4_fw.h */
+    uint16_t ddb_type;
+#define FLASH_DDB 0x01
+
+    struct dev_db_entry fw_ddb_entry;
+    int (*unblock_sess)(struct iscsi_cls_session *cls_session);
+    int (*ddb_change)(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
+              struct ddb_entry *ddb_entry, uint32_t state);
+
+    /* Driver Re-login */
+    unsigned long flags;		/* DDB Flags */
+    uint16_t default_relogin_timeout;	/* Max time to wait for
+					 * relogin to complete */
+    atomic_t retry_relogin_timer;	/* Min Time between relogins
+					 * (4000 only) */
+    atomic_t relogin_timer;		/* Max Time to wait for
+					 * relogin to complete */
+    atomic_t relogin_retry_count;	/* Num of times relogin has been
+					 * retried */
+    uint32_t default_time2wait;		/* Default Min time between
+					 * relogins (+aens) */
+
+};
+
+struct qla_ddb_index {
+    struct list_head list;
+    uint16_t fw_ddb_idx;
+    struct dev_db_entry fw_ddb;
+};
+
+#define DDB_IPADDR_LEN 64
+
+struct ql4_tuple_ddb {
+    int port;
+    int tpgt;
+    char ip_addr[DDB_IPADDR_LEN];
+    char iscsi_name[ISCSI_NAME_SIZE];
+    uint16_t options;
+#define DDB_OPT_IPV6 0x0e0e
+#define DDB_OPT_IPV4 0x0f0f
 };
 
 /*
@@ -411,7 +452,7 @@ struct scsi_qla_host {
 #define AF_FW_RECOVERY			19 /* 0x00080000 */
 #define AF_EEH_BUSY			20 /* 0x00100000 */
 #define AF_PCI_CHANNEL_IO_PERM_FAILURE	21 /* 0x00200000 */
-
+#define AF_BUILD_DDB_LIST		22 /* 0x00400000 */
     unsigned long dpc_flags;
 
 #define DPC_RESET_HA			1 /* 0x00000002 */
@@ -604,6 +645,7 @@ struct scsi_qla_host {
     uint16_t bootload_minor;
     uint16_t bootload_patch;
     uint16_t bootload_build;
+    uint16_t def_timeout;		/* Default login timeout */
 
     uint32_t flash_state;
 #define QLFLASH_WAITING		0
@@ -623,6 +665,11 @@ struct scsi_qla_host {
     uint16_t iscsi_pci_func_cnt;
     uint8_t model_name[16];
     struct completion disable_acb_comp;
+    struct dma_pool *fw_ddb_dma_pool;
+#define DDB_DMA_BLOCK_SIZE	512
+    uint16_t pri_ddb_idx;
+    uint16_t sec_ddb_idx;
+    int is_reset;
 };
 
 struct ql4_task_data {
@@ -835,6 +882,10 @@ static inline int ql4xxx_reset_active(struct scsi_qla_host *ha)
 /*---------------------------------------------------------------------------*/
 
 /* Defines for qla4xxx_initialize_adapter() and qla4xxx_recover_adapter() */
+
+#define INIT_ADAPTER	0
+#define RESET_ADAPTER	1
+
 #define PRESERVE_DDB_LIST	0
 #define REBUILD_DDB_LIST	1
 
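
The new ddb_entry fields above include two function pointers, unblock_sess() and ddb_change(), so flash-backed and regular DDBs can share one AEN path; the ql4_init.c diff below ends up calling ddb_entry->ddb_change() instead of hard-coding a single behaviour. A runnable sketch (hypothetical names) of that per-entry dispatch:

/* Per-object dispatch: the generic handler stays fixed while each
 * entry type supplies its own state-change behaviour at creation time. */
#include <stdio.h>

struct entry;
typedef int (*change_fn)(struct entry *e, unsigned int state);

struct entry {
    const char *kind;
    change_fn change;	/* set when the entry is created */
};

static int normal_change(struct entry *e, unsigned int state)
{
    printf("%s: normal path, state 0x%x\n", e->kind, state);
    return 0;
}

static int flash_change(struct entry *e, unsigned int state)
{
    printf("%s: flash path, state 0x%x\n", e->kind, state);
    return 0;
}

static void process_state_change(struct entry *e, unsigned int state)
{
    e->change(e, state);	/* generic caller, per-entry behaviour */
}

int main(void)
{
    struct entry a = { "ddb",       normal_change };
    struct entry b = { "flash-ddb", flash_change  };

    process_state_change(&a, 0x04);
    process_state_change(&b, 0x04);
    return 0;
}
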
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index cbd5a20dbbd1..4ac07f882521 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h
@@ -12,6 +12,7 @@
 #define MAX_PRST_DEV_DB_ENTRIES		64
 #define MIN_DISC_DEV_DB_ENTRY		MAX_PRST_DEV_DB_ENTRIES
 #define MAX_DEV_DB_ENTRIES		512
+#define MAX_DEV_DB_ENTRIES_40XX		256
 
 /*************************************************************************
  *
@@ -604,6 +605,13 @@ struct addr_ctrl_blk {
     uint8_t res14[140];	/* 274-2FF */
 };
 
+#define IP_ADDR_COUNT	4 /* Total 4 IP address supported in one interface
+			   * One IPv4, one IPv6 link local and 2 IPv6
+			   */
+
+#define IP_STATE_MASK	0x0F000000
+#define IP_STATE_SHIFT	24
+
 struct init_fw_ctrl_blk {
     struct addr_ctrl_blk pri;
 /*	struct addr_ctrl_blk sec;*/
diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h
index 160db9d5ea21..d0dd4b330206 100644
--- a/drivers/scsi/qla4xxx/ql4_glbl.h
+++ b/drivers/scsi/qla4xxx/ql4_glbl.h
@@ -13,7 +13,7 @@ struct iscsi_cls_conn;
 int qla4xxx_hw_reset(struct scsi_qla_host *ha);
 int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a);
 int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb *srb);
-int qla4xxx_initialize_adapter(struct scsi_qla_host *ha);
+int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset);
 int qla4xxx_soft_reset(struct scsi_qla_host *ha);
 irqreturn_t qla4xxx_intr_handler(int irq, void *dev_id);
 
@@ -153,10 +153,13 @@ int qla4xxx_req_ddb_entry(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
               uint32_t *mbx_sts);
 int qla4xxx_clear_ddb_entry(struct scsi_qla_host *ha, uint32_t fw_ddb_index);
 int qla4xxx_send_passthru0(struct iscsi_task *task);
+void qla4xxx_free_ddb_index(struct scsi_qla_host *ha);
 int qla4xxx_get_mgmt_data(struct scsi_qla_host *ha, uint16_t fw_ddb_index,
               uint16_t stats_size, dma_addr_t stats_dma);
 void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha,
                        struct ddb_entry *ddb_entry);
+void qla4xxx_update_session_conn_fwddb_param(struct scsi_qla_host *ha,
+                         struct ddb_entry *ddb_entry);
 int qla4xxx_bootdb_by_index(struct scsi_qla_host *ha,
                 struct dev_db_entry *fw_ddb_entry,
                 dma_addr_t fw_ddb_entry_dma, uint16_t ddb_index);
@@ -169,11 +172,22 @@ int qla4xxx_set_nvram(struct scsi_qla_host *ha, dma_addr_t nvram_dma,
 int qla4xxx_restore_factory_defaults(struct scsi_qla_host *ha,
                      uint32_t region, uint32_t field0,
                      uint32_t field1);
+int qla4xxx_get_ddb_index(struct scsi_qla_host *ha, uint16_t *ddb_index);
+void qla4xxx_login_flash_ddb(struct iscsi_cls_session *cls_session);
+int qla4xxx_unblock_ddb(struct iscsi_cls_session *cls_session);
+int qla4xxx_unblock_flash_ddb(struct iscsi_cls_session *cls_session);
+int qla4xxx_flash_ddb_change(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
+                 struct ddb_entry *ddb_entry, uint32_t state);
+int qla4xxx_ddb_change(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
+               struct ddb_entry *ddb_entry, uint32_t state);
+void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset);
 
 /* BSG Functions */
 int qla4xxx_bsg_request(struct bsg_job *bsg_job);
 int qla4xxx_process_vendor_specific(struct bsg_job *bsg_job);
 
+void qla4xxx_arm_relogin_timer(struct ddb_entry *ddb_entry);
+
 extern int ql4xextended_error_logging;
 extern int ql4xdontresethba;
 extern int ql4xenablemsix;
diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c
index 3075fbaef553..1bdfa8120ac8 100644
--- a/drivers/scsi/qla4xxx/ql4_init.c
+++ b/drivers/scsi/qla4xxx/ql4_init.c
@@ -773,22 +773,24 @@ int qla4xxx_start_firmware(struct scsi_qla_host *ha)
773 * be freed so that when login happens from user space there are free DDB 773 * be freed so that when login happens from user space there are free DDB
774 * indices available. 774 * indices available.
775 **/ 775 **/
776static void qla4xxx_free_ddb_index(struct scsi_qla_host *ha) 776void qla4xxx_free_ddb_index(struct scsi_qla_host *ha)
777{ 777{
778 int max_ddbs; 778 int max_ddbs;
779 int ret; 779 int ret;
780 uint32_t idx = 0, next_idx = 0; 780 uint32_t idx = 0, next_idx = 0;
781 uint32_t state = 0, conn_err = 0; 781 uint32_t state = 0, conn_err = 0;
782 782
783 max_ddbs = is_qla40XX(ha) ? MAX_PRST_DEV_DB_ENTRIES : 783 max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX :
784 MAX_DEV_DB_ENTRIES; 784 MAX_DEV_DB_ENTRIES;
785 785
786 for (idx = 0; idx < max_ddbs; idx = next_idx) { 786 for (idx = 0; idx < max_ddbs; idx = next_idx) {
787 ret = qla4xxx_get_fwddb_entry(ha, idx, NULL, 0, NULL, 787 ret = qla4xxx_get_fwddb_entry(ha, idx, NULL, 0, NULL,
788 &next_idx, &state, &conn_err, 788 &next_idx, &state, &conn_err,
789 NULL, NULL); 789 NULL, NULL);
790 if (ret == QLA_ERROR) 790 if (ret == QLA_ERROR) {
791 next_idx++;
791 continue; 792 continue;
793 }
792 if (state == DDB_DS_NO_CONNECTION_ACTIVE || 794 if (state == DDB_DS_NO_CONNECTION_ACTIVE ||
793 state == DDB_DS_SESSION_FAILED) { 795 state == DDB_DS_SESSION_FAILED) {
794 DEBUG2(ql4_printk(KERN_INFO, ha, 796 DEBUG2(ql4_printk(KERN_INFO, ha,
@@ -804,7 +806,6 @@ static void qla4xxx_free_ddb_index(struct scsi_qla_host *ha)
804 } 806 }
805} 807}
806 808
807
808/** 809/**
809 * qla4xxx_initialize_adapter - initiailizes hba 810 * qla4xxx_initialize_adapter - initiailizes hba
810 * @ha: Pointer to host adapter structure. 811 * @ha: Pointer to host adapter structure.
@@ -812,7 +813,7 @@ static void qla4xxx_free_ddb_index(struct scsi_qla_host *ha)
812 * This routine parforms all of the steps necessary to initialize the adapter. 813 * This routine parforms all of the steps necessary to initialize the adapter.
813 * 814 *
814 **/ 815 **/
815int qla4xxx_initialize_adapter(struct scsi_qla_host *ha) 816int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset)
816{ 817{
817 int status = QLA_ERROR; 818 int status = QLA_ERROR;
818 819
@@ -840,7 +841,8 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha)
840 if (status == QLA_ERROR) 841 if (status == QLA_ERROR)
841 goto exit_init_hba; 842 goto exit_init_hba;
842 843
843 qla4xxx_free_ddb_index(ha); 844 if (is_reset == RESET_ADAPTER)
845 qla4xxx_build_ddb_list(ha, is_reset);
844 846
845 set_bit(AF_ONLINE, &ha->flags); 847 set_bit(AF_ONLINE, &ha->flags);
846exit_init_hba: 848exit_init_hba:
@@ -855,38 +857,12 @@ exit_init_hba:
855 return status; 857 return status;
856} 858}
857 859
858/** 860int qla4xxx_ddb_change(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
859 * qla4xxx_process_ddb_changed - process ddb state change 861 struct ddb_entry *ddb_entry, uint32_t state)
860 * @ha - Pointer to host adapter structure.
861 * @fw_ddb_index - Firmware's device database index
862 * @state - Device state
863 *
864 * This routine processes a Decive Database Changed AEN Event.
865 **/
866int qla4xxx_process_ddb_changed(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
867 uint32_t state, uint32_t conn_err)
868{ 862{
869 struct ddb_entry * ddb_entry;
870 uint32_t old_fw_ddb_device_state; 863 uint32_t old_fw_ddb_device_state;
871 int status = QLA_ERROR; 864 int status = QLA_ERROR;
872 865
873 /* check for out of range index */
874 if (fw_ddb_index >= MAX_DDB_ENTRIES)
875 goto exit_ddb_event;
876
877 /* Get the corresponging ddb entry */
878 ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, fw_ddb_index);
879 /* Device does not currently exist in our database. */
880 if (ddb_entry == NULL) {
881 ql4_printk(KERN_ERR, ha, "%s: No ddb_entry at FW index [%d]\n",
882 __func__, fw_ddb_index);
883
884 if (state == DDB_DS_NO_CONNECTION_ACTIVE)
885 clear_bit(fw_ddb_index, ha->ddb_idx_map);
886
887 goto exit_ddb_event;
888 }
889
890 old_fw_ddb_device_state = ddb_entry->fw_ddb_device_state; 866 old_fw_ddb_device_state = ddb_entry->fw_ddb_device_state;
891 DEBUG2(ql4_printk(KERN_INFO, ha, 867 DEBUG2(ql4_printk(KERN_INFO, ha,
892 "%s: DDB - old state = 0x%x, new state = 0x%x for " 868 "%s: DDB - old state = 0x%x, new state = 0x%x for "
@@ -900,9 +876,7 @@ int qla4xxx_process_ddb_changed(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
900 switch (state) { 876 switch (state) {
901 case DDB_DS_SESSION_ACTIVE: 877 case DDB_DS_SESSION_ACTIVE:
902 case DDB_DS_DISCOVERY: 878 case DDB_DS_DISCOVERY:
903 iscsi_conn_start(ddb_entry->conn); 879 ddb_entry->unblock_sess(ddb_entry->sess);
904 iscsi_conn_login_event(ddb_entry->conn,
905 ISCSI_CONN_STATE_LOGGED_IN);
906 qla4xxx_update_session_conn_param(ha, ddb_entry); 880 qla4xxx_update_session_conn_param(ha, ddb_entry);
907 status = QLA_SUCCESS; 881 status = QLA_SUCCESS;
908 break; 882 break;
@@ -936,9 +910,7 @@ int qla4xxx_process_ddb_changed(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
936 switch (state) { 910 switch (state) {
937 case DDB_DS_SESSION_ACTIVE: 911 case DDB_DS_SESSION_ACTIVE:
938 case DDB_DS_DISCOVERY: 912 case DDB_DS_DISCOVERY:
939 iscsi_conn_start(ddb_entry->conn); 913 ddb_entry->unblock_sess(ddb_entry->sess);
940 iscsi_conn_login_event(ddb_entry->conn,
941 ISCSI_CONN_STATE_LOGGED_IN);
942 qla4xxx_update_session_conn_param(ha, ddb_entry); 914 qla4xxx_update_session_conn_param(ha, ddb_entry);
943 status = QLA_SUCCESS; 915 status = QLA_SUCCESS;
944 break; 916 break;
@@ -954,7 +926,198 @@ int qla4xxx_process_ddb_changed(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
954 __func__)); 926 __func__));
955 break; 927 break;
956 } 928 }
929 return status;
930}
931
932void qla4xxx_arm_relogin_timer(struct ddb_entry *ddb_entry)
933{
934 /*
935 * This triggers a relogin. After the relogin_timer
936 * expires, the relogin gets scheduled. We must wait a
937 * minimum amount of time since receiving an 0x8014 AEN
938 * with failed device_state or a logout response before
939 * we can issue another relogin.
940 *
941 * Firmware pads this timeout: (time2wait +1).
942 * Driver retry to login should be longer than F/W.
943 * Otherwise F/W will fail
944 * set_ddb() mbx cmd with 0x4005 since it still
945 * counting down its time2wait.
946 */
947 atomic_set(&ddb_entry->relogin_timer, 0);
948 atomic_set(&ddb_entry->retry_relogin_timer,
949 ddb_entry->default_time2wait + 4);
950
951}
952
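The helper above centralizes the retry pacing: the counter is seeded with default_time2wait plus a 4-second pad so the driver's retry always lands behind the firmware's own (time2wait + 1) countdown. A minimal userspace sketch of that countdown, with plain ints standing in for the kernel atomics (all names here are illustrative, not the driver's):

#include <stdio.h>

/* Illustrative stand-in for the per-DDB counters. */
struct demo_ddb {
	int retry_relogin_timer;	/* seconds until relogin is scheduled */
	int default_time2wait;		/* as reported by the firmware entry */
};

static void demo_arm_relogin_timer(struct demo_ddb *d)
{
	/* Pad past the firmware's (time2wait + 1) countdown. */
	d->retry_relogin_timer = d->default_time2wait + 4;
}

int main(void)
{
	struct demo_ddb d = { 0, 2 };
	int seconds = 0;

	demo_arm_relogin_timer(&d);
	/* The driver's 1-second timer decrements the counter; the
	 * relogin is only scheduled once it reaches zero. */
	while (d.retry_relogin_timer-- > 0)
		seconds++;
	printf("relogin scheduled after %d seconds\n", seconds);
	return 0;
}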
953int qla4xxx_flash_ddb_change(struct scsi_qla_host *ha, uint32_t fw_ddb_index,
954 struct ddb_entry *ddb_entry, uint32_t state)
955{
956 uint32_t old_fw_ddb_device_state;
957 int status = QLA_ERROR;
958
959 old_fw_ddb_device_state = ddb_entry->fw_ddb_device_state;
960 DEBUG2(ql4_printk(KERN_INFO, ha,
961 "%s: DDB - old state = 0x%x, new state = 0x%x for "
962 "index [%d]\n", __func__,
963 ddb_entry->fw_ddb_device_state, state, fw_ddb_index));
964
965 ddb_entry->fw_ddb_device_state = state;
966
967 switch (old_fw_ddb_device_state) {
968 case DDB_DS_LOGIN_IN_PROCESS:
969 case DDB_DS_NO_CONNECTION_ACTIVE:
970 switch (state) {
971 case DDB_DS_SESSION_ACTIVE:
972 ddb_entry->unblock_sess(ddb_entry->sess);
973 qla4xxx_update_session_conn_fwddb_param(ha, ddb_entry);
974 status = QLA_SUCCESS;
975 break;
976 case DDB_DS_SESSION_FAILED:
977 iscsi_block_session(ddb_entry->sess);
978 if (!test_bit(DF_RELOGIN, &ddb_entry->flags))
979 qla4xxx_arm_relogin_timer(ddb_entry);
980 status = QLA_SUCCESS;
981 break;
982 }
983 break;
984 case DDB_DS_SESSION_ACTIVE:
985 switch (state) {
986 case DDB_DS_SESSION_FAILED:
987 iscsi_block_session(ddb_entry->sess);
988 if (!test_bit(DF_RELOGIN, &ddb_entry->flags))
989 qla4xxx_arm_relogin_timer(ddb_entry);
990 status = QLA_SUCCESS;
991 break;
992 }
993 break;
994 case DDB_DS_SESSION_FAILED:
995 switch (state) {
996 case DDB_DS_SESSION_ACTIVE:
997 ddb_entry->unblock_sess(ddb_entry->sess);
998 qla4xxx_update_session_conn_fwddb_param(ha, ddb_entry);
999 status = QLA_SUCCESS;
1000 break;
1001 case DDB_DS_SESSION_FAILED:
1002 if (!test_bit(DF_RELOGIN, &ddb_entry->flags))
1003 qla4xxx_arm_relogin_timer(ddb_entry);
1004 status = QLA_SUCCESS;
1005 break;
1006 }
1007 break;
1008 default:
1009 DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unknown Event\n",
1010 __func__));
1011 break;
1012 }
1013 return status;
1014}
1015
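qla4xxx_ddb_change and qla4xxx_flash_ddb_change give each ddb_entry its own state-change handler, so the AEN path can stay type-agnostic. A self-contained sketch of that function-pointer dispatch; the states and names below are invented for illustration:

#include <stdio.h>

/* Invented states and entry type; not the driver's definitions. */
enum demo_state { DS_LOGIN_IN_PROCESS, DS_SESSION_ACTIVE, DS_SESSION_FAILED };

struct demo_entry {
	enum demo_state state;
	/* Per-entry handler, mirroring ddb_entry->ddb_change. */
	void (*change)(struct demo_entry *e, enum demo_state new_state);
};

static void user_change(struct demo_entry *e, enum demo_state s)
{
	printf("user-created session: %d -> %d\n", e->state, s);
	e->state = s;
}

static void flash_change(struct demo_entry *e, enum demo_state s)
{
	printf("flash session: %d -> %d (block/unblock path)\n", e->state, s);
	e->state = s;
}

int main(void)
{
	struct demo_entry user = { DS_LOGIN_IN_PROCESS, user_change };
	struct demo_entry flash = { DS_LOGIN_IN_PROCESS, flash_change };

	/* One AEN-processing path serves both entry types. */
	user.change(&user, DS_SESSION_ACTIVE);
	flash.change(&flash, DS_SESSION_FAILED);
	return 0;
}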
1016/**
1017 * qla4xxx_process_ddb_changed - process ddb state change
1018 * @ha - Pointer to host adapter structure.
1019 * @fw_ddb_index - Firmware's device database index
1020 * @state - Device state
1021 *
1022 * This routine processes a Device Database Changed AEN Event.
1023 **/
1024int qla4xxx_process_ddb_changed(struct scsi_qla_host *ha,
1025 uint32_t fw_ddb_index,
1026 uint32_t state, uint32_t conn_err)
1027{
1028 struct ddb_entry *ddb_entry;
1029 int status = QLA_ERROR;
1030
1031 /* check for out of range index */
1032 if (fw_ddb_index >= MAX_DDB_ENTRIES)
1033 goto exit_ddb_event;
1034
1035 /* Get the corresponding ddb entry */
1036 ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, fw_ddb_index);
1037 /* Device does not currently exist in our database. */
1038 if (ddb_entry == NULL) {
1039 ql4_printk(KERN_ERR, ha, "%s: No ddb_entry at FW index [%d]\n",
1040 __func__, fw_ddb_index);
1041
1042 if (state == DDB_DS_NO_CONNECTION_ACTIVE)
1043 clear_bit(fw_ddb_index, ha->ddb_idx_map);
1044
1045 goto exit_ddb_event;
1046 }
1047
1048 ddb_entry->ddb_change(ha, fw_ddb_index, ddb_entry, state);
957 1049
958exit_ddb_event: 1050exit_ddb_event:
959 return status; 1051 return status;
960} 1052}
1053
1054/**
1055 * qla4xxx_login_flash_ddb - Login to target (DDB)
1056 * @cls_session: Pointer to the session to login
1057 *
1058 * This routine logins to the target.
1059 * Issues setddb and conn open mbx
1060 **/
1061void qla4xxx_login_flash_ddb(struct iscsi_cls_session *cls_session)
1062{
1063 struct iscsi_session *sess;
1064 struct ddb_entry *ddb_entry;
1065 struct scsi_qla_host *ha;
1066 struct dev_db_entry *fw_ddb_entry = NULL;
1067 dma_addr_t fw_ddb_dma;
1068 uint32_t mbx_sts = 0;
1069 int ret;
1070
1071 sess = cls_session->dd_data;
1072 ddb_entry = sess->dd_data;
1073 ha = ddb_entry->ha;
1074
1075 if (!test_bit(AF_LINK_UP, &ha->flags))
1076 return;
1077
1078 if (ddb_entry->ddb_type != FLASH_DDB) {
1079 DEBUG2(ql4_printk(KERN_INFO, ha,
1080 "Skipping login to non FLASH DB"));
1081 goto exit_login;
1082 }
1083
1084 fw_ddb_entry = dma_pool_alloc(ha->fw_ddb_dma_pool, GFP_KERNEL,
1085 &fw_ddb_dma);
1086 if (fw_ddb_entry == NULL) {
1087 DEBUG2(ql4_printk(KERN_ERR, ha, "Out of memory\n"));
1088 goto exit_login;
1089 }
1090
1091 if (ddb_entry->fw_ddb_index == INVALID_ENTRY) {
1092 ret = qla4xxx_get_ddb_index(ha, &ddb_entry->fw_ddb_index);
1093 if (ret == QLA_ERROR)
1094 goto exit_login;
1095
1096 ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = ddb_entry;
1097 ha->tot_ddbs++;
1098 }
1099
1100 memcpy(fw_ddb_entry, &ddb_entry->fw_ddb_entry,
1101 sizeof(struct dev_db_entry));
1102 ddb_entry->sess->target_id = ddb_entry->fw_ddb_index;
1103
1104 ret = qla4xxx_set_ddb_entry(ha, ddb_entry->fw_ddb_index,
1105 fw_ddb_dma, &mbx_sts);
1106 if (ret == QLA_ERROR) {
1107 DEBUG2(ql4_printk(KERN_ERR, ha, "Set DDB failed\n"));
1108 goto exit_login;
1109 }
1110
1111 ddb_entry->fw_ddb_device_state = DDB_DS_LOGIN_IN_PROCESS;
1112 ret = qla4xxx_conn_open(ha, ddb_entry->fw_ddb_index);
1113 if (ret == QLA_ERROR) {
1114 ql4_printk(KERN_ERR, ha, "%s: Login failed: %s\n", __func__,
1115 sess->targetname);
1116 goto exit_login;
1117 }
1118
1119exit_login:
1120 if (fw_ddb_entry)
1121 dma_pool_free(ha->fw_ddb_dma_pool, fw_ddb_entry, fw_ddb_dma);
1122}
1123
diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c
index 4c2b84870392..c2593782fbbe 100644
--- a/drivers/scsi/qla4xxx/ql4_mbx.c
+++ b/drivers/scsi/qla4xxx/ql4_mbx.c
@@ -41,6 +41,16 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount,
41 return status; 41 return status;
42 } 42 }
43 43
44 if (is_qla40XX(ha)) {
45 if (test_bit(AF_HA_REMOVAL, &ha->flags)) {
46 DEBUG2(ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: "
47 "prematurely completing mbx cmd as "
48 "adapter removal detected\n",
49 ha->host_no, __func__));
50 return status;
51 }
52 }
53
44 if (is_qla8022(ha)) { 54 if (is_qla8022(ha)) {
45 if (test_bit(AF_FW_RECOVERY, &ha->flags)) { 55 if (test_bit(AF_FW_RECOVERY, &ha->flags)) {
46 DEBUG2(ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: " 56 DEBUG2(ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: "
@@ -413,6 +423,7 @@ qla4xxx_update_local_ifcb(struct scsi_qla_host *ha,
413 memcpy(ha->name_string, init_fw_cb->iscsi_name, 423 memcpy(ha->name_string, init_fw_cb->iscsi_name,
414 min(sizeof(ha->name_string), 424 min(sizeof(ha->name_string),
415 sizeof(init_fw_cb->iscsi_name))); 425 sizeof(init_fw_cb->iscsi_name)));
426 ha->def_timeout = le16_to_cpu(init_fw_cb->def_timeout);
416 /*memcpy(ha->alias, init_fw_cb->Alias, 427 /*memcpy(ha->alias, init_fw_cb->Alias,
417 min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/ 428 min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/
418 429
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 30f31b127f33..4169c8baa112 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -8,6 +8,7 @@
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include <linux/iscsi_boot_sysfs.h> 10#include <linux/iscsi_boot_sysfs.h>
11#include <linux/inet.h>
11 12
12#include <scsi/scsi_tcq.h> 13#include <scsi/scsi_tcq.h>
13#include <scsi/scsicam.h> 14#include <scsi/scsicam.h>
@@ -31,6 +32,13 @@ static struct kmem_cache *srb_cachep;
31/* 32/*
32 * Module parameter information and variables 33 * Module parameter information and variables
33 */ 34 */
35int ql4xdisablesysfsboot = 1;
36module_param(ql4xdisablesysfsboot, int, S_IRUGO | S_IWUSR);
37MODULE_PARM_DESC(ql4xdisablesysfsboot,
38 "Set to disable exporting boot targets to sysfs\n"
39 " 0 - Export boot targets\n"
40 " 1 - Do not export boot targets (Default)");
41
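The new ql4xdisablesysfsboot knob uses the standard module-parameter machinery, so it can be set at load time or flipped at runtime through /sys/module/.../parameters. A minimal sketch of the same pattern in a stand-alone module (the module and parameter names are hypothetical):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static int demo_flag = 1;
/* S_IRUGO | S_IWUSR: world-readable, root-writable via
 * /sys/module/<modname>/parameters/demo_flag */
module_param(demo_flag, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(demo_flag, "Demo on/off flag (default 1)");

static int __init demo_init(void)
{
	pr_info("demo_flag = %d\n", demo_flag);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Loading it with "insmod demo.ko demo_flag=0" overrides the default, just as ql4xdisablesysfsboot=0 would for this driver.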
34int ql4xdontresethba = 0; 42int ql4xdontresethba = 0;
35module_param(ql4xdontresethba, int, S_IRUGO | S_IWUSR); 43module_param(ql4xdontresethba, int, S_IRUGO | S_IWUSR);
36MODULE_PARM_DESC(ql4xdontresethba, 44MODULE_PARM_DESC(ql4xdontresethba,
@@ -63,7 +71,7 @@ static int ql4xsess_recovery_tmo = QL4_SESS_RECOVERY_TMO;
63module_param(ql4xsess_recovery_tmo, int, S_IRUGO); 71module_param(ql4xsess_recovery_tmo, int, S_IRUGO);
64MODULE_PARM_DESC(ql4xsess_recovery_tmo, 72MODULE_PARM_DESC(ql4xsess_recovery_tmo,
65 "Target Session Recovery Timeout.\n" 73 "Target Session Recovery Timeout.\n"
66 " Default: 30 sec."); 74 " Default: 120 sec.");
67 75
68static int qla4xxx_wait_for_hba_online(struct scsi_qla_host *ha); 76static int qla4xxx_wait_for_hba_online(struct scsi_qla_host *ha);
69/* 77/*
@@ -415,7 +423,7 @@ static int qla4xxx_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
415 qla_ep = ep->dd_data; 423 qla_ep = ep->dd_data;
416 ha = to_qla_host(qla_ep->host); 424 ha = to_qla_host(qla_ep->host);
417 425
418 if (adapter_up(ha)) 426 if (adapter_up(ha) && !test_bit(AF_BUILD_DDB_LIST, &ha->flags))
419 ret = 1; 427 ret = 1;
420 428
421 return ret; 429 return ret;
@@ -975,6 +983,150 @@ static int qla4xxx_conn_get_param(struct iscsi_cls_conn *cls_conn,
975 983
976} 984}
977 985
986int qla4xxx_get_ddb_index(struct scsi_qla_host *ha, uint16_t *ddb_index)
987{
988 uint32_t mbx_sts = 0;
989 uint16_t tmp_ddb_index;
990 int ret;
991
992get_ddb_index:
993 tmp_ddb_index = find_first_zero_bit(ha->ddb_idx_map, MAX_DDB_ENTRIES);
994
995 if (tmp_ddb_index >= MAX_DDB_ENTRIES) {
996 DEBUG2(ql4_printk(KERN_INFO, ha,
997 "Free DDB index not available\n"));
998 ret = QLA_ERROR;
999 goto exit_get_ddb_index;
1000 }
1001
1002 if (test_and_set_bit(tmp_ddb_index, ha->ddb_idx_map))
1003 goto get_ddb_index;
1004
1005 DEBUG2(ql4_printk(KERN_INFO, ha,
1006 "Found a free DDB index at %d\n", tmp_ddb_index));
1007 ret = qla4xxx_req_ddb_entry(ha, tmp_ddb_index, &mbx_sts);
1008 if (ret == QLA_ERROR) {
1009 if (mbx_sts == MBOX_STS_COMMAND_ERROR) {
1010 ql4_printk(KERN_INFO, ha,
1011 "DDB index = %d not available trying next\n",
1012 tmp_ddb_index);
1013 goto get_ddb_index;
1014 }
1015 DEBUG2(ql4_printk(KERN_INFO, ha,
1016 "Free FW DDB not available\n"));
1017 }
1018
1019 *ddb_index = tmp_ddb_index;
1020
1021exit_get_ddb_index:
1022 return ret;
1023}
1024
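qla4xxx_get_ddb_index factors the claim loop out of session_create so the flash-DDB login path can reuse it: scan for a free bit, claim it, and retry if another context won the race. A userspace approximation of that pattern (the stand-ins below are not atomic; the kernel's find_first_zero_bit/test_and_set_bit are):

#include <stdio.h>

#define MAX_ENTRIES 32

static unsigned long idx_map;	/* one bit per DDB slot */

/* Userspace stand-ins for the kernel bitmap helpers (not atomic). */
static int find_first_zero(unsigned long map, int max)
{
	int i;

	for (i = 0; i < max; i++)
		if (!(map & (1UL << i)))
			return i;
	return max;
}

static int test_and_set(unsigned long *map, int bit)
{
	int was_set = !!(*map & (1UL << bit));

	*map |= 1UL << bit;
	return was_set;
}

static int get_index(unsigned int *out)
{
	int idx;

retry:
	idx = find_first_zero(idx_map, MAX_ENTRIES);
	if (idx >= MAX_ENTRIES)
		return -1;		/* no free slot */
	if (test_and_set(&idx_map, idx))
		goto retry;		/* lost the race, scan again */
	*out = idx;
	return 0;
}

int main(void)
{
	unsigned int idx;
	int i;

	for (i = 0; i < 3; i++)
		if (get_index(&idx) == 0)
			printf("claimed DDB index %u\n", idx);
	return 0;
}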
1025static int qla4xxx_match_ipaddress(struct scsi_qla_host *ha,
1026 struct ddb_entry *ddb_entry,
1027 char *existing_ipaddr,
1028 char *user_ipaddr)
1029{
1030 uint8_t dst_ipaddr[IPv6_ADDR_LEN];
1031 char formatted_ipaddr[DDB_IPADDR_LEN];
1032 int status = QLA_SUCCESS, ret = 0;
1033
1034 if (ddb_entry->fw_ddb_entry.options & DDB_OPT_IPV6_DEVICE) {
1035 ret = in6_pton(user_ipaddr, strlen(user_ipaddr), dst_ipaddr,
1036 '\0', NULL);
1037 if (ret == 0) {
1038 status = QLA_ERROR;
1039 goto out_match;
1040 }
1041 ret = sprintf(formatted_ipaddr, "%pI6", dst_ipaddr);
1042 } else {
1043 ret = in4_pton(user_ipaddr, strlen(user_ipaddr), dst_ipaddr,
1044 '\0', NULL);
1045 if (ret == 0) {
1046 status = QLA_ERROR;
1047 goto out_match;
1048 }
1049 ret = sprintf(formatted_ipaddr, "%pI4", dst_ipaddr);
1050 }
1051
1052 if (strcmp(existing_ipaddr, formatted_ipaddr))
1053 status = QLA_ERROR;
1054
1055out_match:
1056 return status;
1057}
1058
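qla4xxx_match_ipaddress avoids naive string comparison by converting the user-supplied address to binary and back to a canonical string, so differently written but equal addresses still match. A userspace analogue using inet_pton/inet_ntop in place of the kernel's in6_pton and %pI6 formatting (illustrative sketch only):

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

/* Canonicalize an address string; returns 0 on success. */
static int canon_ip(int af, const char *in, char *out, size_t outlen)
{
	unsigned char buf[16];

	if (inet_pton(af, in, buf) != 1)
		return -1;			/* not a valid address */
	if (!inet_ntop(af, buf, out, outlen))
		return -1;
	return 0;
}

int main(void)
{
	char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];

	/* The same IPv6 address written two different ways. */
	if (canon_ip(AF_INET6, "2001:DB8::1", a, sizeof(a)) == 0 &&
	    canon_ip(AF_INET6, "2001:db8:0:0:0:0:0:1", b, sizeof(b)) == 0)
		printf("match: %s\n", strcmp(a, b) == 0 ? "yes" : "no");
	return 0;
}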
1059static int qla4xxx_match_fwdb_session(struct scsi_qla_host *ha,
1060 struct iscsi_cls_conn *cls_conn)
1061{
1062 int idx = 0, max_ddbs, rval;
1063 struct iscsi_cls_session *cls_sess = iscsi_conn_to_session(cls_conn);
1064 struct iscsi_session *sess, *existing_sess;
1065 struct iscsi_conn *conn, *existing_conn;
1066 struct ddb_entry *ddb_entry;
1067
1068 sess = cls_sess->dd_data;
1069 conn = cls_conn->dd_data;
1070
1071 if (sess->targetname == NULL ||
1072 conn->persistent_address == NULL ||
1073 conn->persistent_port == 0)
1074 return QLA_ERROR;
1075
1076 max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX :
1077 MAX_DEV_DB_ENTRIES;
1078
1079 for (idx = 0; idx < max_ddbs; idx++) {
1080 ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx);
1081 if (ddb_entry == NULL)
1082 continue;
1083
1084 if (ddb_entry->ddb_type != FLASH_DDB)
1085 continue;
1086
1087 existing_sess = ddb_entry->sess->dd_data;
1088 existing_conn = ddb_entry->conn->dd_data;
1089
1090 if (existing_sess->targetname == NULL ||
1091 existing_conn->persistent_address == NULL ||
1092 existing_conn->persistent_port == 0)
1093 continue;
1094
1095 DEBUG2(ql4_printk(KERN_INFO, ha,
1096 "IQN = %s User IQN = %s\n",
1097 existing_sess->targetname,
1098 sess->targetname));
1099
1100 DEBUG2(ql4_printk(KERN_INFO, ha,
1101 "IP = %s User IP = %s\n",
1102 existing_conn->persistent_address,
1103 conn->persistent_address));
1104
1105 DEBUG2(ql4_printk(KERN_INFO, ha,
1106 "Port = %d User Port = %d\n",
1107 existing_conn->persistent_port,
1108 conn->persistent_port));
1109
1110 if (strcmp(existing_sess->targetname, sess->targetname))
1111 continue;
1112 rval = qla4xxx_match_ipaddress(ha, ddb_entry,
1113 existing_conn->persistent_address,
1114 conn->persistent_address);
1115 if (rval == QLA_ERROR)
1116 continue;
1117 if (existing_conn->persistent_port != conn->persistent_port)
1118 continue;
1119 break;
1120 }
1121
1122 if (idx == max_ddbs)
1123 return QLA_ERROR;
1124
1125 DEBUG2(ql4_printk(KERN_INFO, ha,
1126 "Match found in fwdb sessions\n"));
1127 return QLA_SUCCESS;
1128}
1129
978static struct iscsi_cls_session * 1130static struct iscsi_cls_session *
979qla4xxx_session_create(struct iscsi_endpoint *ep, 1131qla4xxx_session_create(struct iscsi_endpoint *ep,
980 uint16_t cmds_max, uint16_t qdepth, 1132 uint16_t cmds_max, uint16_t qdepth,
@@ -984,8 +1136,7 @@ qla4xxx_session_create(struct iscsi_endpoint *ep,
984 struct scsi_qla_host *ha; 1136 struct scsi_qla_host *ha;
985 struct qla_endpoint *qla_ep; 1137 struct qla_endpoint *qla_ep;
986 struct ddb_entry *ddb_entry; 1138 struct ddb_entry *ddb_entry;
987 uint32_t ddb_index; 1139 uint16_t ddb_index;
988 uint32_t mbx_sts = 0;
989 struct iscsi_session *sess; 1140 struct iscsi_session *sess;
990 struct sockaddr *dst_addr; 1141 struct sockaddr *dst_addr;
991 int ret; 1142 int ret;
@@ -1000,32 +1151,9 @@ qla4xxx_session_create(struct iscsi_endpoint *ep,
1000 dst_addr = (struct sockaddr *)&qla_ep->dst_addr; 1151 dst_addr = (struct sockaddr *)&qla_ep->dst_addr;
1001 ha = to_qla_host(qla_ep->host); 1152 ha = to_qla_host(qla_ep->host);
1002 1153
1003get_ddb_index: 1154 ret = qla4xxx_get_ddb_index(ha, &ddb_index);
1004 ddb_index = find_first_zero_bit(ha->ddb_idx_map, MAX_DDB_ENTRIES); 1155 if (ret == QLA_ERROR)
1005
1006 if (ddb_index >= MAX_DDB_ENTRIES) {
1007 DEBUG2(ql4_printk(KERN_INFO, ha,
1008 "Free DDB index not available\n"));
1009 return NULL;
1010 }
1011
1012 if (test_and_set_bit(ddb_index, ha->ddb_idx_map))
1013 goto get_ddb_index;
1014
1015 DEBUG2(ql4_printk(KERN_INFO, ha,
1016 "Found a free DDB index at %d\n", ddb_index));
1017 ret = qla4xxx_req_ddb_entry(ha, ddb_index, &mbx_sts);
1018 if (ret == QLA_ERROR) {
1019 if (mbx_sts == MBOX_STS_COMMAND_ERROR) {
1020 ql4_printk(KERN_INFO, ha,
1021 "DDB index = %d not available trying next\n",
1022 ddb_index);
1023 goto get_ddb_index;
1024 }
1025 DEBUG2(ql4_printk(KERN_INFO, ha,
1026 "Free FW DDB not available\n"));
1027 return NULL; 1156 return NULL;
1028 }
1029 1157
1030 cls_sess = iscsi_session_setup(&qla4xxx_iscsi_transport, qla_ep->host, 1158 cls_sess = iscsi_session_setup(&qla4xxx_iscsi_transport, qla_ep->host,
1031 cmds_max, sizeof(struct ddb_entry), 1159 cmds_max, sizeof(struct ddb_entry),
@@ -1040,6 +1168,8 @@ get_ddb_index:
1040 ddb_entry->fw_ddb_device_state = DDB_DS_NO_CONNECTION_ACTIVE; 1168 ddb_entry->fw_ddb_device_state = DDB_DS_NO_CONNECTION_ACTIVE;
1041 ddb_entry->ha = ha; 1169 ddb_entry->ha = ha;
1042 ddb_entry->sess = cls_sess; 1170 ddb_entry->sess = cls_sess;
1171 ddb_entry->unblock_sess = qla4xxx_unblock_ddb;
1172 ddb_entry->ddb_change = qla4xxx_ddb_change;
1043 cls_sess->recovery_tmo = ql4xsess_recovery_tmo; 1173 cls_sess->recovery_tmo = ql4xsess_recovery_tmo;
1044 ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = ddb_entry; 1174 ha->fw_ddb_index_map[ddb_entry->fw_ddb_index] = ddb_entry;
1045 ha->tot_ddbs++; 1175 ha->tot_ddbs++;
@@ -1077,6 +1207,9 @@ qla4xxx_conn_create(struct iscsi_cls_session *cls_sess, uint32_t conn_idx)
1077 DEBUG2(printk(KERN_INFO "Func: %s\n", __func__)); 1207 DEBUG2(printk(KERN_INFO "Func: %s\n", __func__));
1078 cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), 1208 cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn),
1079 conn_idx); 1209 conn_idx);
1210 if (!cls_conn)
1211 return NULL;
1212
1080 sess = cls_sess->dd_data; 1213 sess = cls_sess->dd_data;
1081 ddb_entry = sess->dd_data; 1214 ddb_entry = sess->dd_data;
1082 ddb_entry->conn = cls_conn; 1215 ddb_entry->conn = cls_conn;
@@ -1109,7 +1242,7 @@ static int qla4xxx_conn_start(struct iscsi_cls_conn *cls_conn)
1109 struct iscsi_session *sess; 1242 struct iscsi_session *sess;
1110 struct ddb_entry *ddb_entry; 1243 struct ddb_entry *ddb_entry;
1111 struct scsi_qla_host *ha; 1244 struct scsi_qla_host *ha;
1112 struct dev_db_entry *fw_ddb_entry; 1245 struct dev_db_entry *fw_ddb_entry = NULL;
1113 dma_addr_t fw_ddb_entry_dma; 1246 dma_addr_t fw_ddb_entry_dma;
1114 uint32_t mbx_sts = 0; 1247 uint32_t mbx_sts = 0;
1115 int ret = 0; 1248 int ret = 0;
@@ -1120,12 +1253,25 @@ static int qla4xxx_conn_start(struct iscsi_cls_conn *cls_conn)
1120 ddb_entry = sess->dd_data; 1253 ddb_entry = sess->dd_data;
1121 ha = ddb_entry->ha; 1254 ha = ddb_entry->ha;
1122 1255
1256 /* Check if we have a matching FW DDB; if so, do not log in
1257 * to this target, since doing so could cause the target to
1258 * log out the previous connection
1259 */
1260 ret = qla4xxx_match_fwdb_session(ha, cls_conn);
1261 if (ret == QLA_SUCCESS) {
1262 ql4_printk(KERN_INFO, ha,
1263 "Session already exist in FW.\n");
1264 ret = -EEXIST;
1265 goto exit_conn_start;
1266 }
1267
1123 fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), 1268 fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
1124 &fw_ddb_entry_dma, GFP_KERNEL); 1269 &fw_ddb_entry_dma, GFP_KERNEL);
1125 if (!fw_ddb_entry) { 1270 if (!fw_ddb_entry) {
1126 ql4_printk(KERN_ERR, ha, 1271 ql4_printk(KERN_ERR, ha,
1127 "%s: Unable to allocate dma buffer\n", __func__); 1272 "%s: Unable to allocate dma buffer\n", __func__);
1128 return -ENOMEM; 1273 ret = -ENOMEM;
1274 goto exit_conn_start;
1129 } 1275 }
1130 1276
1131 ret = qla4xxx_set_param_ddbentry(ha, ddb_entry, cls_conn, &mbx_sts); 1277 ret = qla4xxx_set_param_ddbentry(ha, ddb_entry, cls_conn, &mbx_sts);
@@ -1138,9 +1284,7 @@ static int qla4xxx_conn_start(struct iscsi_cls_conn *cls_conn)
1138 if (mbx_sts) 1284 if (mbx_sts)
1139 if (ddb_entry->fw_ddb_device_state == 1285 if (ddb_entry->fw_ddb_device_state ==
1140 DDB_DS_SESSION_ACTIVE) { 1286 DDB_DS_SESSION_ACTIVE) {
1141 iscsi_conn_start(ddb_entry->conn); 1287 ddb_entry->unblock_sess(ddb_entry->sess);
1142 iscsi_conn_login_event(ddb_entry->conn,
1143 ISCSI_CONN_STATE_LOGGED_IN);
1144 goto exit_set_param; 1288 goto exit_set_param;
1145 } 1289 }
1146 1290
@@ -1167,8 +1311,9 @@ exit_set_param:
1167 ret = 0; 1311 ret = 0;
1168 1312
1169exit_conn_start: 1313exit_conn_start:
1170 dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), 1314 if (fw_ddb_entry)
1171 fw_ddb_entry, fw_ddb_entry_dma); 1315 dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
1316 fw_ddb_entry, fw_ddb_entry_dma);
1172 return ret; 1317 return ret;
1173} 1318}
1174 1319
@@ -1344,6 +1489,101 @@ static int qla4xxx_task_xmit(struct iscsi_task *task)
1344 return -ENOSYS; 1489 return -ENOSYS;
1345} 1490}
1346 1491
1492static void qla4xxx_copy_fwddb_param(struct scsi_qla_host *ha,
1493 struct dev_db_entry *fw_ddb_entry,
1494 struct iscsi_cls_session *cls_sess,
1495 struct iscsi_cls_conn *cls_conn)
1496{
1497 int buflen = 0;
1498 struct iscsi_session *sess;
1499 struct iscsi_conn *conn;
1500 char ip_addr[DDB_IPADDR_LEN];
1501 uint16_t options = 0;
1502
1503 sess = cls_sess->dd_data;
1504 conn = cls_conn->dd_data;
1505
1506 conn->max_recv_dlength = BYTE_UNITS *
1507 le16_to_cpu(fw_ddb_entry->iscsi_max_rcv_data_seg_len);
1508
1509 conn->max_xmit_dlength = BYTE_UNITS *
1510 le16_to_cpu(fw_ddb_entry->iscsi_max_snd_data_seg_len);
1511
1512 sess->initial_r2t_en =
1513 (BIT_10 & le16_to_cpu(fw_ddb_entry->iscsi_options));
1514
1515 sess->max_r2t = le16_to_cpu(fw_ddb_entry->iscsi_max_outsnd_r2t);
1516
1517 sess->imm_data_en = (BIT_11 & le16_to_cpu(fw_ddb_entry->iscsi_options));
1518
1519 sess->first_burst = BYTE_UNITS *
1520 le16_to_cpu(fw_ddb_entry->iscsi_first_burst_len);
1521
1522 sess->max_burst = BYTE_UNITS *
1523 le16_to_cpu(fw_ddb_entry->iscsi_max_burst_len);
1524
1525 sess->time2wait = le16_to_cpu(fw_ddb_entry->iscsi_def_time2wait);
1526
1527 sess->time2retain = le16_to_cpu(fw_ddb_entry->iscsi_def_time2retain);
1528
1529 conn->persistent_port = le16_to_cpu(fw_ddb_entry->port);
1530
1531 sess->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp);
1532
1533 options = le16_to_cpu(fw_ddb_entry->options);
1534 if (options & DDB_OPT_IPV6_DEVICE)
1535 sprintf(ip_addr, "%pI6", fw_ddb_entry->ip_addr);
1536 else
1537 sprintf(ip_addr, "%pI4", fw_ddb_entry->ip_addr);
1538
1539 iscsi_set_param(cls_conn, ISCSI_PARAM_TARGET_NAME,
1540 (char *)fw_ddb_entry->iscsi_name, buflen);
1541 iscsi_set_param(cls_conn, ISCSI_PARAM_INITIATOR_NAME,
1542 (char *)ha->name_string, buflen);
1543 iscsi_set_param(cls_conn, ISCSI_PARAM_PERSISTENT_ADDRESS,
1544 (char *)ip_addr, buflen);
1545}
1546
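Every fw_ddb_entry field arrives little-endian from the firmware, hence the le16_to_cpu/le32_to_cpu conversions before the values reach the session. A portable sketch of what le16_to_cpu has to do (the kernel's byteorder helpers are the real implementation; this is only a demonstration):

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for the kernel's le16_to_cpu(): assemble the
 * value byte by byte so it works on hosts of either endianness. */
static uint16_t demo_le16_to_cpu(const uint8_t *p)
{
	return (uint16_t)(p[0] | (p[1] << 8));
}

int main(void)
{
	/* Two bytes as they would appear in a firmware DDB buffer:
	 * 0xBC, 0x0A is 0x0ABC == 2748 in little-endian order. */
	uint8_t wire[2] = { 0xBC, 0x0A };

	printf("port = %u\n", demo_le16_to_cpu(wire));	/* 2748 */
	return 0;
}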
1547void qla4xxx_update_session_conn_fwddb_param(struct scsi_qla_host *ha,
1548 struct ddb_entry *ddb_entry)
1549{
1550 struct iscsi_cls_session *cls_sess;
1551 struct iscsi_cls_conn *cls_conn;
1552 uint32_t ddb_state;
1553 dma_addr_t fw_ddb_entry_dma;
1554 struct dev_db_entry *fw_ddb_entry;
1555
1556 fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
1557 &fw_ddb_entry_dma, GFP_KERNEL);
1558 if (!fw_ddb_entry) {
1559 ql4_printk(KERN_ERR, ha,
1560 "%s: Unable to allocate dma buffer\n", __func__);
1561 goto exit_session_conn_fwddb_param;
1562 }
1563
1564 if (qla4xxx_get_fwddb_entry(ha, ddb_entry->fw_ddb_index, fw_ddb_entry,
1565 fw_ddb_entry_dma, NULL, NULL, &ddb_state,
1566 NULL, NULL, NULL) == QLA_ERROR) {
1567 DEBUG2(ql4_printk(KERN_ERR, ha, "scsi%ld: %s: failed "
1568 "get_ddb_entry for fw_ddb_index %d\n",
1569 ha->host_no, __func__,
1570 ddb_entry->fw_ddb_index));
1571 goto exit_session_conn_fwddb_param;
1572 }
1573
1574 cls_sess = ddb_entry->sess;
1575
1576 cls_conn = ddb_entry->conn;
1577
1578 /* Update params */
1579 qla4xxx_copy_fwddb_param(ha, fw_ddb_entry, cls_sess, cls_conn);
1580
1581exit_session_conn_fwddb_param:
1582 if (fw_ddb_entry)
1583 dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
1584 fw_ddb_entry, fw_ddb_entry_dma);
1585}
1586
1347void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha, 1587void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha,
1348 struct ddb_entry *ddb_entry) 1588 struct ddb_entry *ddb_entry)
1349{ 1589{
@@ -1360,7 +1600,7 @@ void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha,
1360 if (!fw_ddb_entry) { 1600 if (!fw_ddb_entry) {
1361 ql4_printk(KERN_ERR, ha, 1601 ql4_printk(KERN_ERR, ha,
1362 "%s: Unable to allocate dma buffer\n", __func__); 1602 "%s: Unable to allocate dma buffer\n", __func__);
1363 return; 1603 goto exit_session_conn_param;
1364 } 1604 }
1365 1605
1366 if (qla4xxx_get_fwddb_entry(ha, ddb_entry->fw_ddb_index, fw_ddb_entry, 1606 if (qla4xxx_get_fwddb_entry(ha, ddb_entry->fw_ddb_index, fw_ddb_entry,
@@ -1370,7 +1610,7 @@ void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha,
1370 "get_ddb_entry for fw_ddb_index %d\n", 1610 "get_ddb_entry for fw_ddb_index %d\n",
1371 ha->host_no, __func__, 1611 ha->host_no, __func__,
1372 ddb_entry->fw_ddb_index)); 1612 ddb_entry->fw_ddb_index));
1373 return; 1613 goto exit_session_conn_param;
1374 } 1614 }
1375 1615
1376 cls_sess = ddb_entry->sess; 1616 cls_sess = ddb_entry->sess;
@@ -1379,6 +1619,12 @@ void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha,
1379 cls_conn = ddb_entry->conn; 1619 cls_conn = ddb_entry->conn;
1380 conn = cls_conn->dd_data; 1620 conn = cls_conn->dd_data;
1381 1621
1622 /* Update timers after login */
1623 ddb_entry->default_relogin_timeout =
1624 le16_to_cpu(fw_ddb_entry->def_timeout);
1625 ddb_entry->default_time2wait =
1626 le16_to_cpu(fw_ddb_entry->iscsi_def_time2wait);
1627
1382 /* Update params */ 1628 /* Update params */
1383 conn->max_recv_dlength = BYTE_UNITS * 1629 conn->max_recv_dlength = BYTE_UNITS *
1384 le16_to_cpu(fw_ddb_entry->iscsi_max_rcv_data_seg_len); 1630 le16_to_cpu(fw_ddb_entry->iscsi_max_rcv_data_seg_len);
@@ -1407,6 +1653,11 @@ void qla4xxx_update_session_conn_param(struct scsi_qla_host *ha,
1407 1653
1408 memcpy(sess->initiatorname, ha->name_string, 1654 memcpy(sess->initiatorname, ha->name_string,
1409 min(sizeof(ha->name_string), sizeof(sess->initiatorname))); 1655 min(sizeof(ha->name_string), sizeof(sess->initiatorname)));
1656
1657exit_session_conn_param:
1658 if (fw_ddb_entry)
1659 dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
1660 fw_ddb_entry, fw_ddb_entry_dma);
1410} 1661}
1411 1662
1412/* 1663/*
@@ -1607,6 +1858,9 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha)
1607 vfree(ha->chap_list); 1858 vfree(ha->chap_list);
1608 ha->chap_list = NULL; 1859 ha->chap_list = NULL;
1609 1860
1861 if (ha->fw_ddb_dma_pool)
1862 dma_pool_destroy(ha->fw_ddb_dma_pool);
1863
1610 /* release io space registers */ 1864 /* release io space registers */
1611 if (is_qla8022(ha)) { 1865 if (is_qla8022(ha)) {
1612 if (ha->nx_pcibase) 1866 if (ha->nx_pcibase)
@@ -1689,6 +1943,16 @@ static int qla4xxx_mem_alloc(struct scsi_qla_host *ha)
1689 goto mem_alloc_error_exit; 1943 goto mem_alloc_error_exit;
1690 } 1944 }
1691 1945
1946 ha->fw_ddb_dma_pool = dma_pool_create("ql4_fw_ddb", &ha->pdev->dev,
1947 DDB_DMA_BLOCK_SIZE, 8, 0);
1948
1949 if (ha->fw_ddb_dma_pool == NULL) {
1950 ql4_printk(KERN_WARNING, ha,
1951 "%s: fw_ddb_dma_pool allocation failed..\n",
1952 __func__);
1953 goto mem_alloc_error_exit;
1954 }
1955
1692 return QLA_SUCCESS; 1956 return QLA_SUCCESS;
1693 1957
1694mem_alloc_error_exit: 1958mem_alloc_error_exit:
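The per-DDB buffers now come from a dma_pool, which suits many small fixed-size allocations better than repeated dma_alloc_coherent calls. A kernel-style sketch of the create/alloc/free/destroy pairing (the device pointer, block size, and pool name below are placeholders):

#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Sketch: one pool of fixed-size, 8-byte-aligned DMA blocks. */
static int demo_pool_use(struct device *dev)
{
	struct dma_pool *pool;
	dma_addr_t dma;
	void *vaddr;

	pool = dma_pool_create("demo_pool", dev, 512, 8, 0);
	if (!pool)
		return -ENOMEM;

	vaddr = dma_pool_alloc(pool, GFP_KERNEL, &dma);
	if (!vaddr) {
		dma_pool_destroy(pool);
		return -ENOMEM;
	}

	/* ... hand 'dma' to the hardware, touch 'vaddr' from the CPU ... */

	dma_pool_free(pool, vaddr, dma);
	dma_pool_destroy(pool);
	return 0;
}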
@@ -1800,6 +2064,60 @@ void qla4_8xxx_watchdog(struct scsi_qla_host *ha)
1800 } 2064 }
1801} 2065}
1802 2066
2067void qla4xxx_check_relogin_flash_ddb(struct iscsi_cls_session *cls_sess)
2068{
2069 struct iscsi_session *sess;
2070 struct ddb_entry *ddb_entry;
2071 struct scsi_qla_host *ha;
2072
2073 sess = cls_sess->dd_data;
2074 ddb_entry = sess->dd_data;
2075 ha = ddb_entry->ha;
2076
2077 if (!(ddb_entry->ddb_type == FLASH_DDB))
2078 return;
2079
2080 if (adapter_up(ha) && !test_bit(DF_RELOGIN, &ddb_entry->flags) &&
2081 !iscsi_is_session_online(cls_sess)) {
2082 if (atomic_read(&ddb_entry->retry_relogin_timer) !=
2083 INVALID_ENTRY) {
2084 if (atomic_read(&ddb_entry->retry_relogin_timer) ==
2085 0) {
2086 atomic_set(&ddb_entry->retry_relogin_timer,
2087 INVALID_ENTRY);
2088 set_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags);
2089 set_bit(DF_RELOGIN, &ddb_entry->flags);
2090 DEBUG2(ql4_printk(KERN_INFO, ha,
2091 "%s: index [%d] login device\n",
2092 __func__, ddb_entry->fw_ddb_index));
2093 } else
2094 atomic_dec(&ddb_entry->retry_relogin_timer);
2095 }
2096 }
2097
2098 /* Wait for relogin to timeout */
2099 if (atomic_read(&ddb_entry->relogin_timer) &&
2100 (atomic_dec_and_test(&ddb_entry->relogin_timer) != 0)) {
2101 /*
2102 * If the relogin times out and the device is
2103 * still NOT ONLINE, then try to relogin again.
2104 */
2105 if (!iscsi_is_session_online(cls_sess)) {
2106 /* Reset retry relogin timer */
2107 atomic_inc(&ddb_entry->relogin_retry_count);
2108 DEBUG2(ql4_printk(KERN_INFO, ha,
2109 "%s: index[%d] relogin timed out-retrying"
2110 " relogin (%d), retry (%d)\n", __func__,
2111 ddb_entry->fw_ddb_index,
2112 atomic_read(&ddb_entry->relogin_retry_count),
2113 ddb_entry->default_time2wait + 4));
2114 set_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags);
2115 atomic_set(&ddb_entry->retry_relogin_timer,
2116 ddb_entry->default_time2wait + 4);
2117 }
2118 }
2119}
2120
1803/** 2121/**
1804 * qla4xxx_timer - checks every second for work to do. 2122 * qla4xxx_timer - checks every second for work to do.
1805 * @ha: Pointer to host adapter structure. 2123 * @ha: Pointer to host adapter structure.
@@ -1809,6 +2127,8 @@ static void qla4xxx_timer(struct scsi_qla_host *ha)
1809 int start_dpc = 0; 2127 int start_dpc = 0;
1810 uint16_t w; 2128 uint16_t w;
1811 2129
2130 iscsi_host_for_each_session(ha->host, qla4xxx_check_relogin_flash_ddb);
2131
1812 /* If we are in the middle of AER/EEH processing 2132 /* If we are in the middle of AER/EEH processing
1813 * skip any processing and reschedule the timer 2133 * skip any processing and reschedule the timer
1814 */ 2134 */
@@ -2078,7 +2398,12 @@ static void qla4xxx_fail_session(struct iscsi_cls_session *cls_session)
2078 sess = cls_session->dd_data; 2398 sess = cls_session->dd_data;
2079 ddb_entry = sess->dd_data; 2399 ddb_entry = sess->dd_data;
2080 ddb_entry->fw_ddb_device_state = DDB_DS_SESSION_FAILED; 2400 ddb_entry->fw_ddb_device_state = DDB_DS_SESSION_FAILED;
2081 iscsi_session_failure(cls_session->dd_data, ISCSI_ERR_CONN_FAILED); 2401
2402 if (ddb_entry->ddb_type == FLASH_DDB)
2403 iscsi_block_session(ddb_entry->sess);
2404 else
2405 iscsi_session_failure(cls_session->dd_data,
2406 ISCSI_ERR_CONN_FAILED);
2082} 2407}
2083 2408
2084/** 2409/**
@@ -2163,7 +2488,7 @@ recover_ha_init_adapter:
2163 2488
2164 /* NOTE: AF_ONLINE flag set upon successful completion of 2489 /* NOTE: AF_ONLINE flag set upon successful completion of
2165 * qla4xxx_initialize_adapter */ 2490 * qla4xxx_initialize_adapter */
2166 status = qla4xxx_initialize_adapter(ha); 2491 status = qla4xxx_initialize_adapter(ha, RESET_ADAPTER);
2167 } 2492 }
2168 2493
2169 /* Retry failed adapter initialization, if necessary 2494 /* Retry failed adapter initialization, if necessary
@@ -2245,17 +2570,108 @@ static void qla4xxx_relogin_devices(struct iscsi_cls_session *cls_session)
2245 iscsi_unblock_session(ddb_entry->sess); 2570 iscsi_unblock_session(ddb_entry->sess);
2246 } else { 2571 } else {
2247 /* Trigger relogin */ 2572 /* Trigger relogin */
2248 iscsi_session_failure(cls_session->dd_data, 2573 if (ddb_entry->ddb_type == FLASH_DDB) {
2249 ISCSI_ERR_CONN_FAILED); 2574 if (!test_bit(DF_RELOGIN, &ddb_entry->flags))
2575 qla4xxx_arm_relogin_timer(ddb_entry);
2576 } else
2577 iscsi_session_failure(cls_session->dd_data,
2578 ISCSI_ERR_CONN_FAILED);
2250 } 2579 }
2251 } 2580 }
2252} 2581}
2253 2582
2583int qla4xxx_unblock_flash_ddb(struct iscsi_cls_session *cls_session)
2584{
2585 struct iscsi_session *sess;
2586 struct ddb_entry *ddb_entry;
2587 struct scsi_qla_host *ha;
2588
2589 sess = cls_session->dd_data;
2590 ddb_entry = sess->dd_data;
2591 ha = ddb_entry->ha;
2592 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: ddb[%d]"
2593 " unblock session\n", ha->host_no, __func__,
2594 ddb_entry->fw_ddb_index);
2595
2596 iscsi_unblock_session(ddb_entry->sess);
2597
2598 /* Start scan target */
2599 if (test_bit(AF_ONLINE, &ha->flags)) {
2600 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: ddb[%d]"
2601 " start scan\n", ha->host_no, __func__,
2602 ddb_entry->fw_ddb_index);
2603 scsi_queue_work(ha->host, &ddb_entry->sess->scan_work);
2604 }
2605 return QLA_SUCCESS;
2606}
2607
2608int qla4xxx_unblock_ddb(struct iscsi_cls_session *cls_session)
2609{
2610 struct iscsi_session *sess;
2611 struct ddb_entry *ddb_entry;
2612 struct scsi_qla_host *ha;
2613
2614 sess = cls_session->dd_data;
2615 ddb_entry = sess->dd_data;
2616 ha = ddb_entry->ha;
2617 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: ddb[%d]"
2618 " unblock user space session\n", ha->host_no, __func__,
2619 ddb_entry->fw_ddb_index);
2620 iscsi_conn_start(ddb_entry->conn);
2621 iscsi_conn_login_event(ddb_entry->conn,
2622 ISCSI_CONN_STATE_LOGGED_IN);
2623
2624 return QLA_SUCCESS;
2625}
2626
2254static void qla4xxx_relogin_all_devices(struct scsi_qla_host *ha) 2627static void qla4xxx_relogin_all_devices(struct scsi_qla_host *ha)
2255{ 2628{
2256 iscsi_host_for_each_session(ha->host, qla4xxx_relogin_devices); 2629 iscsi_host_for_each_session(ha->host, qla4xxx_relogin_devices);
2257} 2630}
2258 2631
2632static void qla4xxx_relogin_flash_ddb(struct iscsi_cls_session *cls_sess)
2633{
2634 uint16_t relogin_timer;
2635 struct iscsi_session *sess;
2636 struct ddb_entry *ddb_entry;
2637 struct scsi_qla_host *ha;
2638
2639 sess = cls_sess->dd_data;
2640 ddb_entry = sess->dd_data;
2641 ha = ddb_entry->ha;
2642
2643 relogin_timer = max(ddb_entry->default_relogin_timeout,
2644 (uint16_t)RELOGIN_TOV);
2645 atomic_set(&ddb_entry->relogin_timer, relogin_timer);
2646
2647 DEBUG2(ql4_printk(KERN_INFO, ha,
2648 "scsi%ld: Relogin index [%d]. TOV=%d\n", ha->host_no,
2649 ddb_entry->fw_ddb_index, relogin_timer));
2650
2651 qla4xxx_login_flash_ddb(cls_sess);
2652}
2653
2654static void qla4xxx_dpc_relogin(struct iscsi_cls_session *cls_sess)
2655{
2656 struct iscsi_session *sess;
2657 struct ddb_entry *ddb_entry;
2658 struct scsi_qla_host *ha;
2659
2660 sess = cls_sess->dd_data;
2661 ddb_entry = sess->dd_data;
2662 ha = ddb_entry->ha;
2663
2664 if (!(ddb_entry->ddb_type == FLASH_DDB))
2665 return;
2666
2667 if (test_and_clear_bit(DF_RELOGIN, &ddb_entry->flags) &&
2668 !iscsi_is_session_online(cls_sess)) {
2669 DEBUG2(ql4_printk(KERN_INFO, ha,
2670 "relogin issued\n"));
2671 qla4xxx_relogin_flash_ddb(cls_sess);
2672 }
2673}
2674
2259void qla4xxx_wake_dpc(struct scsi_qla_host *ha) 2675void qla4xxx_wake_dpc(struct scsi_qla_host *ha)
2260{ 2676{
2261 if (ha->dpc_thread) 2677 if (ha->dpc_thread)
@@ -2356,6 +2772,12 @@ dpc_post_reset_ha:
2356 if (test_and_clear_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags)) 2772 if (test_and_clear_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags))
2357 qla4xxx_get_dhcp_ip_address(ha); 2773 qla4xxx_get_dhcp_ip_address(ha);
2358 2774
2775 /* ---- relogin device? --- */
2776 if (adapter_up(ha) &&
2777 test_and_clear_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags)) {
2778 iscsi_host_for_each_session(ha->host, qla4xxx_dpc_relogin);
2779 }
2780
2359 /* ---- link change? --- */ 2781 /* ---- link change? --- */
2360 if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { 2782 if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) {
2361 if (!test_bit(AF_LINK_UP, &ha->flags)) { 2783 if (!test_bit(AF_LINK_UP, &ha->flags)) {
@@ -2368,8 +2790,12 @@ dpc_post_reset_ha:
2368 * fatal error recovery. Therefore, the driver must 2790 * fatal error recovery. Therefore, the driver must
2369 * manually relogin to devices when recovering from 2791 * manually relogin to devices when recovering from
2370 * connection failures, logouts, expired KATO, etc. */ 2792 * connection failures, logouts, expired KATO, etc. */
2371 2793 if (test_and_clear_bit(AF_BUILD_DDB_LIST, &ha->flags)) {
2372 qla4xxx_relogin_all_devices(ha); 2794 qla4xxx_build_ddb_list(ha, ha->is_reset);
2795 iscsi_host_for_each_session(ha->host,
2796 qla4xxx_login_flash_ddb);
2797 } else
2798 qla4xxx_relogin_all_devices(ha);
2373 } 2799 }
2374 } 2800 }
2375} 2801}
@@ -2867,6 +3293,9 @@ static int get_fw_boot_info(struct scsi_qla_host *ha, uint16_t ddb_index[])
2867 " target ID %d\n", __func__, ddb_index[0], 3293 " target ID %d\n", __func__, ddb_index[0],
2868 ddb_index[1])); 3294 ddb_index[1]));
2869 3295
3296 ha->pri_ddb_idx = ddb_index[0];
3297 ha->sec_ddb_idx = ddb_index[1];
3298
2870exit_boot_info_free: 3299exit_boot_info_free:
2871 dma_free_coherent(&ha->pdev->dev, size, buf, buf_dma); 3300 dma_free_coherent(&ha->pdev->dev, size, buf, buf_dma);
2872exit_boot_info: 3301exit_boot_info:
@@ -3034,6 +3463,9 @@ static int qla4xxx_get_boot_info(struct scsi_qla_host *ha)
3034 return ret; 3463 return ret;
3035 } 3464 }
3036 3465
3466 if (ql4xdisablesysfsboot)
3467 return QLA_SUCCESS;
3468
3037 if (ddb_index[0] == 0xffff) 3469 if (ddb_index[0] == 0xffff)
3038 goto sec_target; 3470 goto sec_target;
3039 3471
@@ -3066,7 +3498,15 @@ static int qla4xxx_setup_boot_info(struct scsi_qla_host *ha)
3066 struct iscsi_boot_kobj *boot_kobj; 3498 struct iscsi_boot_kobj *boot_kobj;
3067 3499
3068 if (qla4xxx_get_boot_info(ha) != QLA_SUCCESS) 3500 if (qla4xxx_get_boot_info(ha) != QLA_SUCCESS)
3069 return 0; 3501 return QLA_ERROR;
3502
3503 if (ql4xdisablesysfsboot) {
3504 ql4_printk(KERN_INFO, ha,
3505 "%s: syfsboot disabled - driver will trigger login"
3506 "and publish session for discovery .\n", __func__);
3507 return QLA_SUCCESS;
3508 }
3509
3070 3510
3071 ha->boot_kset = iscsi_boot_create_host_kset(ha->host->host_no); 3511 ha->boot_kset = iscsi_boot_create_host_kset(ha->host->host_no);
3072 if (!ha->boot_kset) 3512 if (!ha->boot_kset)
@@ -3108,7 +3548,7 @@ static int qla4xxx_setup_boot_info(struct scsi_qla_host *ha)
3108 if (!boot_kobj) 3548 if (!boot_kobj)
3109 goto put_host; 3549 goto put_host;
3110 3550
3111 return 0; 3551 return QLA_SUCCESS;
3112 3552
3113put_host: 3553put_host:
3114 scsi_host_put(ha->host); 3554 scsi_host_put(ha->host);
@@ -3174,9 +3614,507 @@ static void qla4xxx_create_chap_list(struct scsi_qla_host *ha)
3174exit_chap_list: 3614exit_chap_list:
3175 dma_free_coherent(&ha->pdev->dev, chap_size, 3615 dma_free_coherent(&ha->pdev->dev, chap_size,
3176 chap_flash_data, chap_dma); 3616 chap_flash_data, chap_dma);
3177 return;
3178} 3617}
3179 3618
3619static void qla4xxx_get_param_ddb(struct ddb_entry *ddb_entry,
3620 struct ql4_tuple_ddb *tddb)
3621{
3622 struct scsi_qla_host *ha;
3623 struct iscsi_cls_session *cls_sess;
3624 struct iscsi_cls_conn *cls_conn;
3625 struct iscsi_session *sess;
3626 struct iscsi_conn *conn;
3627
3628 DEBUG2(printk(KERN_INFO "Func: %s\n", __func__));
3629 ha = ddb_entry->ha;
3630 cls_sess = ddb_entry->sess;
3631 sess = cls_sess->dd_data;
3632 cls_conn = ddb_entry->conn;
3633 conn = cls_conn->dd_data;
3634
3635 tddb->tpgt = sess->tpgt;
3636 tddb->port = conn->persistent_port;
3637 strncpy(tddb->iscsi_name, sess->targetname, ISCSI_NAME_SIZE);
3638 strncpy(tddb->ip_addr, conn->persistent_address, DDB_IPADDR_LEN);
3639}
3640
3641static void qla4xxx_convert_param_ddb(struct dev_db_entry *fw_ddb_entry,
3642 struct ql4_tuple_ddb *tddb)
3643{
3644 uint16_t options = 0;
3645
3646 tddb->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp);
3647 memcpy(&tddb->iscsi_name[0], &fw_ddb_entry->iscsi_name[0],
3648 min(sizeof(tddb->iscsi_name), sizeof(fw_ddb_entry->iscsi_name)));
3649
3650 options = le16_to_cpu(fw_ddb_entry->options);
3651 if (options & DDB_OPT_IPV6_DEVICE)
3652 sprintf(tddb->ip_addr, "%pI6", fw_ddb_entry->ip_addr);
3653 else
3654 sprintf(tddb->ip_addr, "%pI4", fw_ddb_entry->ip_addr);
3655
3656 tddb->port = le16_to_cpu(fw_ddb_entry->port);
3657}
3658
3659static int qla4xxx_compare_tuple_ddb(struct scsi_qla_host *ha,
3660 struct ql4_tuple_ddb *old_tddb,
3661 struct ql4_tuple_ddb *new_tddb)
3662{
3663 if (strcmp(old_tddb->iscsi_name, new_tddb->iscsi_name))
3664 return QLA_ERROR;
3665
3666 if (strcmp(old_tddb->ip_addr, new_tddb->ip_addr))
3667 return QLA_ERROR;
3668
3669 if (old_tddb->port != new_tddb->port)
3670 return QLA_ERROR;
3671
3672 DEBUG2(ql4_printk(KERN_INFO, ha,
3673 "Match Found, fw[%d,%d,%s,%s], [%d,%d,%s,%s]",
3674 old_tddb->port, old_tddb->tpgt, old_tddb->ip_addr,
3675 old_tddb->iscsi_name, new_tddb->port, new_tddb->tpgt,
3676 new_tddb->ip_addr, new_tddb->iscsi_name));
3677
3678 return QLA_SUCCESS;
3679}
3680
3681static int qla4xxx_is_session_exists(struct scsi_qla_host *ha,
3682 struct dev_db_entry *fw_ddb_entry)
3683{
3684 struct ddb_entry *ddb_entry;
3685 struct ql4_tuple_ddb *fw_tddb = NULL;
3686 struct ql4_tuple_ddb *tmp_tddb = NULL;
3687 int idx;
3688 int ret = QLA_ERROR;
3689
3690 fw_tddb = vzalloc(sizeof(*fw_tddb));
3691 if (!fw_tddb) {
3692 DEBUG2(ql4_printk(KERN_WARNING, ha,
3693 "Memory Allocation failed.\n"));
3694 ret = QLA_SUCCESS;
3695 goto exit_check;
3696 }
3697
3698 tmp_tddb = vzalloc(sizeof(*tmp_tddb));
3699 if (!tmp_tddb) {
3700 DEBUG2(ql4_printk(KERN_WARNING, ha,
3701 "Memory Allocation failed.\n"));
3702 ret = QLA_SUCCESS;
3703 goto exit_check;
3704 }
3705
3706 qla4xxx_convert_param_ddb(fw_ddb_entry, fw_tddb);
3707
3708 for (idx = 0; idx < MAX_DDB_ENTRIES; idx++) {
3709 ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx);
3710 if (ddb_entry == NULL)
3711 continue;
3712
3713 qla4xxx_get_param_ddb(ddb_entry, tmp_tddb);
3714 if (!qla4xxx_compare_tuple_ddb(ha, fw_tddb, tmp_tddb)) {
3715 ret = QLA_SUCCESS; /* found */
3716 goto exit_check;
3717 }
3718 }
3719
3720exit_check:
3721 if (fw_tddb)
3722 vfree(fw_tddb);
3723 if (tmp_tddb)
3724 vfree(tmp_tddb);
3725 return ret;
3726}
3727
3728static int qla4xxx_is_flash_ddb_exists(struct scsi_qla_host *ha,
3729 struct list_head *list_nt,
3730 struct dev_db_entry *fw_ddb_entry)
3731{
3732 struct qla_ddb_index *nt_ddb_idx, *nt_ddb_idx_tmp;
3733 struct ql4_tuple_ddb *fw_tddb = NULL;
3734 struct ql4_tuple_ddb *tmp_tddb = NULL;
3735 int ret = QLA_ERROR;
3736
3737 fw_tddb = vzalloc(sizeof(*fw_tddb));
3738 if (!fw_tddb) {
3739 DEBUG2(ql4_printk(KERN_WARNING, ha,
3740 "Memory Allocation failed.\n"));
3741 ret = QLA_SUCCESS;
3742 goto exit_check;
3743 }
3744
3745 tmp_tddb = vzalloc(sizeof(*tmp_tddb));
3746 if (!tmp_tddb) {
3747 DEBUG2(ql4_printk(KERN_WARNING, ha,
3748 "Memory Allocation failed.\n"));
3749 ret = QLA_SUCCESS;
3750 goto exit_check;
3751 }
3752
3753 qla4xxx_convert_param_ddb(fw_ddb_entry, fw_tddb);
3754
3755 list_for_each_entry_safe(nt_ddb_idx, nt_ddb_idx_tmp, list_nt, list) {
3756 qla4xxx_convert_param_ddb(&nt_ddb_idx->fw_ddb, tmp_tddb);
3757 if (!qla4xxx_compare_tuple_ddb(ha, fw_tddb, tmp_tddb)) {
3758 ret = QLA_SUCCESS; /* found */
3759 goto exit_check;
3760 }
3761 }
3762
3763exit_check:
3764 if (fw_tddb)
3765 vfree(fw_tddb);
3766 if (tmp_tddb)
3767 vfree(tmp_tddb);
3768 return ret;
3769}
3770
3771static void qla4xxx_free_nt_list(struct list_head *list_nt)
3772{
3773 struct qla_ddb_index *nt_ddb_idx, *nt_ddb_idx_tmp;
3774
3775 /* Free up the normaltargets list */
3776 list_for_each_entry_safe(nt_ddb_idx, nt_ddb_idx_tmp, list_nt, list) {
3777 list_del_init(&nt_ddb_idx->list);
3778 vfree(nt_ddb_idx);
3779 }
3780
3781}
3782
3783static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha,
3784 struct dev_db_entry *fw_ddb_entry)
3785{
3786 struct iscsi_endpoint *ep;
3787 struct sockaddr_in *addr;
3788 struct sockaddr_in6 *addr6;
3789 struct sockaddr *dst_addr;
3790 char *ip;
3791
3792 /* TODO: need to destroy iscsi_endpoint on unload */
3793 dst_addr = vmalloc(sizeof(*dst_addr));
3794 if (!dst_addr)
3795 return NULL;
3796
3797 if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) {
3798 dst_addr->sa_family = AF_INET6;
3799 addr6 = (struct sockaddr_in6 *)dst_addr;
3800 ip = (char *)&addr6->sin6_addr;
3801 memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN);
3802 addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port));
3803
3804 } else {
3805 dst_addr->sa_family = AF_INET;
3806 addr = (struct sockaddr_in *)dst_addr;
3807 ip = (char *)&addr->sin_addr;
3808 memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN);
3809 addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port));
3810 }
3811
3812 ep = qla4xxx_ep_connect(ha->host, dst_addr, 0);
3813 vfree(dst_addr);
3814 return ep;
3815}
3816
3817static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx)
3818{
3819 if (ql4xdisablesysfsboot)
3820 return QLA_SUCCESS;
3821 if (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx)
3822 return QLA_ERROR;
3823 return QLA_SUCCESS;
3824}
3825
3826static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha,
3827 struct ddb_entry *ddb_entry)
3828{
3829 ddb_entry->ddb_type = FLASH_DDB;
3830 ddb_entry->fw_ddb_index = INVALID_ENTRY;
3831 ddb_entry->fw_ddb_device_state = DDB_DS_NO_CONNECTION_ACTIVE;
3832 ddb_entry->ha = ha;
3833 ddb_entry->unblock_sess = qla4xxx_unblock_flash_ddb;
3834 ddb_entry->ddb_change = qla4xxx_flash_ddb_change;
3835
3836 atomic_set(&ddb_entry->retry_relogin_timer, INVALID_ENTRY);
3837 atomic_set(&ddb_entry->relogin_timer, 0);
3838 atomic_set(&ddb_entry->relogin_retry_count, 0);
3839
3840 ddb_entry->default_relogin_timeout =
3841 le16_to_cpu(ddb_entry->fw_ddb_entry.def_timeout);
3842 ddb_entry->default_time2wait =
3843 le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait);
3844}
3845
3846static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha)
3847{
3848 uint32_t idx = 0;
3849 uint32_t ip_idx[IP_ADDR_COUNT] = {0, 1, 2, 3}; /* 4 IP interfaces */
3850 uint32_t sts[MBOX_REG_COUNT];
3851 uint32_t ip_state;
3852 unsigned long wtime;
3853 int ret;
3854
3855 wtime = jiffies + (HZ * IP_CONFIG_TOV);
3856 do {
3857 for (idx = 0; idx < IP_ADDR_COUNT; idx++) {
3858 if (ip_idx[idx] == -1)
3859 continue;
3860
3861 ret = qla4xxx_get_ip_state(ha, 0, ip_idx[idx], sts);
3862
3863 if (ret == QLA_ERROR) {
3864 ip_idx[idx] = -1;
3865 continue;
3866 }
3867
3868 ip_state = (sts[1] & IP_STATE_MASK) >> IP_STATE_SHIFT;
3869
3870 DEBUG2(ql4_printk(KERN_INFO, ha,
3871 "Waiting for IP state for idx = %d, state = 0x%x\n",
3872 ip_idx[idx], ip_state));
3873 if (ip_state == IP_ADDRSTATE_UNCONFIGURED ||
3874 ip_state == IP_ADDRSTATE_INVALID ||
3875 ip_state == IP_ADDRSTATE_PREFERRED ||
3876 ip_state == IP_ADDRSTATE_DEPRICATED ||
3877 ip_state == IP_ADDRSTATE_DISABLING)
3878 ip_idx[idx] = -1;
3879
3880 }
3881
3882 /* Break if all IP states checked */
3883 if ((ip_idx[0] == -1) &&
3884 (ip_idx[1] == -1) &&
3885 (ip_idx[2] == -1) &&
3886 (ip_idx[3] == -1))
3887 break;
3888 schedule_timeout_uninterruptible(HZ);
3889 } while (time_after(wtime, jiffies));
3890}
3891
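qla4xxx_wait_for_ip_configuration is the classic bounded-poll idiom: compute a deadline in jiffies, re-check the condition each second, and bail out via time_after() when the budget is spent. A userspace equivalent built on clock_gettime; the 30-second budget is only an assumed stand-in for IP_CONFIG_TOV:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Stand-in for the polled condition ("all IP states settled"). */
static bool condition_met(int iteration)
{
	return iteration >= 3;
}

int main(void)
{
	struct timespec now, deadline;
	int polls = 0;

	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_sec += 30;			/* overall time budget */

	for (;;) {
		if (condition_met(polls++))
			break;
		clock_gettime(CLOCK_MONOTONIC, &now);
		if (now.tv_sec >= deadline.tv_sec)	/* coarse time_after() */
			break;
		sleep(1);	/* schedule_timeout_uninterruptible analogue */
	}
	printf("polled %d time(s)\n", polls);
	return 0;
}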
3892void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset)
3893{
3894 int max_ddbs;
3895 int ret;
3896 uint32_t idx = 0, next_idx = 0;
3897 uint32_t state = 0, conn_err = 0;
3898 uint16_t conn_id;
3899 struct dev_db_entry *fw_ddb_entry;
3900 struct ddb_entry *ddb_entry = NULL;
3901 dma_addr_t fw_ddb_dma;
3902 struct iscsi_cls_session *cls_sess;
3903 struct iscsi_session *sess;
3904 struct iscsi_cls_conn *cls_conn;
3905 struct iscsi_endpoint *ep;
3906 uint16_t cmds_max = 32, tmo = 0;
3907 uint32_t initial_cmdsn = 0;
3908 struct list_head list_st, list_nt; /* Lists of sendtargets and normal targets */
3909 struct qla_ddb_index *st_ddb_idx, *st_ddb_idx_tmp;
3910 int fw_idx_size;
3911 unsigned long wtime;
3912 struct qla_ddb_index *nt_ddb_idx;
3913
3914 if (!test_bit(AF_LINK_UP, &ha->flags)) {
3915 set_bit(AF_BUILD_DDB_LIST, &ha->flags);
3916 ha->is_reset = is_reset;
3917 return;
3918 }
3919 max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX :
3920 MAX_DEV_DB_ENTRIES;
3921
3922 fw_ddb_entry = dma_pool_alloc(ha->fw_ddb_dma_pool, GFP_KERNEL,
3923 &fw_ddb_dma);
3924 if (fw_ddb_entry == NULL) {
3925 DEBUG2(ql4_printk(KERN_ERR, ha, "Out of memory\n"));
3926 goto exit_ddb_list;
3927 }
3928
3929 INIT_LIST_HEAD(&list_st);
3930 INIT_LIST_HEAD(&list_nt);
3931 fw_idx_size = sizeof(struct qla_ddb_index);
3932
3933 for (idx = 0; idx < max_ddbs; idx = next_idx) {
3934 ret = qla4xxx_get_fwddb_entry(ha, idx, fw_ddb_entry,
3935 fw_ddb_dma, NULL,
3936 &next_idx, &state, &conn_err,
3937 NULL, &conn_id);
3938 if (ret == QLA_ERROR)
3939 break;
3940
3941 if (qla4xxx_verify_boot_idx(ha, idx) != QLA_SUCCESS)
3942 goto continue_next_st;
3943
3944 /* Check if ST, add to the list_st */
3945 if (strlen((char *) fw_ddb_entry->iscsi_name) != 0)
3946 goto continue_next_st;
3947
3948 st_ddb_idx = vzalloc(fw_idx_size);
3949 if (!st_ddb_idx)
3950 break;
3951
3952 st_ddb_idx->fw_ddb_idx = idx;
3953
3954 list_add_tail(&st_ddb_idx->list, &list_st);
3955continue_next_st:
3956 if (next_idx == 0)
3957 break;
3958 }
3959
3960 /* Before issuing the conn open mbox, ensure all IP states are
3961 * configured. Note: conn open fails if the IPs are not configured
3962 */
3963 qla4xxx_wait_for_ip_configuration(ha);
3964
3965 /* Go through the STs and fire the sendtargets by issuing conn open mbx */
3966 list_for_each_entry_safe(st_ddb_idx, st_ddb_idx_tmp, &list_st, list) {
3967 qla4xxx_conn_open(ha, st_ddb_idx->fw_ddb_idx);
3968 }
3969
3970 /* Wait for all sendtargets to complete; wait a minimum of 12 sec */
3971 tmo = ((ha->def_timeout < LOGIN_TOV) ? LOGIN_TOV : ha->def_timeout);
3972 DEBUG2(ql4_printk(KERN_INFO, ha,
3973 "Default time to wait for build ddb %d\n", tmo));
3974
3975 wtime = jiffies + (HZ * tmo);
3976 do {
3977 list_for_each_entry_safe(st_ddb_idx, st_ddb_idx_tmp, &list_st,
3978 list) {
3979 ret = qla4xxx_get_fwddb_entry(ha,
3980 st_ddb_idx->fw_ddb_idx,
3981 NULL, 0, NULL, &next_idx,
3982 &state, &conn_err, NULL,
3983 NULL);
3984 if (ret == QLA_ERROR)
3985 continue;
3986
3987 if (state == DDB_DS_NO_CONNECTION_ACTIVE ||
3988 state == DDB_DS_SESSION_FAILED) {
3989 list_del_init(&st_ddb_idx->list);
3990 vfree(st_ddb_idx);
3991 }
3992 }
3993 schedule_timeout_uninterruptible(HZ / 10);
3994 } while (time_after(wtime, jiffies));
3995
3996 /* Free up the sendtargets list */
3997 list_for_each_entry_safe(st_ddb_idx, st_ddb_idx_tmp, &list_st, list) {
3998 list_del_init(&st_ddb_idx->list);
3999 vfree(st_ddb_idx);
4000 }
4001
4002 for (idx = 0; idx < max_ddbs; idx = next_idx) {
4003 ret = qla4xxx_get_fwddb_entry(ha, idx, fw_ddb_entry,
4004 fw_ddb_dma, NULL,
4005 &next_idx, &state, &conn_err,
4006 NULL, &conn_id);
4007 if (ret == QLA_ERROR)
4008 break;
4009
4010 if (qla4xxx_verify_boot_idx(ha, idx) != QLA_SUCCESS)
4011 goto continue_next_nt;
4012
4013 /* Check if NT, then add it to the list */
4014 if (strlen((char *) fw_ddb_entry->iscsi_name) == 0)
4015 goto continue_next_nt;
4016
4017 if (state == DDB_DS_NO_CONNECTION_ACTIVE ||
4018 state == DDB_DS_SESSION_FAILED) {
4019 DEBUG2(ql4_printk(KERN_INFO, ha,
4020 "Adding DDB to session = 0x%x\n",
4021 idx));
4022 if (is_reset == INIT_ADAPTER) {
4023 nt_ddb_idx = vmalloc(fw_idx_size);
4024 if (!nt_ddb_idx)
4025 break;
4026
4027 nt_ddb_idx->fw_ddb_idx = idx;
4028
4029 memcpy(&nt_ddb_idx->fw_ddb, fw_ddb_entry,
4030 sizeof(struct dev_db_entry));
4031
4032 if (qla4xxx_is_flash_ddb_exists(ha, &list_nt,
4033 fw_ddb_entry) == QLA_SUCCESS) {
4034 vfree(nt_ddb_idx);
4035 goto continue_next_nt;
4036 }
4037 list_add_tail(&nt_ddb_idx->list, &list_nt);
4038 } else if (is_reset == RESET_ADAPTER) {
4039 if (qla4xxx_is_session_exists(ha,
4040 fw_ddb_entry) == QLA_SUCCESS)
4041 goto continue_next_nt;
4042 }
4043
4044 /* Create the session object with INVALID_ENTRY;
4045 * the target_id will get set when we issue the login
4046 */
4047 cls_sess = iscsi_session_setup(&qla4xxx_iscsi_transport,
4048 ha->host, cmds_max,
4049 sizeof(struct ddb_entry),
4050 sizeof(struct ql4_task_data),
4051 initial_cmdsn, INVALID_ENTRY);
4052 if (!cls_sess)
4053 goto exit_ddb_list;
4054
4055 /*
4056 * iscsi_session_setup increments the driver reference
4057 * count, which would not let the driver be unloaded.
4058 * So call module_put to decrement the reference
4059 * count.
4060 */
4061 module_put(qla4xxx_iscsi_transport.owner);
4062 sess = cls_sess->dd_data;
4063 ddb_entry = sess->dd_data;
4064 ddb_entry->sess = cls_sess;
4065
4066 cls_sess->recovery_tmo = ql4xsess_recovery_tmo;
4067 memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry,
4068 sizeof(struct dev_db_entry));
4069
4070 qla4xxx_setup_flash_ddb_entry(ha, ddb_entry);
4071
4072 cls_conn = iscsi_conn_setup(cls_sess,
4073 sizeof(struct qla_conn),
4074 conn_id);
4075 if (!cls_conn)
4076 goto exit_ddb_list;
4077
4078 ddb_entry->conn = cls_conn;
4079
4080 /* Setup ep, for displaying attributes in sysfs */
4081 ep = qla4xxx_get_ep_fwdb(ha, fw_ddb_entry);
4082 if (ep) {
4083 ep->conn = cls_conn;
4084 cls_conn->ep = ep;
4085 } else {
4086 DEBUG2(ql4_printk(KERN_ERR, ha,
4087 "Unable to get ep\n"));
4088 }
4089
4090 /* Update sess/conn params */
4091 qla4xxx_copy_fwddb_param(ha, fw_ddb_entry, cls_sess,
4092 cls_conn);
4093
4094 if (is_reset == RESET_ADAPTER) {
4095 iscsi_block_session(cls_sess);
4096 /* Use the relogin path to discover new devices
4097 * by short-circuiting the logic of setting the
4098 * relogin timer - instead, set the flags
4099 * to initiate login right away.
4100 */
4101 set_bit(DPC_RELOGIN_DEVICE, &ha->dpc_flags);
4102 set_bit(DF_RELOGIN, &ddb_entry->flags);
4103 }
4104 }
4105continue_next_nt:
4106 if (next_idx == 0)
4107 break;
4108 }
4109exit_ddb_list:
4110 qla4xxx_free_nt_list(&list_nt);
4111 if (fw_ddb_entry)
4112 dma_pool_free(ha->fw_ddb_dma_pool, fw_ddb_entry, fw_ddb_dma);
4113
4114 qla4xxx_free_ddb_index(ha);
4115}
4116
4117
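qla4xxx_build_ddb_list keeps its sendtarget and normal-target candidates on list_heads and walks them with list_for_each_entry_safe so entries can be unlinked and freed mid-walk. The pattern in isolation, as a kernel-style sketch (struct and field names are illustrative):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_idx {
	u16 fw_ddb_idx;
	struct list_head list;
};

static void demo_walk(void)
{
	LIST_HEAD(demo_list);
	struct demo_idx *e, *tmp;
	int i;

	for (i = 0; i < 3; i++) {
		e = kzalloc(sizeof(*e), GFP_KERNEL);
		if (!e)
			break;
		e->fw_ddb_idx = i;
		list_add_tail(&e->list, &demo_list);
	}

	/* The _safe variant caches the next node in 'tmp', so the
	 * current entry can be unlinked and freed during the walk. */
	list_for_each_entry_safe(e, tmp, &demo_list, list) {
		list_del_init(&e->list);
		kfree(e);
	}
}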
3180/** 4118/**
3181 * qla4xxx_probe_adapter - callback function to probe HBA 4119 * qla4xxx_probe_adapter - callback function to probe HBA
3182 * @pdev: pointer to pci_dev structure 4120 * @pdev: pointer to pci_dev structure
@@ -3298,7 +4236,7 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
3298 * firmware 4236 * firmware
3299 * NOTE: interrupts enabled upon successful completion 4237 * NOTE: interrupts enabled upon successful completion
3300 */ 4238 */
3301 status = qla4xxx_initialize_adapter(ha); 4239 status = qla4xxx_initialize_adapter(ha, INIT_ADAPTER);
3302 while ((!test_bit(AF_ONLINE, &ha->flags)) && 4240 while ((!test_bit(AF_ONLINE, &ha->flags)) &&
3303 init_retry_count++ < MAX_INIT_RETRIES) { 4241 init_retry_count++ < MAX_INIT_RETRIES) {
3304 4242
@@ -3319,7 +4257,7 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
3319 if (ha->isp_ops->reset_chip(ha) == QLA_ERROR) 4257 if (ha->isp_ops->reset_chip(ha) == QLA_ERROR)
3320 continue; 4258 continue;
3321 4259
3322 status = qla4xxx_initialize_adapter(ha); 4260 status = qla4xxx_initialize_adapter(ha, INIT_ADAPTER);
3323 } 4261 }
3324 4262
3325 if (!test_bit(AF_ONLINE, &ha->flags)) { 4263 if (!test_bit(AF_ONLINE, &ha->flags)) {
@@ -3386,12 +4324,16 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
3386 ha->host_no, ha->firmware_version[0], ha->firmware_version[1], 4324 ha->host_no, ha->firmware_version[0], ha->firmware_version[1],
3387 ha->patch_number, ha->build_number); 4325 ha->patch_number, ha->build_number);
3388 4326
3389 qla4xxx_create_chap_list(ha);
3390
3391 if (qla4xxx_setup_boot_info(ha)) 4327 if (qla4xxx_setup_boot_info(ha))
3392 ql4_printk(KERN_ERR, ha, "%s:ISCSI boot info setup failed\n", 4328 ql4_printk(KERN_ERR, ha, "%s:ISCSI boot info setup failed\n",
3393 __func__); 4329 __func__);
3394 4330
4331 /* Build the ddb list and log in to each entry */
4332 qla4xxx_build_ddb_list(ha, INIT_ADAPTER);
4333 iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb);
4334
4335 qla4xxx_create_chap_list(ha);
4336
3395 qla4xxx_create_ifaces(ha); 4337 qla4xxx_create_ifaces(ha);
3396 return 0; 4338 return 0;
3397 4339
@@ -3449,6 +4391,38 @@ static void qla4xxx_prevent_other_port_reinit(struct scsi_qla_host *ha)
3449 } 4391 }
3450} 4392}
3451 4393
4394static void qla4xxx_destroy_fw_ddb_session(struct scsi_qla_host *ha)
4395{
4396 struct ddb_entry *ddb_entry;
4397 int options;
4398 int idx;
4399
4400 for (idx = 0; idx < MAX_DDB_ENTRIES; idx++) {
4401
4402 ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx);
4403 if ((ddb_entry != NULL) &&
4404 (ddb_entry->ddb_type == FLASH_DDB)) {
4405
4406 options = LOGOUT_OPTION_CLOSE_SESSION;
4407 if (qla4xxx_session_logout_ddb(ha, ddb_entry, options)
4408 == QLA_ERROR)
4409 ql4_printk(KERN_ERR, ha, "%s: Logout failed\n",
4410 __func__);
4411
4412 qla4xxx_clear_ddb_entry(ha, ddb_entry->fw_ddb_index);
4413 /*
4414 * We decremented the driver reference count when we
4415 * set up the session so that driver unload would be
4416 * seamless without actually destroying the session;
4417 * re-take the reference here to keep the counts balanced.
4418 */
4419 try_module_get(qla4xxx_iscsi_transport.owner);
4420 iscsi_destroy_endpoint(ddb_entry->conn->ep);
4421 qla4xxx_free_ddb(ha, ddb_entry);
4422 iscsi_session_teardown(ddb_entry->sess);
4423 }
4424 }
4425}
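The try_module_get() in the teardown above re-takes the reference that the setup path dropped with module_put() after iscsi_session_setup, so the get/put counts stay balanced across the session's life. The pairing in isolation, as a sketch rather than the driver's code:

#include <linux/module.h>

/* Sketch of balancing a reference around a long-lived session that
 * should not pin the module for its whole lifetime. */
static void demo_session_created(struct module *owner)
{
	/* The framework took a reference on our behalf; drop it so
	 * the module can still be unloaded while the session lives. */
	module_put(owner);
}

static void demo_session_destroying(struct module *owner)
{
	/* Re-take the reference before the framework drops it again
	 * in teardown, keeping the get/put counts balanced. */
	if (!try_module_get(owner))
		pr_warn("owner module is already going away\n");
}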
3452/** 4426/**
3453 * qla4xxx_remove_adapter - callback function to remove adapter. 4427 * qla4xxx_remove_adapter - callback function to remove adapter.
3454 * @pci_dev: PCI device pointer 4428 * @pci_dev: PCI device pointer
@@ -3465,9 +4439,11 @@ static void __devexit qla4xxx_remove_adapter(struct pci_dev *pdev)
3465 /* destroy iface from sysfs */ 4439 /* destroy iface from sysfs */
3466 qla4xxx_destroy_ifaces(ha); 4440 qla4xxx_destroy_ifaces(ha);
3467 4441
3468 if (ha->boot_kset) 4442 if ((!ql4xdisablesysfsboot) && ha->boot_kset)
3469 iscsi_boot_destroy_kset(ha->boot_kset); 4443 iscsi_boot_destroy_kset(ha->boot_kset);
3470 4444
4445 qla4xxx_destroy_fw_ddb_session(ha);
4446
3471 scsi_remove_host(ha->host); 4447 scsi_remove_host(ha->host);
3472 4448
3473 qla4xxx_free_adapter(ha); 4449 qla4xxx_free_adapter(ha);
@@ -4115,7 +5091,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
4115 5091
4116 qla4_8xxx_idc_unlock(ha); 5092 qla4_8xxx_idc_unlock(ha);
4117 clear_bit(AF_FW_RECOVERY, &ha->flags); 5093 clear_bit(AF_FW_RECOVERY, &ha->flags);
4118 rval = qla4xxx_initialize_adapter(ha); 5094 rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER);
4119 qla4_8xxx_idc_lock(ha); 5095 qla4_8xxx_idc_lock(ha);
4120 5096
4121 if (rval != QLA_SUCCESS) { 5097 if (rval != QLA_SUCCESS) {
@@ -4151,7 +5127,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
4151 if ((qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE) == 5127 if ((qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE) ==
4152 QLA82XX_DEV_READY)) { 5128 QLA82XX_DEV_READY)) {
4153 clear_bit(AF_FW_RECOVERY, &ha->flags); 5129 clear_bit(AF_FW_RECOVERY, &ha->flags);
4154 rval = qla4xxx_initialize_adapter(ha); 5130 rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER);
4155 if (rval == QLA_SUCCESS) { 5131 if (rval == QLA_SUCCESS) {
4156 ret = qla4xxx_request_irqs(ha); 5132 ret = qla4xxx_request_irqs(ha);
4157 if (ret) { 5133 if (ret) {
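
The new qla4xxx_destroy_fw_ddb_session() above walks every firmware DDB index and tears down only the sessions that were built from flash DDBs. A minimal userspace sketch of that walk; the struct, array, and printout are toy stand-ins for the driver's lookup, logout, and teardown helpers:

#include <stdio.h>

#define MAX_DDB_ENTRIES 4
#define FLASH_DDB 1

/* Toy stand-in for struct ddb_entry: just a type tag and a live flag. */
struct ddb_entry {
	int ddb_type;
	int in_use;
};

static struct ddb_entry ddbs[MAX_DDB_ENTRIES] = {
	{ FLASH_DDB, 1 }, { 0, 1 }, { FLASH_DDB, 1 }, { 0, 0 },
};

int main(void)
{
	for (int idx = 0; idx < MAX_DDB_ENTRIES; idx++) {
		struct ddb_entry *e = &ddbs[idx];

		/* skip unused slots and sessions not built from flash */
		if (!e->in_use || e->ddb_type != FLASH_DDB)
			continue;
		/* real code: logout, clear the fw entry, rebalance the
		 * module reference dropped at setup, tear down the session */
		printf("ddb %d: logout + clear + teardown\n", idx);
		e->in_use = 0;
	}
	return 0;
}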
diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h
index c15347d3f532..5254e57968f5 100644
--- a/drivers/scsi/qla4xxx/ql4_version.h
+++ b/drivers/scsi/qla4xxx/ql4_version.h
@@ -5,4 +5,4 @@
5 * See LICENSE.qla4xxx for copyright and licensing details. 5 * See LICENSE.qla4xxx for copyright and licensing details.
6 */ 6 */
7 7
8#define QLA4XXX_DRIVER_VERSION "5.02.00-k8" 8#define QLA4XXX_DRIVER_VERSION "5.02.00-k9"
diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index 84c934c0a545..520e8286db28 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -517,10 +517,14 @@ static void ssb_pcicore_pcie_setup_workarounds(struct ssb_pcicore *pc)
517 517
518static void __devinit ssb_pcicore_init_clientmode(struct ssb_pcicore *pc) 518static void __devinit ssb_pcicore_init_clientmode(struct ssb_pcicore *pc)
519{ 519{
520 ssb_pcicore_fix_sprom_core_index(pc); 520 struct ssb_device *pdev = pc->dev;
521 struct ssb_bus *bus = pdev->bus;
522
523 if (bus->bustype == SSB_BUSTYPE_PCI)
524 ssb_pcicore_fix_sprom_core_index(pc);
521 525
522 /* Disable PCI interrupts. */ 526 /* Disable PCI interrupts. */
523 ssb_write32(pc->dev, SSB_INTVEC, 0); 527 ssb_write32(pdev, SSB_INTVEC, 0);
524 528
525 /* Additional PCIe always once-executed workarounds */ 529 /* Additional PCIe always once-executed workarounds */
526 if (pc->dev->id.coreid == SSB_DEV_PCIE) { 530 if (pc->dev->id.coreid == SSB_DEV_PCIE) {
diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c
index fb2e89c3056c..5385da2e9cdb 100644
--- a/drivers/staging/rtl8712/usb_intf.c
+++ b/drivers/staging/rtl8712/usb_intf.c
@@ -89,6 +89,7 @@ static struct usb_device_id rtl871x_usb_id_tbl[] = {
89 {USB_DEVICE(0x0DF6, 0x0045)}, 89 {USB_DEVICE(0x0DF6, 0x0045)},
90 {USB_DEVICE(0x0DF6, 0x0059)}, /* 11n mode disable */ 90 {USB_DEVICE(0x0DF6, 0x0059)}, /* 11n mode disable */
91 {USB_DEVICE(0x0DF6, 0x004B)}, 91 {USB_DEVICE(0x0DF6, 0x004B)},
92 {USB_DEVICE(0x0DF6, 0x005D)},
92 {USB_DEVICE(0x0DF6, 0x0063)}, 93 {USB_DEVICE(0x0DF6, 0x0063)},
93 /* Sweex */ 94 /* Sweex */
94 {USB_DEVICE(0x177F, 0x0154)}, 95 {USB_DEVICE(0x177F, 0x0154)},
diff --git a/drivers/staging/tidspbridge/core/dsp-clock.c b/drivers/staging/tidspbridge/core/dsp-clock.c
index 3d1279c424a8..7eb56178fb64 100644
--- a/drivers/staging/tidspbridge/core/dsp-clock.c
+++ b/drivers/staging/tidspbridge/core/dsp-clock.c
@@ -54,6 +54,7 @@
54 54
55/* Bridge GPT id (1 - 4), DM Timer id (5 - 8) */ 55/* Bridge GPT id (1 - 4), DM Timer id (5 - 8) */
56#define DMT_ID(id) ((id) + 4) 56#define DMT_ID(id) ((id) + 4)
57#define DM_TIMER_CLOCKS 4
57 58
58/* Bridge MCBSP id (6 - 10), OMAP Mcbsp id (0 - 4) */ 59/* Bridge MCBSP id (6 - 10), OMAP Mcbsp id (0 - 4) */
59#define MCBSP_ID(id) ((id) - 6) 60#define MCBSP_ID(id) ((id) - 6)
@@ -114,8 +115,13 @@ static s8 get_clk_type(u8 id)
114 */ 115 */
115void dsp_clk_exit(void) 116void dsp_clk_exit(void)
116{ 117{
118 int i;
119
117 dsp_clock_disable_all(dsp_clocks); 120 dsp_clock_disable_all(dsp_clocks);
118 121
122 for (i = 0; i < DM_TIMER_CLOCKS; i++)
123 omap_dm_timer_free(timer[i]);
124
119 clk_put(iva2_clk); 125 clk_put(iva2_clk);
120 clk_put(ssi.sst_fck); 126 clk_put(ssi.sst_fck);
121 clk_put(ssi.ssr_fck); 127 clk_put(ssi.ssr_fck);
@@ -130,9 +136,13 @@ void dsp_clk_exit(void)
130void dsp_clk_init(void) 136void dsp_clk_init(void)
131{ 137{
132 static struct platform_device dspbridge_device; 138 static struct platform_device dspbridge_device;
139 int i, id;
133 140
134 dspbridge_device.dev.bus = &platform_bus_type; 141 dspbridge_device.dev.bus = &platform_bus_type;
135 142
143 for (i = 0, id = 5; i < DM_TIMER_CLOCKS; i++, id++)
144 timer[i] = omap_dm_timer_request_specific(id);
145
136 iva2_clk = clk_get(&dspbridge_device.dev, "iva2_ck"); 146 iva2_clk = clk_get(&dspbridge_device.dev, "iva2_ck");
137 if (IS_ERR(iva2_clk)) 147 if (IS_ERR(iva2_clk))
138 dev_err(bridge, "failed to get iva2 clock %p\n", iva2_clk); 148 dev_err(bridge, "failed to get iva2 clock %p\n", iva2_clk);
@@ -204,8 +214,7 @@ int dsp_clk_enable(enum dsp_clk_id clk_id)
204 clk_enable(iva2_clk); 214 clk_enable(iva2_clk);
205 break; 215 break;
206 case GPT_CLK: 216 case GPT_CLK:
207 timer[clk_id - 1] = 217 status = omap_dm_timer_start(timer[clk_id - 1]);
208 omap_dm_timer_request_specific(DMT_ID(clk_id));
209 break; 218 break;
210#ifdef CONFIG_OMAP_MCBSP 219#ifdef CONFIG_OMAP_MCBSP
211 case MCBSP_CLK: 220 case MCBSP_CLK:
@@ -281,7 +290,7 @@ int dsp_clk_disable(enum dsp_clk_id clk_id)
281 clk_disable(iva2_clk); 290 clk_disable(iva2_clk);
282 break; 291 break;
283 case GPT_CLK: 292 case GPT_CLK:
284 omap_dm_timer_free(timer[clk_id - 1]); 293 status = omap_dm_timer_stop(timer[clk_id - 1]);
285 break; 294 break;
286#ifdef CONFIG_OMAP_MCBSP 295#ifdef CONFIG_OMAP_MCBSP
287 case MCBSP_CLK: 296 case MCBSP_CLK:
diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c
index c43c7e3421c8..76cfc6edecd9 100644
--- a/drivers/staging/tidspbridge/rmgr/drv_interface.c
+++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c
@@ -24,11 +24,7 @@
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/platform_device.h> 25#include <linux/platform_device.h>
26#include <linux/pm.h> 26#include <linux/pm.h>
27
28#ifdef MODULE
29#include <linux/module.h> 27#include <linux/module.h>
30#endif
31
32#include <linux/device.h> 28#include <linux/device.h>
33#include <linux/init.h> 29#include <linux/init.h>
34#include <linux/moduleparam.h> 30#include <linux/moduleparam.h>
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index e8c564a53346..a8078d0638fa 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1458,6 +1458,16 @@ static const struct usb_device_id acm_ids[] = {
1458 }, 1458 },
1459 { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ 1459 { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */
1460 }, 1460 },
1461 /* Motorola H24 HSPA module: */
1462 { USB_DEVICE(0x22b8, 0x2d91) }, /* modem */
1463 { USB_DEVICE(0x22b8, 0x2d92) }, /* modem + diagnostics */
1464 { USB_DEVICE(0x22b8, 0x2d93) }, /* modem + AT port */
1465 { USB_DEVICE(0x22b8, 0x2d95) }, /* modem + AT port + diagnostics */
1466 { USB_DEVICE(0x22b8, 0x2d96) }, /* modem + NMEA */
1467 { USB_DEVICE(0x22b8, 0x2d97) }, /* modem + diagnostics + NMEA */
1468 { USB_DEVICE(0x22b8, 0x2d99) }, /* modem + AT port + NMEA */
1469 { USB_DEVICE(0x22b8, 0x2d9a) }, /* modem + AT port + diagnostics + NMEA */
1470
1461 { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */ 1471 { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */
1462 .driver_info = NO_UNION_NORMAL, /* union descriptor misplaced on 1472 .driver_info = NO_UNION_NORMAL, /* union descriptor misplaced on
1463 data interface instead of 1473 data interface instead of
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 717ebc9ff941..600d82348511 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -264,7 +264,7 @@ static int __devinit dwc3_core_init(struct dwc3 *dwc)
264 ret = -ENODEV; 264 ret = -ENODEV;
265 goto err0; 265 goto err0;
266 } 266 }
267 dwc->revision = reg & DWC3_GSNPSREV_MASK; 267 dwc->revision = reg;
268 268
269 dwc3_core_soft_reset(dwc); 269 dwc3_core_soft_reset(dwc);
270 270
diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c
index 596a0b464e61..4dff83d2f265 100644
--- a/drivers/usb/gadget/epautoconf.c
+++ b/drivers/usb/gadget/epautoconf.c
@@ -130,9 +130,6 @@ ep_matches (
130 num_req_streams = ep_comp->bmAttributes & 0x1f; 130 num_req_streams = ep_comp->bmAttributes & 0x1f;
131 if (num_req_streams > ep->max_streams) 131 if (num_req_streams > ep->max_streams)
132 return 0; 132 return 0;
133 /* Update the ep_comp descriptor if needed */
134 if (num_req_streams != ep->max_streams)
135 ep_comp->bmAttributes = ep->max_streams;
136 } 133 }
137 134
138 } 135 }
diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c
index c39d58860fa0..1a6f415c0d02 100644
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -2975,6 +2975,7 @@ static void fsg_unbind(struct usb_configuration *c, struct usb_function *f)
2975 fsg_common_put(common); 2975 fsg_common_put(common);
2976 usb_free_descriptors(fsg->function.descriptors); 2976 usb_free_descriptors(fsg->function.descriptors);
2977 usb_free_descriptors(fsg->function.hs_descriptors); 2977 usb_free_descriptors(fsg->function.hs_descriptors);
2978 usb_free_descriptors(fsg->function.ss_descriptors);
2978 kfree(fsg); 2979 kfree(fsg);
2979} 2980}
2980 2981
diff --git a/drivers/usb/host/isp1760-if.c b/drivers/usb/host/isp1760-if.c
index a7dc1e1d45f2..2ac4ac2e4ef9 100644
--- a/drivers/usb/host/isp1760-if.c
+++ b/drivers/usb/host/isp1760-if.c
@@ -18,7 +18,7 @@
18 18
19#include "isp1760-hcd.h" 19#include "isp1760-hcd.h"
20 20
21#ifdef CONFIG_OF 21#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/of.h> 23#include <linux/of.h>
24#include <linux/of_platform.h> 24#include <linux/of_platform.h>
@@ -31,7 +31,7 @@
31#include <linux/pci.h> 31#include <linux/pci.h>
32#endif 32#endif
33 33
34#ifdef CONFIG_OF 34#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
35struct isp1760 { 35struct isp1760 {
36 struct usb_hcd *hcd; 36 struct usb_hcd *hcd;
37 int rst_gpio; 37 int rst_gpio;
@@ -437,7 +437,7 @@ static int __init isp1760_init(void)
437 ret = platform_driver_register(&isp1760_plat_driver); 437 ret = platform_driver_register(&isp1760_plat_driver);
438 if (!ret) 438 if (!ret)
439 any_ret = 0; 439 any_ret = 0;
440#ifdef CONFIG_OF 440#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
441 ret = platform_driver_register(&isp1760_of_driver); 441 ret = platform_driver_register(&isp1760_of_driver);
442 if (!ret) 442 if (!ret)
443 any_ret = 0; 443 any_ret = 0;
@@ -457,7 +457,7 @@ module_init(isp1760_init);
457static void __exit isp1760_exit(void) 457static void __exit isp1760_exit(void)
458{ 458{
459 platform_driver_unregister(&isp1760_plat_driver); 459 platform_driver_unregister(&isp1760_plat_driver);
460#ifdef CONFIG_OF 460#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
461 platform_driver_unregister(&isp1760_of_driver); 461 platform_driver_unregister(&isp1760_of_driver);
462#endif 462#endif
463#ifdef CONFIG_PCI 463#ifdef CONFIG_PCI
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 60ddba8066ea..79cb0af779fa 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -774,6 +774,10 @@ static void musb_ep_program(struct musb *musb, u8 epnum,
774 if (musb->double_buffer_not_ok) 774 if (musb->double_buffer_not_ok)
775 musb_writew(epio, MUSB_TXMAXP, 775 musb_writew(epio, MUSB_TXMAXP,
776 hw_ep->max_packet_sz_tx); 776 hw_ep->max_packet_sz_tx);
777 else if (can_bulk_split(musb, qh->type))
778 musb_writew(epio, MUSB_TXMAXP, packet_sz
779 | ((hw_ep->max_packet_sz_tx /
780 packet_sz) - 1) << 11);
777 else 781 else
778 musb_writew(epio, MUSB_TXMAXP, 782 musb_writew(epio, MUSB_TXMAXP,
779 qh->maxpacket | 783 qh->maxpacket |
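
The new can_bulk_split() branch packs the USB packet size into the low bits of TXMAXP and a split multiplier, (FIFO size / packet size) - 1, into the field starting at bit 11, letting the controller split one large FIFO load into several bulk packets. A standalone model of that value (the helper name is hypothetical; the driver writes the result with musb_writew()):

#include <stdio.h>
#include <stdint.h>

/* Mirror of the expression in the hunk above:
 *   packet_sz | ((max_packet_sz_tx / packet_sz) - 1) << 11 */
static uint16_t txmaxp_bulk_split(uint16_t fifo_sz, uint16_t packet_sz)
{
	return (uint16_t)(packet_sz | (((fifo_sz / packet_sz) - 1) << 11));
}

int main(void)
{
	/* 4096-byte TX FIFO with 512-byte bulk packets -> multiplier 7 */
	printf("TXMAXP = 0x%04x\n", txmaxp_bulk_split(4096, 512));
	return 0;
}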
diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c
index 053f86d70009..ad96a3896729 100644
--- a/drivers/usb/renesas_usbhs/mod.c
+++ b/drivers/usb/renesas_usbhs/mod.c
@@ -349,7 +349,7 @@ void usbhs_irq_callback_update(struct usbhs_priv *priv, struct usbhs_mod *mod)
349 if (mod->irq_attch) 349 if (mod->irq_attch)
350 intenb1 |= ATTCHE; 350 intenb1 |= ATTCHE;
351 351
352 if (mod->irq_attch) 352 if (mod->irq_dtch)
353 intenb1 |= DTCHE; 353 intenb1 |= DTCHE;
354 354
355 if (mod->irq_sign) 355 if (mod->irq_sign)
diff --git a/drivers/usb/renesas_usbhs/mod_host.c b/drivers/usb/renesas_usbhs/mod_host.c
index bade761a1e52..7955de589951 100644
--- a/drivers/usb/renesas_usbhs/mod_host.c
+++ b/drivers/usb/renesas_usbhs/mod_host.c
@@ -1267,6 +1267,7 @@ int usbhs_mod_host_probe(struct usbhs_priv *priv)
1267 dev_err(dev, "Failed to create hcd\n"); 1267 dev_err(dev, "Failed to create hcd\n");
1268 return -ENOMEM; 1268 return -ENOMEM;
1269 } 1269 }
1270 hcd->has_tt = 1; /* for low/full speed */
1270 1271
1271 pipe_info = kzalloc(sizeof(*pipe_info) * pipe_size, GFP_KERNEL); 1272 pipe_info = kzalloc(sizeof(*pipe_info) * pipe_size, GFP_KERNEL);
1272 if (!pipe_info) { 1273 if (!pipe_info) {
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index e3426602dc82..6dd64534fad0 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -663,7 +663,12 @@ static const struct usb_device_id option_ids[] = {
663 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x01) }, 663 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x01) },
664 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x02) }, 664 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x02) },
665 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x03) }, 665 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x03) },
666 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x08) }, 666 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x10) },
667 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x12) },
668 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x13) },
669 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x02, 0x01) }, /* E398 3G Modem */
670 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x02, 0x02) }, /* E398 3G PC UI Interface */
671 { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x02, 0x03) }, /* E398 3G Application Interface */
667 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, 672 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) },
668 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, 673 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) },
669 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V740) }, 674 { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V740) },
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c
index 03f449a430d2..5b89f7d6cd0f 100644
--- a/drivers/watchdog/coh901327_wdt.c
+++ b/drivers/watchdog/coh901327_wdt.c
@@ -76,8 +76,6 @@ static int irq;
76static void __iomem *virtbase; 76static void __iomem *virtbase;
77static unsigned long coh901327_users; 77static unsigned long coh901327_users;
78static unsigned long boot_status; 78static unsigned long boot_status;
79static u16 wdogenablestore;
80static u16 irqmaskstore;
81static struct device *parent; 79static struct device *parent;
82 80
83/* 81/*
@@ -461,6 +459,10 @@ out:
461} 459}
462 460
463#ifdef CONFIG_PM 461#ifdef CONFIG_PM
462
463static u16 wdogenablestore;
464static u16 irqmaskstore;
465
464static int coh901327_suspend(struct platform_device *pdev, pm_message_t state) 466static int coh901327_suspend(struct platform_device *pdev, pm_message_t state)
465{ 467{
466 irqmaskstore = readw(virtbase + U300_WDOG_IMR) & 0x0001U; 468 irqmaskstore = readw(virtbase + U300_WDOG_IMR) & 0x0001U;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 3774c9b8dac9..8464ea1c36a1 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -231,6 +231,7 @@ static int __devinit cru_detect(unsigned long map_entry,
231 231
232 cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE; 232 cmn_regs.u1.reax = CRU_BIOS_SIGNATURE_VALUE;
233 233
234 set_memory_x((unsigned long)bios32_entrypoint, (2 * PAGE_SIZE));
234 asminline_call(&cmn_regs, bios32_entrypoint); 235 asminline_call(&cmn_regs, bios32_entrypoint);
235 236
236 if (cmn_regs.u1.ral != 0) { 237 if (cmn_regs.u1.ral != 0) {
@@ -248,8 +249,10 @@ static int __devinit cru_detect(unsigned long map_entry,
248 if ((physical_bios_base + physical_bios_offset)) { 249 if ((physical_bios_base + physical_bios_offset)) {
249 cru_rom_addr = 250 cru_rom_addr =
250 ioremap(cru_physical_address, cru_length); 251 ioremap(cru_physical_address, cru_length);
251 if (cru_rom_addr) 252 if (cru_rom_addr) {
253 set_memory_x((unsigned long)cru_rom_addr, cru_length);
252 retval = 0; 254 retval = 0;
255 }
253 } 256 }
254 257
255 printk(KERN_DEBUG "hpwdt: CRU Base Address: 0x%lx\n", 258 printk(KERN_DEBUG "hpwdt: CRU Base Address: 0x%lx\n",
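
The set_memory_x() calls added to hpwdt are there because the BIOS32 entry point and the remapped CRU ROM are plain data mappings; with NX protection active, calling into them faults unless the pages are marked executable first. A userspace analogue of the same step using mprotect() (x86 only, purely illustrative):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	unsigned char ret_insn = 0xc3;	/* x86 "ret": call returns at once */
	void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	memcpy(buf, &ret_insn, 1);
	/* the moral equivalent of set_memory_x() on the mapped region */
	if (mprotect(buf, 4096, PROT_READ | PROT_EXEC))
		return 1;
	((void (*)(void))buf)();
	puts("call into freshly executable mapping returned");
	return 0;
}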
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index ba6ad662635a..99796c5d913d 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -384,10 +384,10 @@ MODULE_PARM_DESC(nowayout,
384 "Watchdog cannot be stopped once started (default=" 384 "Watchdog cannot be stopped once started (default="
385 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); 385 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
386 386
387static int turn_SMI_watchdog_clear_off = 0; 387static int turn_SMI_watchdog_clear_off = 1;
388module_param(turn_SMI_watchdog_clear_off, int, 0); 388module_param(turn_SMI_watchdog_clear_off, int, 0);
389MODULE_PARM_DESC(turn_SMI_watchdog_clear_off, 389MODULE_PARM_DESC(turn_SMI_watchdog_clear_off,
390 "Turn off SMI clearing watchdog (default=0)"); 390 "Turn off SMI clearing watchdog (depends on TCO-version)(default=1)");
391 391
392/* 392/*
393 * Some TCO specific functions 393 * Some TCO specific functions
@@ -813,7 +813,7 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
813 ret = -EIO; 813 ret = -EIO;
814 goto out_unmap; 814 goto out_unmap;
815 } 815 }
816 if (turn_SMI_watchdog_clear_off) { 816 if (turn_SMI_watchdog_clear_off >= iTCO_wdt_private.iTCO_version) {
817 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */ 817 /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
818 val32 = inl(SMI_EN); 818 val32 = inl(SMI_EN);
819 val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ 819 val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
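
With this change turn_SMI_watchdog_clear_off stops being a strict boolean: it now holds a TCO-version threshold (default 1), and SMI clearing of the watchdog is turned off only when the parameter is at least the detected iTCO version. A tiny model of the gate, names illustrative:

#include <stdio.h>

static int disable_smi_clearing(int param, int itco_version)
{
	/* mirrors: if (turn_SMI_watchdog_clear_off >= iTCO_version) */
	return param >= itco_version;
}

int main(void)
{
	printf("v1 hardware, default param: %d\n", disable_smi_clearing(1, 1));
	printf("v2 hardware, default param: %d\n", disable_smi_clearing(1, 2));
	return 0;
}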
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index cc2cfbe33b30..bfaf9bb1ee0d 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -351,7 +351,7 @@ static int __devexit sp805_wdt_remove(struct amba_device *adev)
351 return 0; 351 return 0;
352} 352}
353 353
354static struct amba_id sp805_wdt_ids[] __initdata = { 354static struct amba_id sp805_wdt_ids[] = {
355 { 355 {
356 .id = 0x00141805, 356 .id = 0x00141805,
357 .mask = 0x00ffffff, 357 .mask = 0x00ffffff,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8e964b91c447..284798aaf8b1 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -166,7 +166,7 @@ retry:
166 /* 166 /*
167 * Get IO TLB memory from any location. 167 * Get IO TLB memory from any location.
168 */ 168 */
169 xen_io_tlb_start = alloc_bootmem(bytes); 169 xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
170 if (!xen_io_tlb_start) { 170 if (!xen_io_tlb_start) {
171 m = "Cannot allocate Xen-SWIOTLB buffer!\n"; 171 m = "Cannot allocate Xen-SWIOTLB buffer!\n";
172 goto error; 172 goto error;
@@ -179,7 +179,7 @@ retry:
179 bytes, 179 bytes,
180 xen_io_tlb_nslabs); 180 xen_io_tlb_nslabs);
181 if (rc) { 181 if (rc) {
182 free_bootmem(__pa(xen_io_tlb_start), bytes); 182 free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
183 m = "Failed to get contiguous memory for DMA from Xen!\n"\ 183 m = "Failed to get contiguous memory for DMA from Xen!\n"\
184 "You either: don't have the permissions, do not have"\ 184 "You either: don't have the permissions, do not have"\
185 " enough free memory under 4GB, or the hypervisor memory"\ 185 " enough free memory under 4GB, or the hypervisor memory"\
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index b3b8f2f3ad10..ede860f921df 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -621,15 +621,6 @@ static struct xenbus_watch *find_watch(const char *token)
621 return NULL; 621 return NULL;
622} 622}
623 623
624static void xs_reset_watches(void)
625{
626 int err;
627
628 err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
629 if (err && err != -EEXIST)
630 printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
631}
632
633/* Register callback to watch this node. */ 624/* Register callback to watch this node. */
634int register_xenbus_watch(struct xenbus_watch *watch) 625int register_xenbus_watch(struct xenbus_watch *watch)
635{ 626{
@@ -906,9 +897,5 @@ int xs_init(void)
906 if (IS_ERR(task)) 897 if (IS_ERR(task))
907 return PTR_ERR(task); 898 return PTR_ERR(task);
908 899
909 /* shutdown watches for kexec boot */
910 if (xen_hvm_domain())
911 xs_reset_watches();
912
913 return 0; 900 return 0;
914} 901}
diff --git a/firmware/README.AddingFirmware b/firmware/README.AddingFirmware
index e24cd8986d8b..ea78c3a17eec 100644
--- a/firmware/README.AddingFirmware
+++ b/firmware/README.AddingFirmware
@@ -12,7 +12,7 @@ here.
12This directory is _NOT_ for adding arbitrary new firmware images. The 12This directory is _NOT_ for adding arbitrary new firmware images. The
13place to add those is the separate linux-firmware repository: 13place to add those is the separate linux-firmware repository:
14 14
15 git://git.kernel.org/pub/scm/linux/kernel/git/dwmw2/linux-firmware.git 15 git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
16 16
17That repository contains all these firmware images which have been 17That repository contains all these firmware images which have been
18extracted from older drivers, as well as various new firmware images which 18extracted from older drivers, as well as various new firmware images which
@@ -22,6 +22,7 @@ been permitted to redistribute under separate cover.
22To submit firmware to that repository, please send either a git binary 22To submit firmware to that repository, please send either a git binary
23diff or preferably a git pull request to: 23diff or preferably a git pull request to:
24 David Woodhouse <dwmw2@infradead.org> 24 David Woodhouse <dwmw2@infradead.org>
25 Ben Hutchings <ben@decadent.org.uk>
25 26
26Your commit should include an update to the WHENCE file clearly 27Your commit should include an update to the WHENCE file clearly
27identifying the licence under which the firmware is available, and 28identifying the licence under which the firmware is available, and
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef1..0b394580d860 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -64,6 +64,8 @@ struct btrfs_worker_thread {
64 int idle; 64 int idle;
65}; 65};
66 66
67static int __btrfs_start_workers(struct btrfs_workers *workers);
68
67/* 69/*
68 * btrfs_start_workers uses kthread_run, which can block waiting for memory 70 * btrfs_start_workers uses kthread_run, which can block waiting for memory
69 * for a very long time. It will actually throttle on page writeback, 71 * for a very long time. It will actually throttle on page writeback,
@@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
88{ 90{
89 struct worker_start *start; 91 struct worker_start *start;
90 start = container_of(work, struct worker_start, work); 92 start = container_of(work, struct worker_start, work);
91 btrfs_start_workers(start->queue, 1); 93 __btrfs_start_workers(start->queue);
92 kfree(start); 94 kfree(start);
93} 95}
94 96
95static int start_new_worker(struct btrfs_workers *queue)
96{
97 struct worker_start *start;
98 int ret;
99
100 start = kzalloc(sizeof(*start), GFP_NOFS);
101 if (!start)
102 return -ENOMEM;
103
104 start->work.func = start_new_worker_func;
105 start->queue = queue;
106 ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
107 if (ret)
108 kfree(start);
109 return ret;
110}
111
112/* 97/*
113 * helper function to move a thread onto the idle list after it 98 * helper function to move a thread onto the idle list after it
114 * has finished some requests. 99 * has finished some requests.
@@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
153static void check_pending_worker_creates(struct btrfs_worker_thread *worker) 138static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
154{ 139{
155 struct btrfs_workers *workers = worker->workers; 140 struct btrfs_workers *workers = worker->workers;
141 struct worker_start *start;
156 unsigned long flags; 142 unsigned long flags;
157 143
158 rmb(); 144 rmb();
159 if (!workers->atomic_start_pending) 145 if (!workers->atomic_start_pending)
160 return; 146 return;
161 147
148 start = kzalloc(sizeof(*start), GFP_NOFS);
149 if (!start)
150 return;
151
152 start->work.func = start_new_worker_func;
153 start->queue = workers;
154
162 spin_lock_irqsave(&workers->lock, flags); 155 spin_lock_irqsave(&workers->lock, flags);
163 if (!workers->atomic_start_pending) 156 if (!workers->atomic_start_pending)
164 goto out; 157 goto out;
@@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
170 163
171 workers->num_workers_starting += 1; 164 workers->num_workers_starting += 1;
172 spin_unlock_irqrestore(&workers->lock, flags); 165 spin_unlock_irqrestore(&workers->lock, flags);
173 start_new_worker(workers); 166 btrfs_queue_worker(workers->atomic_worker_start, &start->work);
174 return; 167 return;
175 168
176out: 169out:
170 kfree(start);
177 spin_unlock_irqrestore(&workers->lock, flags); 171 spin_unlock_irqrestore(&workers->lock, flags);
178} 172}
179 173
@@ -331,7 +325,7 @@ again:
331 run_ordered_completions(worker->workers, work); 325 run_ordered_completions(worker->workers, work);
332 326
333 check_pending_worker_creates(worker); 327 check_pending_worker_creates(worker);
334 328 cond_resched();
335 } 329 }
336 330
337 spin_lock_irq(&worker->lock); 331 spin_lock_irq(&worker->lock);
@@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
462 * starts new worker threads. This does not enforce the max worker 456 * starts new worker threads. This does not enforce the max worker
463 * count in case you need to temporarily go past it. 457 * count in case you need to temporarily go past it.
464 */ 458 */
465static int __btrfs_start_workers(struct btrfs_workers *workers, 459static int __btrfs_start_workers(struct btrfs_workers *workers)
466 int num_workers)
467{ 460{
468 struct btrfs_worker_thread *worker; 461 struct btrfs_worker_thread *worker;
469 int ret = 0; 462 int ret = 0;
470 int i;
471 463
472 for (i = 0; i < num_workers; i++) { 464 worker = kzalloc(sizeof(*worker), GFP_NOFS);
473 worker = kzalloc(sizeof(*worker), GFP_NOFS); 465 if (!worker) {
474 if (!worker) { 466 ret = -ENOMEM;
475 ret = -ENOMEM; 467 goto fail;
476 goto fail; 468 }
477 }
478 469
479 INIT_LIST_HEAD(&worker->pending); 470 INIT_LIST_HEAD(&worker->pending);
480 INIT_LIST_HEAD(&worker->prio_pending); 471 INIT_LIST_HEAD(&worker->prio_pending);
481 INIT_LIST_HEAD(&worker->worker_list); 472 INIT_LIST_HEAD(&worker->worker_list);
482 spin_lock_init(&worker->lock); 473 spin_lock_init(&worker->lock);
483 474
484 atomic_set(&worker->num_pending, 0); 475 atomic_set(&worker->num_pending, 0);
485 atomic_set(&worker->refs, 1); 476 atomic_set(&worker->refs, 1);
486 worker->workers = workers; 477 worker->workers = workers;
487 worker->task = kthread_run(worker_loop, worker, 478 worker->task = kthread_run(worker_loop, worker,
488 "btrfs-%s-%d", workers->name, 479 "btrfs-%s-%d", workers->name,
489 workers->num_workers + i); 480 workers->num_workers + 1);
490 if (IS_ERR(worker->task)) { 481 if (IS_ERR(worker->task)) {
491 ret = PTR_ERR(worker->task); 482 ret = PTR_ERR(worker->task);
492 kfree(worker); 483 kfree(worker);
493 goto fail; 484 goto fail;
494 }
495 spin_lock_irq(&workers->lock);
496 list_add_tail(&worker->worker_list, &workers->idle_list);
497 worker->idle = 1;
498 workers->num_workers++;
499 workers->num_workers_starting--;
500 WARN_ON(workers->num_workers_starting < 0);
501 spin_unlock_irq(&workers->lock);
502 } 485 }
486 spin_lock_irq(&workers->lock);
487 list_add_tail(&worker->worker_list, &workers->idle_list);
488 worker->idle = 1;
489 workers->num_workers++;
490 workers->num_workers_starting--;
491 WARN_ON(workers->num_workers_starting < 0);
492 spin_unlock_irq(&workers->lock);
493
503 return 0; 494 return 0;
504fail: 495fail:
505 btrfs_stop_workers(workers); 496 spin_lock_irq(&workers->lock);
497 workers->num_workers_starting--;
498 spin_unlock_irq(&workers->lock);
506 return ret; 499 return ret;
507} 500}
508 501
509int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 502int btrfs_start_workers(struct btrfs_workers *workers)
510{ 503{
511 spin_lock_irq(&workers->lock); 504 spin_lock_irq(&workers->lock);
512 workers->num_workers_starting += num_workers; 505 workers->num_workers_starting++;
513 spin_unlock_irq(&workers->lock); 506 spin_unlock_irq(&workers->lock);
514 return __btrfs_start_workers(workers, num_workers); 507 return __btrfs_start_workers(workers);
515} 508}
516 509
517/* 510/*
@@ -568,9 +561,10 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
568 struct btrfs_worker_thread *worker; 561 struct btrfs_worker_thread *worker;
569 unsigned long flags; 562 unsigned long flags;
570 struct list_head *fallback; 563 struct list_head *fallback;
564 int ret;
571 565
572again:
573 spin_lock_irqsave(&workers->lock, flags); 566 spin_lock_irqsave(&workers->lock, flags);
567again:
574 worker = next_worker(workers); 568 worker = next_worker(workers);
575 569
576 if (!worker) { 570 if (!worker) {
@@ -584,7 +578,10 @@ again:
584 workers->num_workers_starting++; 578 workers->num_workers_starting++;
585 spin_unlock_irqrestore(&workers->lock, flags); 579 spin_unlock_irqrestore(&workers->lock, flags);
586 /* we're below the limit, start another worker */ 580 /* we're below the limit, start another worker */
587 __btrfs_start_workers(workers, 1); 581 ret = __btrfs_start_workers(workers);
582 spin_lock_irqsave(&workers->lock, flags);
583 if (ret)
584 goto fallback;
588 goto again; 585 goto again;
589 } 586 }
590 } 587 }
@@ -665,7 +662,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
665/* 662/*
666 * places a struct btrfs_work into the pending queue of one of the kthreads 663 * places a struct btrfs_work into the pending queue of one of the kthreads
667 */ 664 */
668int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) 665void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
669{ 666{
670 struct btrfs_worker_thread *worker; 667 struct btrfs_worker_thread *worker;
671 unsigned long flags; 668 unsigned long flags;
@@ -673,7 +670,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
673 670
674 /* don't requeue something already on a list */ 671 /* don't requeue something already on a list */
675 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) 672 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
676 goto out; 673 return;
677 674
678 worker = find_worker(workers); 675 worker = find_worker(workers);
679 if (workers->ordered) { 676 if (workers->ordered) {
@@ -712,7 +709,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
712 if (wake) 709 if (wake)
713 wake_up_process(worker->task); 710 wake_up_process(worker->task);
714 spin_unlock_irqrestore(&worker->lock, flags); 711 spin_unlock_irqrestore(&worker->lock, flags);
715
716out:
717 return 0;
718} 712}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 5077746cf85e..f34cc31fa3c9 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -109,8 +109,8 @@ struct btrfs_workers {
109 char *name; 109 char *name;
110}; 110};
111 111
112int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
113int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); 113int btrfs_start_workers(struct btrfs_workers *workers);
114int btrfs_stop_workers(struct btrfs_workers *workers); 114int btrfs_stop_workers(struct btrfs_workers *workers);
115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, 115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
116 struct btrfs_workers *async_starter); 116 struct btrfs_workers *async_starter);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 50634abef9b4..67385033323d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2692,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2692int btrfs_readpage(struct file *file, struct page *page); 2692int btrfs_readpage(struct file *file, struct page *page);
2693void btrfs_evict_inode(struct inode *inode); 2693void btrfs_evict_inode(struct inode *inode);
2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2695void btrfs_dirty_inode(struct inode *inode, int flags); 2695int btrfs_dirty_inode(struct inode *inode);
2696int btrfs_update_time(struct file *file);
2696struct inode *btrfs_alloc_inode(struct super_block *sb); 2697struct inode *btrfs_alloc_inode(struct super_block *sb);
2697void btrfs_destroy_inode(struct inode *inode); 2698void btrfs_destroy_inode(struct inode *inode);
2698int btrfs_drop_inode(struct inode *inode); 2699int btrfs_drop_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5b163572e0ca..9c1eccc2c503 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -640,8 +640,8 @@ static int btrfs_delayed_inode_reserve_metadata(
640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since 640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
641 * we're accounted for. 641 * we're accounted for.
642 */ 642 */
643 if (!trans->bytes_reserved && 643 if (!src_rsv || (!trans->bytes_reserved &&
644 src_rsv != &root->fs_info->delalloc_block_rsv) { 644 src_rsv != &root->fs_info->delalloc_block_rsv)) {
645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); 645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
646 /* 646 /*
647 * Since we're under a transaction reserve_metadata_bytes could 647 * Since we're under a transaction reserve_metadata_bytes could
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 632f8f3cc9db..f44b3928dc2d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2194,19 +2194,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2194 fs_info->endio_meta_write_workers.idle_thresh = 2; 2194 fs_info->endio_meta_write_workers.idle_thresh = 2;
2195 fs_info->readahead_workers.idle_thresh = 2; 2195 fs_info->readahead_workers.idle_thresh = 2;
2196 2196
2197 btrfs_start_workers(&fs_info->workers, 1); 2197 /*
2198 btrfs_start_workers(&fs_info->generic_worker, 1); 2198 * btrfs_start_workers can really only fail because of ENOMEM so just
2199 btrfs_start_workers(&fs_info->submit_workers, 1); 2199 * return -ENOMEM if any of these fail.
2200 btrfs_start_workers(&fs_info->delalloc_workers, 1); 2200 */
2201 btrfs_start_workers(&fs_info->fixup_workers, 1); 2201 ret = btrfs_start_workers(&fs_info->workers);
2202 btrfs_start_workers(&fs_info->endio_workers, 1); 2202 ret |= btrfs_start_workers(&fs_info->generic_worker);
2203 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 2203 ret |= btrfs_start_workers(&fs_info->submit_workers);
2204 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 2204 ret |= btrfs_start_workers(&fs_info->delalloc_workers);
2205 btrfs_start_workers(&fs_info->endio_write_workers, 1); 2205 ret |= btrfs_start_workers(&fs_info->fixup_workers);
2206 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 2206 ret |= btrfs_start_workers(&fs_info->endio_workers);
2207 btrfs_start_workers(&fs_info->delayed_workers, 1); 2207 ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
2208 btrfs_start_workers(&fs_info->caching_workers, 1); 2208 ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
2209 btrfs_start_workers(&fs_info->readahead_workers, 1); 2209 ret |= btrfs_start_workers(&fs_info->endio_write_workers);
2210 ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
2211 ret |= btrfs_start_workers(&fs_info->delayed_workers);
2212 ret |= btrfs_start_workers(&fs_info->caching_workers);
2213 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2214 if (ret) {
2215 ret = -ENOMEM;
2216 goto fail_sb_buffer;
2217 }
2210 2218
2211 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 2219 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
2212 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2220 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2ad813674d77..f5fbe576d2ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2822,7 +2822,7 @@ out_free:
2822 btrfs_release_path(path); 2822 btrfs_release_path(path);
2823out: 2823out:
2824 spin_lock(&block_group->lock); 2824 spin_lock(&block_group->lock);
2825 if (!ret) 2825 if (!ret && dcs == BTRFS_DC_SETUP)
2826 block_group->cache_generation = trans->transid; 2826 block_group->cache_generation = trans->transid;
2827 block_group->disk_cache_state = dcs; 2827 block_group->disk_cache_state = dcs;
2828 spin_unlock(&block_group->lock); 2828 spin_unlock(&block_group->lock);
@@ -4204,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4204 struct btrfs_root *root = BTRFS_I(inode)->root; 4204 struct btrfs_root *root = BTRFS_I(inode)->root;
4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4206 u64 to_reserve = 0; 4206 u64 to_reserve = 0;
4207 u64 csum_bytes;
4207 unsigned nr_extents = 0; 4208 unsigned nr_extents = 0;
4209 int extra_reserve = 0;
4208 int flush = 1; 4210 int flush = 1;
4209 int ret; 4211 int ret;
4210 4212
 4213 /* The i_mutex must be held here unless this is the free space cache inode */
4211 if (btrfs_is_free_space_inode(root, inode)) 4214 if (btrfs_is_free_space_inode(root, inode))
4212 flush = 0; 4215 flush = 0;
4216 else
4217 WARN_ON(!mutex_is_locked(&inode->i_mutex));
4213 4218
4214 if (flush && btrfs_transaction_in_commit(root->fs_info)) 4219 if (flush && btrfs_transaction_in_commit(root->fs_info))
4215 schedule_timeout(1); 4220 schedule_timeout(1);
@@ -4220,11 +4225,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4220 BTRFS_I(inode)->outstanding_extents++; 4225 BTRFS_I(inode)->outstanding_extents++;
4221 4226
4222 if (BTRFS_I(inode)->outstanding_extents > 4227 if (BTRFS_I(inode)->outstanding_extents >
4223 BTRFS_I(inode)->reserved_extents) { 4228 BTRFS_I(inode)->reserved_extents)
4224 nr_extents = BTRFS_I(inode)->outstanding_extents - 4229 nr_extents = BTRFS_I(inode)->outstanding_extents -
4225 BTRFS_I(inode)->reserved_extents; 4230 BTRFS_I(inode)->reserved_extents;
4226 BTRFS_I(inode)->reserved_extents += nr_extents;
4227 }
4228 4231
4229 /* 4232 /*
4230 * Add an item to reserve for updating the inode when we complete the 4233 * Add an item to reserve for updating the inode when we complete the
@@ -4232,11 +4235,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4232 */ 4235 */
4233 if (!BTRFS_I(inode)->delalloc_meta_reserved) { 4236 if (!BTRFS_I(inode)->delalloc_meta_reserved) {
4234 nr_extents++; 4237 nr_extents++;
4235 BTRFS_I(inode)->delalloc_meta_reserved = 1; 4238 extra_reserve = 1;
4236 } 4239 }
4237 4240
4238 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4241 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4239 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 4242 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4243 csum_bytes = BTRFS_I(inode)->csum_bytes;
4240 spin_unlock(&BTRFS_I(inode)->lock); 4244 spin_unlock(&BTRFS_I(inode)->lock);
4241 4245
4242 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4246 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -4246,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4246 4250
4247 spin_lock(&BTRFS_I(inode)->lock); 4251 spin_lock(&BTRFS_I(inode)->lock);
4248 dropped = drop_outstanding_extent(inode); 4252 dropped = drop_outstanding_extent(inode);
4249 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4250 spin_unlock(&BTRFS_I(inode)->lock);
4251 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4252
4253 /* 4253 /*
4254 * Somebody could have come in and twiddled with the 4254 * If the inodes csum_bytes is the same as the original
4255 * reservation, so if we have to free more than we would have 4255 * csum_bytes then we know we haven't raced with any free()ers
4256 * reserved from this reservation go ahead and release those 4256 * so we can just reduce our inodes csum bytes and carry on.
4257 * bytes. 4257 * Otherwise we have to do the normal free thing to account for
4258 * the case that the free side didn't free up its reserve
4259 * because of this outstanding reservation.
4258 */ 4260 */
4259 to_free -= to_reserve; 4261 if (BTRFS_I(inode)->csum_bytes == csum_bytes)
4262 calc_csum_metadata_size(inode, num_bytes, 0);
4263 else
4264 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4265 spin_unlock(&BTRFS_I(inode)->lock);
4266 if (dropped)
4267 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4268
4260 if (to_free) 4269 if (to_free)
4261 btrfs_block_rsv_release(root, block_rsv, to_free); 4270 btrfs_block_rsv_release(root, block_rsv, to_free);
4262 return ret; 4271 return ret;
4263 } 4272 }
4264 4273
4274 spin_lock(&BTRFS_I(inode)->lock);
4275 if (extra_reserve) {
4276 BTRFS_I(inode)->delalloc_meta_reserved = 1;
4277 nr_extents--;
4278 }
4279 BTRFS_I(inode)->reserved_extents += nr_extents;
4280 spin_unlock(&BTRFS_I(inode)->lock);
4281
4265 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4282 block_rsv_add_bytes(block_rsv, to_reserve, 1);
4266 4283
4267 return 0; 4284 return 0;
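
The reworked reservation path snapshots csum_bytes under the inode lock before the unlocked reserve_metadata_bytes() call; on failure it retakes the lock and takes the cheap local rollback only if the snapshot still matches, i.e. no free()er raced in between. A sketch of that snapshot-and-compare unwind, all names illustrative:

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;
static long csum_bytes = 4096;

static int reserve(void) { return -1; }	/* force the failure path */

int main(void)
{
	long snapshot;

	pthread_mutex_lock(&inode_lock);
	snapshot = csum_bytes;		/* remember the pre-reserve value */
	pthread_mutex_unlock(&inode_lock);

	if (reserve()) {
		pthread_mutex_lock(&inode_lock);
		if (csum_bytes == snapshot)
			puts("no race: cheap local rollback");
		else
			puts("raced with a free(): full accounting rollback");
		pthread_mutex_unlock(&inode_lock);
	}
	return 0;
}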
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dafdfa059bf6..97fbe939c050 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1169 (sizeof(struct page *))); 1169 (sizeof(struct page *)));
1170 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1171 nrptrs = max(nrptrs, 8);
1170 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 1172 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1171 if (!pages) 1173 if (!pages)
1172 return -ENOMEM; 1174 return -ENOMEM;
@@ -1387,7 +1389,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1387 goto out; 1389 goto out;
1388 } 1390 }
1389 1391
1390 file_update_time(file); 1392 err = btrfs_update_time(file);
1393 if (err) {
1394 mutex_unlock(&inode->i_mutex);
1395 goto out;
1396 }
1391 BTRFS_I(inode)->sequence++; 1397 BTRFS_I(inode)->sequence++;
1392 1398
1393 start_pos = round_down(pos, root->sectorsize); 1399 start_pos = round_down(pos, root->sectorsize);
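
The clamp added to __btrfs_buffered_write() caps each copy batch at the writer's remaining dirty-page allowance (nr_dirtied_pause - nr_dirtied) so one write cannot blow far past the writeback throttle, while keeping a floor of eight pages per pass. The arithmetic, modeled standalone:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static long clamp_nrptrs(long nrptrs, long pause, long dirtied)
{
	nrptrs = MIN(nrptrs, pause - dirtied);
	return MAX(nrptrs, 8);
}

int main(void)
{
	printf("%ld\n", clamp_nrptrs(1024, 32, 30));	/* -> 8 */
	printf("%ld\n", clamp_nrptrs(1024, 256, 100));	/* -> 156 */
	return 0;
}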
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c984f7d4c2a..fd1a06df5bc6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -38,6 +38,7 @@
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -2031,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2031 /* insert an orphan item to track this unlinked/truncated file */ 2032 /* insert an orphan item to track this unlinked/truncated file */
2032 if (insert >= 1) { 2033 if (insert >= 1) {
2033 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2034 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2034 BUG_ON(ret); 2035 BUG_ON(ret && ret != -EEXIST);
2035 } 2036 }
2036 2037
2037 /* insert an orphan item to track subvolume contains orphan files */ 2038 /* insert an orphan item to track subvolume contains orphan files */
@@ -2158,6 +2159,38 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2158 if (ret && ret != -ESTALE) 2159 if (ret && ret != -ESTALE)
2159 goto out; 2160 goto out;
2160 2161
2162 if (ret == -ESTALE && root == root->fs_info->tree_root) {
2163 struct btrfs_root *dead_root;
2164 struct btrfs_fs_info *fs_info = root->fs_info;
2165 int is_dead_root = 0;
2166
2167 /*
2168 * this is an orphan in the tree root. Currently these
2169 * could come from 2 sources:
2170 * a) a snapshot deletion in progress
2171 * b) a free space cache inode
2172 * We need to distinguish those two, as the snapshot
2173 * orphan must not get deleted.
2174 * find_dead_roots already ran before us, so if this
2175 * is a snapshot deletion, we should find the root
2176 * in the dead_roots list
2177 */
2178 spin_lock(&fs_info->trans_lock);
2179 list_for_each_entry(dead_root, &fs_info->dead_roots,
2180 root_list) {
2181 if (dead_root->root_key.objectid ==
2182 found_key.objectid) {
2183 is_dead_root = 1;
2184 break;
2185 }
2186 }
2187 spin_unlock(&fs_info->trans_lock);
2188 if (is_dead_root) {
2189 /* prevent this orphan from being found again */
2190 key.offset = found_key.objectid - 1;
2191 continue;
2192 }
2193 }
2161 /* 2194 /*
2162 * Inode is already gone but the orphan item is still there, 2195 * Inode is already gone but the orphan item is still there,
2163 * kill the orphan item. 2196 * kill the orphan item.
@@ -2191,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2191 continue; 2224 continue;
2192 } 2225 }
2193 nr_truncate++; 2226 nr_truncate++;
2227 /*
 2228 * Need to hold the i_mutex here for reservation purposes; not
 2229 * a huge deal, but btrfs_delalloc_reserve_space has a WARN_ON
 2230 * to catch offenders.
2231 */
2232 mutex_lock(&inode->i_mutex);
2194 ret = btrfs_truncate(inode); 2233 ret = btrfs_truncate(inode);
2234 mutex_unlock(&inode->i_mutex);
2195 } else { 2235 } else {
2196 nr_unlink++; 2236 nr_unlink++;
2197 } 2237 }
@@ -3327,7 +3367,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3327 u64 hint_byte = 0; 3367 u64 hint_byte = 0;
3328 hole_size = last_byte - cur_offset; 3368 hole_size = last_byte - cur_offset;
3329 3369
3330 trans = btrfs_start_transaction(root, 2); 3370 trans = btrfs_start_transaction(root, 3);
3331 if (IS_ERR(trans)) { 3371 if (IS_ERR(trans)) {
3332 err = PTR_ERR(trans); 3372 err = PTR_ERR(trans);
3333 break; 3373 break;
@@ -3337,6 +3377,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3337 cur_offset + hole_size, 3377 cur_offset + hole_size,
3338 &hint_byte, 1); 3378 &hint_byte, 1);
3339 if (err) { 3379 if (err) {
3380 btrfs_update_inode(trans, root, inode);
3340 btrfs_end_transaction(trans, root); 3381 btrfs_end_transaction(trans, root);
3341 break; 3382 break;
3342 } 3383 }
@@ -3346,6 +3387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3346 0, hole_size, 0, hole_size, 3387 0, hole_size, 0, hole_size,
3347 0, 0, 0); 3388 0, 0, 0);
3348 if (err) { 3389 if (err) {
3390 btrfs_update_inode(trans, root, inode);
3349 btrfs_end_transaction(trans, root); 3391 btrfs_end_transaction(trans, root);
3350 break; 3392 break;
3351 } 3393 }
@@ -3353,6 +3395,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3353 btrfs_drop_extent_cache(inode, hole_start, 3395 btrfs_drop_extent_cache(inode, hole_start,
3354 last_byte - 1, 0); 3396 last_byte - 1, 0);
3355 3397
3398 btrfs_update_inode(trans, root, inode);
3356 btrfs_end_transaction(trans, root); 3399 btrfs_end_transaction(trans, root);
3357 } 3400 }
3358 free_extent_map(em); 3401 free_extent_map(em);
@@ -3370,6 +3413,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3370 3413
3371static int btrfs_setsize(struct inode *inode, loff_t newsize) 3414static int btrfs_setsize(struct inode *inode, loff_t newsize)
3372{ 3415{
3416 struct btrfs_root *root = BTRFS_I(inode)->root;
3417 struct btrfs_trans_handle *trans;
3373 loff_t oldsize = i_size_read(inode); 3418 loff_t oldsize = i_size_read(inode);
3374 int ret; 3419 int ret;
3375 3420
@@ -3377,16 +3422,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3377 return 0; 3422 return 0;
3378 3423
3379 if (newsize > oldsize) { 3424 if (newsize > oldsize) {
3380 i_size_write(inode, newsize);
3381 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3382 truncate_pagecache(inode, oldsize, newsize); 3425 truncate_pagecache(inode, oldsize, newsize);
3383 ret = btrfs_cont_expand(inode, oldsize, newsize); 3426 ret = btrfs_cont_expand(inode, oldsize, newsize);
3384 if (ret) { 3427 if (ret)
3385 btrfs_setsize(inode, oldsize);
3386 return ret; 3428 return ret;
3387 }
3388 3429
3389 mark_inode_dirty(inode); 3430 trans = btrfs_start_transaction(root, 1);
3431 if (IS_ERR(trans))
3432 return PTR_ERR(trans);
3433
3434 i_size_write(inode, newsize);
3435 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3436 ret = btrfs_update_inode(trans, root, inode);
3437 btrfs_end_transaction_throttle(trans, root);
3390 } else { 3438 } else {
3391 3439
3392 /* 3440 /*
@@ -3426,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3426 3474
3427 if (attr->ia_valid) { 3475 if (attr->ia_valid) {
3428 setattr_copy(inode, attr); 3476 setattr_copy(inode, attr);
3429 mark_inode_dirty(inode); 3477 err = btrfs_dirty_inode(inode);
3430 3478
3431 if (attr->ia_valid & ATTR_MODE) 3479 if (!err && attr->ia_valid & ATTR_MODE)
3432 err = btrfs_acl_chmod(inode); 3480 err = btrfs_acl_chmod(inode);
3433 } 3481 }
3434 3482
@@ -4204,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4204 * FIXME, needs more benchmarking...there are no reasons other than performance 4252 * FIXME, needs more benchmarking...there are no reasons other than performance
4205 * to keep or drop this code. 4253 * to keep or drop this code.
4206 */ 4254 */
4207void btrfs_dirty_inode(struct inode *inode, int flags) 4255int btrfs_dirty_inode(struct inode *inode)
4208{ 4256{
4209 struct btrfs_root *root = BTRFS_I(inode)->root; 4257 struct btrfs_root *root = BTRFS_I(inode)->root;
4210 struct btrfs_trans_handle *trans; 4258 struct btrfs_trans_handle *trans;
4211 int ret; 4259 int ret;
4212 4260
4213 if (BTRFS_I(inode)->dummy_inode) 4261 if (BTRFS_I(inode)->dummy_inode)
4214 return; 4262 return 0;
4215 4263
4216 trans = btrfs_join_transaction(root); 4264 trans = btrfs_join_transaction(root);
4217 BUG_ON(IS_ERR(trans)); 4265 if (IS_ERR(trans))
4266 return PTR_ERR(trans);
4218 4267
4219 ret = btrfs_update_inode(trans, root, inode); 4268 ret = btrfs_update_inode(trans, root, inode);
4220 if (ret && ret == -ENOSPC) { 4269 if (ret && ret == -ENOSPC) {
4221 /* whoops, lets try again with the full transaction */ 4270 /* whoops, lets try again with the full transaction */
4222 btrfs_end_transaction(trans, root); 4271 btrfs_end_transaction(trans, root);
4223 trans = btrfs_start_transaction(root, 1); 4272 trans = btrfs_start_transaction(root, 1);
4224 if (IS_ERR(trans)) { 4273 if (IS_ERR(trans))
4225 printk_ratelimited(KERN_ERR "btrfs: fail to " 4274 return PTR_ERR(trans);
4226 "dirty inode %llu error %ld\n",
4227 (unsigned long long)btrfs_ino(inode),
4228 PTR_ERR(trans));
4229 return;
4230 }
4231 4275
4232 ret = btrfs_update_inode(trans, root, inode); 4276 ret = btrfs_update_inode(trans, root, inode);
4233 if (ret) {
4234 printk_ratelimited(KERN_ERR "btrfs: fail to "
4235 "dirty inode %llu error %d\n",
4236 (unsigned long long)btrfs_ino(inode),
4237 ret);
4238 }
4239 } 4277 }
4240 btrfs_end_transaction(trans, root); 4278 btrfs_end_transaction(trans, root);
4241 if (BTRFS_I(inode)->delayed_node) 4279 if (BTRFS_I(inode)->delayed_node)
4242 btrfs_balance_delayed_items(root); 4280 btrfs_balance_delayed_items(root);
4281
4282 return ret;
4283}
4284
4285/*
 4286 * This is a copy of file_update_time. We need it so we can return an
 4287 * error on ENOSPC when updating the inode for file writes and mmap writes.
4288 */
4289int btrfs_update_time(struct file *file)
4290{
4291 struct inode *inode = file->f_path.dentry->d_inode;
4292 struct timespec now;
4293 int ret;
4294 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
4295
4296 /* First try to exhaust all avenues to not sync */
4297 if (IS_NOCMTIME(inode))
4298 return 0;
4299
4300 now = current_fs_time(inode->i_sb);
4301 if (!timespec_equal(&inode->i_mtime, &now))
4302 sync_it = S_MTIME;
4303
4304 if (!timespec_equal(&inode->i_ctime, &now))
4305 sync_it |= S_CTIME;
4306
4307 if (IS_I_VERSION(inode))
4308 sync_it |= S_VERSION;
4309
4310 if (!sync_it)
4311 return 0;
4312
4313 /* Finally allowed to write? Takes lock. */
4314 if (mnt_want_write_file(file))
4315 return 0;
4316
4317 /* Only change inode inside the lock region */
4318 if (sync_it & S_VERSION)
4319 inode_inc_iversion(inode);
4320 if (sync_it & S_CTIME)
4321 inode->i_ctime = now;
4322 if (sync_it & S_MTIME)
4323 inode->i_mtime = now;
4324 ret = btrfs_dirty_inode(inode);
4325 if (!ret)
4326 mark_inode_dirty_sync(inode);
4327 mnt_drop_write(file->f_path.mnt);
4328 return ret;
4243} 4329}
4244 4330
4245/* 4331/*
@@ -4504,10 +4590,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4504 int err = btrfs_add_link(trans, dir, inode, 4590 int err = btrfs_add_link(trans, dir, inode,
4505 dentry->d_name.name, dentry->d_name.len, 4591 dentry->d_name.name, dentry->d_name.len,
4506 backref, index); 4592 backref, index);
4507 if (!err) {
4508 d_instantiate(dentry, inode);
4509 return 0;
4510 }
4511 if (err > 0) 4593 if (err > 0)
4512 err = -EEXIST; 4594 err = -EEXIST;
4513 return err; 4595 return err;
@@ -4555,13 +4637,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4555 goto out_unlock; 4637 goto out_unlock;
4556 } 4638 }
4557 4639
4640 /*
4641 * If the active LSM wants to access the inode during
4642 * d_instantiate it needs these. Smack checks to see
4643 * if the filesystem supports xattrs by looking at the
4644 * ops vector.
4645 */
4646
4647 inode->i_op = &btrfs_special_inode_operations;
4558 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4648 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4559 if (err) 4649 if (err)
4560 drop_inode = 1; 4650 drop_inode = 1;
4561 else { 4651 else {
4562 inode->i_op = &btrfs_special_inode_operations;
4563 init_special_inode(inode, inode->i_mode, rdev); 4652 init_special_inode(inode, inode->i_mode, rdev);
4564 btrfs_update_inode(trans, root, inode); 4653 btrfs_update_inode(trans, root, inode);
4654 d_instantiate(dentry, inode);
4565 } 4655 }
4566out_unlock: 4656out_unlock:
4567 nr = trans->blocks_used; 4657 nr = trans->blocks_used;
@@ -4613,15 +4703,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4613 goto out_unlock; 4703 goto out_unlock;
4614 } 4704 }
4615 4705
4706 /*
4707 * If the active LSM wants to access the inode during
4708 * d_instantiate it needs these. Smack checks to see
4709 * if the filesystem supports xattrs by looking at the
4710 * ops vector.
4711 */
4712 inode->i_fop = &btrfs_file_operations;
4713 inode->i_op = &btrfs_file_inode_operations;
4714
4616 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4715 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4617 if (err) 4716 if (err)
4618 drop_inode = 1; 4717 drop_inode = 1;
4619 else { 4718 else {
4620 inode->i_mapping->a_ops = &btrfs_aops; 4719 inode->i_mapping->a_ops = &btrfs_aops;
4621 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 4720 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
4622 inode->i_fop = &btrfs_file_operations;
4623 inode->i_op = &btrfs_file_inode_operations;
4624 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4721 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4722 d_instantiate(dentry, inode);
4625 } 4723 }
4626out_unlock: 4724out_unlock:
4627 nr = trans->blocks_used; 4725 nr = trans->blocks_used;
@@ -4679,6 +4777,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4679 struct dentry *parent = dentry->d_parent; 4777 struct dentry *parent = dentry->d_parent;
4680 err = btrfs_update_inode(trans, root, inode); 4778 err = btrfs_update_inode(trans, root, inode);
4681 BUG_ON(err); 4779 BUG_ON(err);
4780 d_instantiate(dentry, inode);
4682 btrfs_log_new_name(trans, inode, NULL, parent); 4781 btrfs_log_new_name(trans, inode, NULL, parent);
4683 } 4782 }
4684 4783
@@ -6303,7 +6402,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6303 u64 page_start; 6402 u64 page_start;
6304 u64 page_end; 6403 u64 page_end;
6305 6404
6405 /* Need this to keep space reservations serialized */
6406 mutex_lock(&inode->i_mutex);
6306 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 6407 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
6408 mutex_unlock(&inode->i_mutex);
6409 if (!ret)
6410 ret = btrfs_update_time(vma->vm_file);
6307 if (ret) { 6411 if (ret) {
6308 if (ret == -ENOMEM) 6412 if (ret == -ENOMEM)
6309 ret = VM_FAULT_OOM; 6413 ret = VM_FAULT_OOM;
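Note the tail of this hunk: ->page_mkwrite() must return VM_FAULT_* codes, not errnos, so whatever the reservation or btrfs_update_time() hands back has to be translated before leaving the fault path. A hedged sketch of that translation (the SIGBUS catch-all is the usual convention for non-memory errors here):

	if (ret) {
		if (ret == -ENOMEM)
			ret = VM_FAULT_OOM;	/* allocation pressure */
		else				/* -ENOSPC, -EIO, ... */
			ret = VM_FAULT_SIGBUS;	/* fault the writer */
		goto out;
	}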
@@ -6515,8 +6619,9 @@ static int btrfs_truncate(struct inode *inode)
6515 /* Just need the 1 for updating the inode */ 6619 /* Just need the 1 for updating the inode */
6516 trans = btrfs_start_transaction(root, 1); 6620 trans = btrfs_start_transaction(root, 1);
6517 if (IS_ERR(trans)) { 6621 if (IS_ERR(trans)) {
6518 err = PTR_ERR(trans); 6622 ret = err = PTR_ERR(trans);
6519 goto out; 6623 trans = NULL;
6624 break;
6520 } 6625 }
6521 } 6626 }
6522 6627
@@ -7076,14 +7181,21 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7076 goto out_unlock; 7181 goto out_unlock;
7077 } 7182 }
7078 7183
7184 /*
7185 * If the active LSM wants to access the inode during
7186 * d_instantiate it needs these. Smack checks to see
7187 * if the filesystem supports xattrs by looking at the
7188 * ops vector.
7189 */
7190 inode->i_fop = &btrfs_file_operations;
7191 inode->i_op = &btrfs_file_inode_operations;
7192
7079 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7193 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7080 if (err) 7194 if (err)
7081 drop_inode = 1; 7195 drop_inode = 1;
7082 else { 7196 else {
7083 inode->i_mapping->a_ops = &btrfs_aops; 7197 inode->i_mapping->a_ops = &btrfs_aops;
7084 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 7198 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
7085 inode->i_fop = &btrfs_file_operations;
7086 inode->i_op = &btrfs_file_inode_operations;
7087 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7199 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7088 } 7200 }
7089 if (drop_inode) 7201 if (drop_inode)
@@ -7132,6 +7244,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7132 drop_inode = 1; 7244 drop_inode = 1;
7133 7245
7134out_unlock: 7246out_unlock:
7247 if (!err)
7248 d_instantiate(dentry, inode);
7135 nr = trans->blocks_used; 7249 nr = trans->blocks_used;
7136 btrfs_end_transaction_throttle(trans, root); 7250 btrfs_end_transaction_throttle(trans, root);
7137 if (drop_inode) { 7251 if (drop_inode) {
@@ -7353,6 +7467,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7353 .follow_link = page_follow_link_light, 7467 .follow_link = page_follow_link_light,
7354 .put_link = page_put_link, 7468 .put_link = page_put_link,
7355 .getattr = btrfs_getattr, 7469 .getattr = btrfs_getattr,
7470 .setattr = btrfs_setattr,
7356 .permission = btrfs_permission, 7471 .permission = btrfs_permission,
7357 .setxattr = btrfs_setxattr, 7472 .setxattr = btrfs_setxattr,
7358 .getxattr = btrfs_getxattr, 7473 .getxattr = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 72d461656f60..c04f02c7d5bb 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -252,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
252 trans = btrfs_join_transaction(root); 252 trans = btrfs_join_transaction(root);
253 BUG_ON(IS_ERR(trans)); 253 BUG_ON(IS_ERR(trans));
254 254
255 btrfs_update_iflags(inode);
256 inode->i_ctime = CURRENT_TIME;
255 ret = btrfs_update_inode(trans, root, inode); 257 ret = btrfs_update_inode(trans, root, inode);
256 BUG_ON(ret); 258 BUG_ON(ret);
257 259
258 btrfs_update_iflags(inode);
259 inode->i_ctime = CURRENT_TIME;
260 btrfs_end_transaction(trans, root); 260 btrfs_end_transaction(trans, root);
261 261
262 mnt_drop_write(file->f_path.mnt); 262 mnt_drop_write(file->f_path.mnt);
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
858 return 0; 858 return 0;
859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
860 860
861 mutex_lock(&inode->i_mutex);
861 ret = btrfs_delalloc_reserve_space(inode, 862 ret = btrfs_delalloc_reserve_space(inode,
862 num_pages << PAGE_CACHE_SHIFT); 863 num_pages << PAGE_CACHE_SHIFT);
864 mutex_unlock(&inode->i_mutex);
863 if (ret) 865 if (ret)
864 return ret; 866 return ret;
865again: 867again:
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5e151a..cfb55434a469 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2947,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2949 while (index <= last_index) { 2949 while (index <= last_index) {
2950 mutex_lock(&inode->i_mutex);
2950 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); 2951 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2952 mutex_unlock(&inode->i_mutex);
2951 if (ret) 2953 if (ret)
2952 goto out; 2954 goto out;
2953 2955
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c27bcb67f330..ddf2c90d3fc0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1535,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) 1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1536{ 1536{
1537 struct btrfs_fs_info *fs_info = root->fs_info; 1537 struct btrfs_fs_info *fs_info = root->fs_info;
1538 int ret = 0;
1538 1539
1539 mutex_lock(&fs_info->scrub_lock); 1540 mutex_lock(&fs_info->scrub_lock);
1540 if (fs_info->scrub_workers_refcnt == 0) { 1541 if (fs_info->scrub_workers_refcnt == 0) {
1541 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1542 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1542 fs_info->thread_pool_size, &fs_info->generic_worker); 1543 fs_info->thread_pool_size, &fs_info->generic_worker);
1543 fs_info->scrub_workers.idle_thresh = 4; 1544 fs_info->scrub_workers.idle_thresh = 4;
1544 btrfs_start_workers(&fs_info->scrub_workers, 1); 1545 ret = btrfs_start_workers(&fs_info->scrub_workers);
1546 if (ret)
1547 goto out;
1545 } 1548 }
1546 ++fs_info->scrub_workers_refcnt; 1549 ++fs_info->scrub_workers_refcnt;
1550out:
1547 mutex_unlock(&fs_info->scrub_lock); 1551 mutex_unlock(&fs_info->scrub_lock);
1548 1552
1549 return 0; 1553 return ret;
1550} 1554}
1551 1555
1552static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) 1556static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e28ad4baf483..200f63bc6675 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,6 +41,7 @@
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/mnt_namespace.h> 43#include <linux/mnt_namespace.h>
44#include <linux/ratelimit.h>
44#include "compat.h" 45#include "compat.h"
45#include "delayed-inode.h" 46#include "delayed-inode.h"
46#include "ctree.h" 47#include "ctree.h"
@@ -1053,7 +1054,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1053 u64 avail_space; 1054 u64 avail_space;
1054 u64 used_space; 1055 u64 used_space;
1055 u64 min_stripe_size; 1056 u64 min_stripe_size;
1056 int min_stripes = 1; 1057 int min_stripes = 1, num_stripes = 1;
1057 int i = 0, nr_devices; 1058 int i = 0, nr_devices;
1058 int ret; 1059 int ret;
1059 1060
@@ -1067,12 +1068,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1067 1068
1068 /* calc min stripe number for data space allocation */ 1069 /* calc min stripe number for data space allocation */
1069 type = btrfs_get_alloc_profile(root, 1); 1070 type = btrfs_get_alloc_profile(root, 1);
1070 if (type & BTRFS_BLOCK_GROUP_RAID0) 1071 if (type & BTRFS_BLOCK_GROUP_RAID0) {
1071 min_stripes = 2; 1072 min_stripes = 2;
1072 else if (type & BTRFS_BLOCK_GROUP_RAID1) 1073 num_stripes = nr_devices;
1074 } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1073 min_stripes = 2; 1075 min_stripes = 2;
1074 else if (type & BTRFS_BLOCK_GROUP_RAID10) 1076 num_stripes = 2;
1077 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1075 min_stripes = 4; 1078 min_stripes = 4;
1079 num_stripes = 4;
1080 }
1076 1081
1077 if (type & BTRFS_BLOCK_GROUP_DUP) 1082 if (type & BTRFS_BLOCK_GROUP_DUP)
1078 min_stripe_size = 2 * BTRFS_STRIPE_LEN; 1083 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
@@ -1141,13 +1146,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1141 i = nr_devices - 1; 1146 i = nr_devices - 1;
1142 avail_space = 0; 1147 avail_space = 0;
1143 while (nr_devices >= min_stripes) { 1148 while (nr_devices >= min_stripes) {
1149 if (num_stripes > nr_devices)
1150 num_stripes = nr_devices;
1151
1144 if (devices_info[i].max_avail >= min_stripe_size) { 1152 if (devices_info[i].max_avail >= min_stripe_size) {
1145 int j; 1153 int j;
1146 u64 alloc_size; 1154 u64 alloc_size;
1147 1155
1148 avail_space += devices_info[i].max_avail * min_stripes; 1156 avail_space += devices_info[i].max_avail * num_stripes;
1149 alloc_size = devices_info[i].max_avail; 1157 alloc_size = devices_info[i].max_avail;
1150 for (j = i + 1 - min_stripes; j <= i; j++) 1158 for (j = i + 1 - num_stripes; j <= i; j++)
1151 devices_info[j].max_avail -= alloc_size; 1159 devices_info[j].max_avail -= alloc_size;
1152 } 1160 }
1153 i--; 1161 i--;
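The accounting fix matters because a chunk actually spans num_stripes devices, not the minimum. Worked example, assuming RAID0 over three devices with 10 GiB free each: the old loop credited devices_info[i].max_avail * min_stripes = 10 * 2 = 20 GiB and never touched the third device, while the allocator would really build a 3-way stripe worth 30 GiB; with num_stripes = nr_devices the loop credits 10 * 3 = 30 GiB and deducts the allocation from all three devices. A toy model of the fixed loop (values in GiB, the min_stripe_size test reduced to non-zero, and the loop bookkeeping assumed from the surrounding code):

#include <stdio.h>

int main(void)
{
	unsigned long long max_avail[3] = { 10, 10, 10 };
	int nr_devices = 3, min_stripes = 2, num_stripes = 3;	/* RAID0 */
	unsigned long long avail_space = 0;
	int i = nr_devices - 1;

	while (nr_devices >= min_stripes) {
		if (num_stripes > nr_devices)
			num_stripes = nr_devices;
		if (max_avail[i]) {
			unsigned long long alloc_size = max_avail[i];
			int j;

			avail_space += alloc_size * num_stripes;
			for (j = i + 1 - num_stripes; j <= i; j++)
				max_avail[j] -= alloc_size;
		}
		i--;
		nr_devices--;
	}
	printf("%llu GiB\n", avail_space);	/* prints 30, not 20 */
	return 0;
}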
@@ -1264,6 +1272,16 @@ static int btrfs_unfreeze(struct super_block *sb)
1264 return 0; 1272 return 0;
1265} 1273}
1266 1274
1275static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1276{
1277 int ret;
1278
1279 ret = btrfs_dirty_inode(inode);
1280 if (ret)
1281 printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
1282 "error %d\n", btrfs_ino(inode), ret);
1283}
1284
1267static const struct super_operations btrfs_super_ops = { 1285static const struct super_operations btrfs_super_ops = {
1268 .drop_inode = btrfs_drop_inode, 1286 .drop_inode = btrfs_drop_inode,
1269 .evict_inode = btrfs_evict_inode, 1287 .evict_inode = btrfs_evict_inode,
@@ -1271,7 +1289,7 @@ static const struct super_operations btrfs_super_ops = {
1271 .sync_fs = btrfs_sync_fs, 1289 .sync_fs = btrfs_sync_fs,
1272 .show_options = btrfs_show_options, 1290 .show_options = btrfs_show_options,
1273 .write_inode = btrfs_write_inode, 1291 .write_inode = btrfs_write_inode,
1274 .dirty_inode = btrfs_dirty_inode, 1292 .dirty_inode = btrfs_fs_dirty_inode,
1275 .alloc_inode = btrfs_alloc_inode, 1293 .alloc_inode = btrfs_alloc_inode,
1276 .destroy_inode = btrfs_destroy_inode, 1294 .destroy_inode = btrfs_destroy_inode,
1277 .statfs = btrfs_statfs, 1295 .statfs = btrfs_statfs,
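The ->dirty_inode switch exists because the VFS callback returns void while btrfs_dirty_inode() can now fail, so the new wrapper swallows the error but reports it via printk_ratelimited() (hence the ratelimit.h include earlier in this file). A hedged sketch of the general shape, for any void hook fronting a fallible helper; fallible_helper() is hypothetical:

#include <linux/ratelimit.h>

static void example_dirty_hook(struct inode *inode, int flags)
{
	int ret = fallible_helper(inode);	/* hypothetical helper */

	if (ret)
		/* default rate limit: at most 10 messages per 5 seconds */
		printk_ratelimited(KERN_ERR "example: helper failed: %d\n",
				   ret);
}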
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0a8c8f8304b1..f4b839fd3c9d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -295,6 +295,12 @@ loop_lock:
295 btrfs_requeue_work(&device->work); 295 btrfs_requeue_work(&device->work);
296 goto done; 296 goto done;
297 } 297 }
298 /* unplug every 64 requests just for good measure */
299 if (batch_run % 64 == 0) {
300 blk_finish_plug(&plug);
301 blk_start_plug(&plug);
302 sync_pending = 0;
303 }
298 } 304 }
299 305
300 cond_resched(); 306 cond_resched();
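The new unplug-every-64 block bounds how much I/O a single plug can accumulate in the long submission loop of the btrfs worker thread: blk_finish_plug() flushes everything queued so far, and an immediate blk_start_plug() re-arms batching for the next stretch. A hedged sketch of the idiom around a generic submission loop (the loop-body names are hypothetical):

	struct blk_plug plug;
	int batch_run = 0;

	blk_start_plug(&plug);
	while (have_more_bios()) {		/* hypothetical condition */
		submit_one_bio();		/* hypothetical submit step */
		batch_run++;
		/* don't let the plug grow without bound */
		if (batch_run % 64 == 0) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
		}
	}
	blk_finish_plug(&plug);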
@@ -3258,7 +3264,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
3258 */ 3264 */
3259 if (atomic_read(&bbio->error) > bbio->max_errors) { 3265 if (atomic_read(&bbio->error) > bbio->max_errors) {
3260 err = -EIO; 3266 err = -EIO;
3261 } else if (err) { 3267 } else {
3262 /* 3268 /*
3263 * this bio is actually up to date, we didn't 3269 * this bio is actually up to date, we didn't
3264 * go over the max number of errors 3270 * go over the max number of errors
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3eeb97661262..98954003a8d3 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1094,42 +1094,19 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
1094/* 1094/*
1095 * Set/clear/test dir complete flag on the dir's dentry. 1095 * Set/clear/test dir complete flag on the dir's dentry.
1096 */ 1096 */
1097static struct dentry * __d_find_any_alias(struct inode *inode)
1098{
1099 struct dentry *alias;
1100
1101 if (list_empty(&inode->i_dentry))
1102 return NULL;
1103 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1104 return alias;
1105}
1106
1107void ceph_dir_set_complete(struct inode *inode) 1097void ceph_dir_set_complete(struct inode *inode)
1108{ 1098{
1109 struct dentry *dentry = __d_find_any_alias(inode); 1099 /* not yet implemented */
1110
1111 if (dentry && ceph_dentry(dentry)) {
1112 dout(" marking %p (%p) complete\n", inode, dentry);
1113 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1114 }
1115} 1100}
1116 1101
1117void ceph_dir_clear_complete(struct inode *inode) 1102void ceph_dir_clear_complete(struct inode *inode)
1118{ 1103{
1119 struct dentry *dentry = __d_find_any_alias(inode); 1104 /* not yet implemented */
1120
1121 if (dentry && ceph_dentry(dentry)) {
1122 dout(" marking %p (%p) NOT complete\n", inode, dentry);
1123 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1124 }
1125} 1105}
1126 1106
1127bool ceph_dir_test_complete(struct inode *inode) 1107bool ceph_dir_test_complete(struct inode *inode)
1128{ 1108{
1129 struct dentry *dentry = __d_find_any_alias(inode); 1109 /* not yet implemented */
1130
1131 if (dentry && ceph_dentry(dentry))
1132 return test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1133 return false; 1110 return false;
1134} 1111}
1135 1112
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8cd4b52d4217..f3670cf72587 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -282,7 +282,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
282 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length); 282 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
283 byte_count += total_in_buf2; 283 byte_count += total_in_buf2;
284 /* don't allow buffer to overflow */ 284 /* don't allow buffer to overflow */
285 if (byte_count > CIFSMaxBufSize) 285 if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)
286 return -ENOBUFS; 286 return -ENOBUFS;
287 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count); 287 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
288 288
@@ -2122,7 +2122,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
2122 warned_on_ntlm = true; 2122 warned_on_ntlm = true;
2123 cERROR(1, "default security mechanism requested. The default " 2123 cERROR(1, "default security mechanism requested. The default "
2124 "security mechanism will be upgraded from ntlm to " 2124 "security mechanism will be upgraded from ntlm to "
2125 "ntlmv2 in kernel release 3.2"); 2125 "ntlmv2 in kernel release 3.3");
2126 } 2126 }
2127 ses->overrideSecFlg = volume_info->secFlg; 2127 ses->overrideSecFlg = volume_info->secFlg;
2128 2128
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 51352de88ef1..a10e428b32b4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1506,35 +1506,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1506 return -ENOIOCTLCMD; 1506 return -ENOIOCTLCMD;
1507} 1507}
1508 1508
1509static void compat_ioctl_error(struct file *filp, unsigned int fd,
1510 unsigned int cmd, unsigned long arg)
1511{
1512 char buf[10];
1513 char *fn = "?";
1514 char *path;
1515
1516 /* find the name of the device. */
1517 path = (char *)__get_free_page(GFP_KERNEL);
1518 if (path) {
1519 fn = d_path(&filp->f_path, path, PAGE_SIZE);
1520 if (IS_ERR(fn))
1521 fn = "?";
1522 }
1523
1524 sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
1525 if (!isprint(buf[1]))
1526 sprintf(buf, "%02x", buf[1]);
1527 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
1528 "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
1529 current->comm, current->pid,
1530 (int)fd, (unsigned int)cmd, buf,
1531 (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
1532 (unsigned int)arg, fn);
1533
1534 if (path)
1535 free_page((unsigned long)path);
1536}
1537
1538static int compat_ioctl_check_table(unsigned int xcmd) 1509static int compat_ioctl_check_table(unsigned int xcmd)
1539{ 1510{
1540 int i; 1511 int i;
@@ -1621,13 +1592,8 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1621 goto found_handler; 1592 goto found_handler;
1622 1593
1623 error = do_ioctl_trans(fd, cmd, arg, filp); 1594 error = do_ioctl_trans(fd, cmd, arg, filp);
1624 if (error == -ENOIOCTLCMD) { 1595 if (error == -ENOIOCTLCMD)
1625 static int count; 1596 error = -ENOTTY;
1626
1627 if (++count <= 50)
1628 compat_ioctl_error(filp, fd, cmd, arg);
1629 error = -EINVAL;
1630 }
1631 1597
1632 goto out_fput; 1598 goto out_fput;
1633 1599
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ac86f8b3e3cb..517f211a3bd4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -47,17 +47,6 @@ struct wb_writeback_work {
47 struct completion *done; /* set if the caller waits */ 47 struct completion *done; /* set if the caller waits */
48}; 48};
49 49
50const char *wb_reason_name[] = {
51 [WB_REASON_BACKGROUND] = "background",
52 [WB_REASON_TRY_TO_FREE_PAGES] = "try_to_free_pages",
53 [WB_REASON_SYNC] = "sync",
54 [WB_REASON_PERIODIC] = "periodic",
55 [WB_REASON_LAPTOP_TIMER] = "laptop_timer",
56 [WB_REASON_FREE_MORE_MEM] = "free_more_memory",
57 [WB_REASON_FS_FREE_SPACE] = "fs_free_space",
58 [WB_REASON_FORKER_THREAD] = "forker_thread"
59};
60
61/* 50/*
62 * Include the creation of the trace points after defining the 51 * Include the creation of the trace points after defining the
63 * wb_writeback_work structure so that the definition remains local to this 52 * wb_writeback_work structure so that the definition remains local to this
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1d9b9fcb2db4..066836e81848 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -42,7 +42,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
42 42
43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg); 43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
44 if (error == -ENOIOCTLCMD) 44 if (error == -ENOIOCTLCMD)
45 error = -EINVAL; 45 error = -ENOTTY;
46 out: 46 out:
47 return error; 47 return error;
48} 48}
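Both dispatch paths (compat_sys_ioctl() above and vfs_ioctl() here) now follow the historical UNIX convention: an unrecognized command fails with ENOTTY ("inappropriate ioctl for device"), while -ENOIOCTLCMD stays a kernel-internal sentinel that must never reach userspace. Hedged sketch of the dispatch tail:

	error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
	/*
	 * -ENOIOCTLCMD means "not mine, try another handler"; once every
	 * handler has passed, userspace must see ENOTTY, not a leaked
	 * in-kernel sentinel (and not the old, misleading EINVAL).
	 */
	if (error == -ENOIOCTLCMD)
		error = -ENOTTY;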
diff --git a/fs/locks.c b/fs/locks.c
index 3b0d05dcd7c1..637694bf3a03 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1205,6 +1205,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
1205 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1205 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1206 1206
1207 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); 1207 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1208 if (IS_ERR(new_fl))
1209 return PTR_ERR(new_fl);
1208 1210
1209 lock_flocks(); 1211 lock_flocks();
1210 1212
@@ -1221,12 +1223,6 @@ int __break_lease(struct inode *inode, unsigned int mode)
1221 if (fl->fl_owner == current->files) 1223 if (fl->fl_owner == current->files)
1222 i_have_this_lease = 1; 1224 i_have_this_lease = 1;
1223 1225
1224 if (IS_ERR(new_fl) && !i_have_this_lease
1225 && ((mode & O_NONBLOCK) == 0)) {
1226 error = PTR_ERR(new_fl);
1227 goto out;
1228 }
1229
1230 break_time = 0; 1226 break_time = 0;
1231 if (lease_break_time > 0) { 1227 if (lease_break_time > 0) {
1232 break_time = jiffies + lease_break_time * HZ; 1228 break_time = jiffies + lease_break_time * HZ;
@@ -1284,8 +1280,7 @@ restart:
1284 1280
1285out: 1281out:
1286 unlock_flocks(); 1282 unlock_flocks();
1287 if (!IS_ERR(new_fl)) 1283 locks_free_lock(new_fl);
1288 locks_free_lock(new_fl);
1289 return error; 1284 return error;
1290} 1285}
1291 1286
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1d9e33966db0..4d46a6a59070 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -263,23 +263,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
263 goto out_no_root; 263 goto out_no_root;
264 } 264 }
265 265
266 ret = -ENOMEM;
267 s->s_root = d_alloc_root(root_inode);
268 if (!s->s_root)
269 goto out_iput;
270
271 if (!(s->s_flags & MS_RDONLY)) {
272 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
273 ms->s_state &= ~MINIX_VALID_FS;
274 mark_buffer_dirty(bh);
275 }
276 if (!(sbi->s_mount_state & MINIX_VALID_FS))
277 printk("MINIX-fs: mounting unchecked file system, "
278 "running fsck is recommended\n");
279 else if (sbi->s_mount_state & MINIX_ERROR_FS)
280 printk("MINIX-fs: mounting file system with errors, "
281 "running fsck is recommended\n");
282
283 /* Apparently minix can create filesystems that allocate more blocks for 266 /* Apparently minix can create filesystems that allocate more blocks for
284 * the bitmaps than needed. We simply ignore that, but verify it didn't 267 * the bitmaps than needed. We simply ignore that, but verify it didn't
285 * create one with not enough blocks and bail out if so. 268 * create one with not enough blocks and bail out if so.
@@ -300,6 +283,23 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
300 goto out_iput; 283 goto out_iput;
301 } 284 }
302 285
286 ret = -ENOMEM;
287 s->s_root = d_alloc_root(root_inode);
288 if (!s->s_root)
289 goto out_iput;
290
291 if (!(s->s_flags & MS_RDONLY)) {
292 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
293 ms->s_state &= ~MINIX_VALID_FS;
294 mark_buffer_dirty(bh);
295 }
296 if (!(sbi->s_mount_state & MINIX_VALID_FS))
297 printk("MINIX-fs: mounting unchecked file system, "
298 "running fsck is recommended\n");
299 else if (sbi->s_mount_state & MINIX_ERROR_FS)
300 printk("MINIX-fs: mounting file system with errors, "
301 "running fsck is recommended\n");
302
303 return 0; 303 return 0;
304 304
305out_iput: 305out_iput:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eca56d4b39c0..606ef0f20aed 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -147,7 +147,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
147 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate 147 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
148 * the cached file length 148 * the cached file length
149 */ 149 */
150 if (origin != SEEK_SET || origin != SEEK_CUR) { 150 if (origin != SEEK_SET && origin != SEEK_CUR) {
151 struct inode *inode = filp->f_mapping->host; 151 struct inode *inode = filp->f_mapping->host;
152 152
153 int retval = nfs_revalidate_file_size(inode, filp); 153 int retval = nfs_revalidate_file_size(inode, filp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be2bbac13817..d9f4d78c3413 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -39,6 +39,8 @@
39#include <linux/delay.h> 39#include <linux/delay.h>
40#include <linux/errno.h> 40#include <linux/errno.h>
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/ratelimit.h>
43#include <linux/printk.h>
42#include <linux/slab.h> 44#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h> 46#include <linux/sunrpc/gss_api.h>
@@ -894,6 +896,8 @@ out:
894 896
895static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) 897static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896{ 898{
899 if (delegation == NULL)
900 return 0;
897 if ((delegation->type & fmode) != fmode) 901 if ((delegation->type & fmode) != fmode)
898 return 0; 902 return 0;
899 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 903 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
@@ -1036,8 +1040,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1036 } 1040 }
1037 rcu_read_lock(); 1041 rcu_read_lock();
1038 delegation = rcu_dereference(nfsi->delegation); 1042 delegation = rcu_dereference(nfsi->delegation);
1039 if (delegation == NULL || 1043 if (!can_open_delegated(delegation, fmode)) {
1040 !can_open_delegated(delegation, fmode)) {
1041 rcu_read_unlock(); 1044 rcu_read_unlock();
1042 break; 1045 break;
1043 } 1046 }
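Folding the NULL test into can_open_delegated() lets callers like the nfs4_try_open_cached() hunk above drop their own `delegation == NULL ||` half of the condition. A sketch of the hoisted predicate, assembled from the lines shown in this diff (the upstream function may do more before returning 1):

static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
{
	if (delegation == NULL)		/* absorb the callers' NULL check */
		return 0;
	if ((delegation->type & fmode) != fmode)
		return 0;
	if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
		return 0;
	return 1;
}

/* before: if (delegation == NULL || !can_open_delegated(delegation, fmode)) */
/* after:  if (!can_open_delegated(delegation, fmode)) */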
@@ -1091,7 +1094,12 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1091 if (delegation) 1094 if (delegation)
1092 delegation_flags = delegation->flags; 1095 delegation_flags = delegation->flags;
1093 rcu_read_unlock(); 1096 rcu_read_unlock();
1094 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1097 if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
1098 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1099 "returning a delegation for "
1100 "OPEN(CLAIM_DELEGATE_CUR)\n",
1101 NFS_CLIENT(inode)->cl_server);
1102 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1095 nfs_inode_set_delegation(state->inode, 1103 nfs_inode_set_delegation(state->inode,
1096 data->owner->so_cred, 1104 data->owner->so_cred,
1097 &data->o_res); 1105 &data->o_res);
@@ -1423,11 +1431,9 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1423 goto out_no_action; 1431 goto out_no_action;
1424 rcu_read_lock(); 1432 rcu_read_lock();
1425 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); 1433 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
1426 if (delegation != NULL && 1434 if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR &&
1427 test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) { 1435 can_open_delegated(delegation, data->o_arg.fmode))
1428 rcu_read_unlock(); 1436 goto unlock_no_action;
1429 goto out_no_action;
1430 }
1431 rcu_read_unlock(); 1437 rcu_read_unlock();
1432 } 1438 }
1433 /* Update sequence id. */ 1439 /* Update sequence id. */
@@ -1444,6 +1450,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1444 return; 1450 return;
1445 rpc_call_start(task); 1451 rpc_call_start(task);
1446 return; 1452 return;
1453unlock_no_action:
1454 rcu_read_unlock();
1447out_no_action: 1455out_no_action:
1448 task->tk_action = NULL; 1456 task->tk_action = NULL;
1449 1457
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 39914be40b03..6a7107ae6b72 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1156,11 +1156,13 @@ restart:
1156 if (status >= 0) { 1156 if (status >= 0) {
1157 status = nfs4_reclaim_locks(state, ops); 1157 status = nfs4_reclaim_locks(state, ops);
1158 if (status >= 0) { 1158 if (status >= 0) {
1159 spin_lock(&state->state_lock);
1159 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1160 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1160 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 1161 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
1161 printk("%s: Lock reclaim failed!\n", 1162 printk("%s: Lock reclaim failed!\n",
1162 __func__); 1163 __func__);
1163 } 1164 }
1165 spin_unlock(&state->state_lock);
1164 nfs4_put_open_state(state); 1166 nfs4_put_open_state(state);
1165 goto restart; 1167 goto restart;
1166 } 1168 }
@@ -1224,10 +1226,12 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
1224 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 1226 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1225 clear_bit(NFS_O_WRONLY_STATE, &state->flags); 1227 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1226 clear_bit(NFS_O_RDWR_STATE, &state->flags); 1228 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1229 spin_lock(&state->state_lock);
1227 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1230 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1228 lock->ls_seqid.flags = 0; 1231 lock->ls_seqid.flags = 0;
1229 lock->ls_flags &= ~NFS_LOCK_INITIALIZED; 1232 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
1230 } 1233 }
1234 spin_unlock(&state->state_lock);
1231} 1235}
1232 1236
1233static void nfs4_reset_seqids(struct nfs_server *server, 1237static void nfs4_reset_seqids(struct nfs_server *server,
@@ -1350,12 +1354,14 @@ static void nfs4_warn_keyexpired(const char *s)
1350static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1354static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1351{ 1355{
1352 switch (error) { 1356 switch (error) {
1357 case 0:
1358 break;
1353 case -NFS4ERR_CB_PATH_DOWN: 1359 case -NFS4ERR_CB_PATH_DOWN:
1354 nfs_handle_cb_pathdown(clp); 1360 nfs_handle_cb_pathdown(clp);
1355 return 0; 1361 break;
1356 case -NFS4ERR_NO_GRACE: 1362 case -NFS4ERR_NO_GRACE:
1357 nfs4_state_end_reclaim_reboot(clp); 1363 nfs4_state_end_reclaim_reboot(clp);
1358 return 0; 1364 break;
1359 case -NFS4ERR_STALE_CLIENTID: 1365 case -NFS4ERR_STALE_CLIENTID:
1360 case -NFS4ERR_LEASE_MOVED: 1366 case -NFS4ERR_LEASE_MOVED:
1361 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1367 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
@@ -1375,13 +1381,15 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1375 case -NFS4ERR_SEQ_MISORDERED: 1381 case -NFS4ERR_SEQ_MISORDERED:
1376 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1382 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1377 /* Zero session reset errors */ 1383 /* Zero session reset errors */
1378 return 0; 1384 break;
1379 case -EKEYEXPIRED: 1385 case -EKEYEXPIRED:
1380 /* Nothing we can do */ 1386 /* Nothing we can do */
1381 nfs4_warn_keyexpired(clp->cl_hostname); 1387 nfs4_warn_keyexpired(clp->cl_hostname);
1382 return 0; 1388 break;
1389 default:
1390 return error;
1383 } 1391 }
1384 return error; 1392 return 0;
1385} 1393}
1386 1394
1387static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) 1395static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
@@ -1428,7 +1436,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1428 struct rpc_cred *cred; 1436 struct rpc_cred *cred;
1429 const struct nfs4_state_maintenance_ops *ops = 1437 const struct nfs4_state_maintenance_ops *ops =
1430 clp->cl_mvops->state_renewal_ops; 1438 clp->cl_mvops->state_renewal_ops;
1431 int status = -NFS4ERR_EXPIRED; 1439 int status;
1432 1440
1433 /* Is the client already known to have an expired lease? */ 1441 /* Is the client already known to have an expired lease? */
1434 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1442 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
@@ -1438,6 +1446,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1438 spin_unlock(&clp->cl_lock); 1446 spin_unlock(&clp->cl_lock);
1439 if (cred == NULL) { 1447 if (cred == NULL) {
1440 cred = nfs4_get_setclientid_cred(clp); 1448 cred = nfs4_get_setclientid_cred(clp);
1449 status = -ENOKEY;
1441 if (cred == NULL) 1450 if (cred == NULL)
1442 goto out; 1451 goto out;
1443 } 1452 }
@@ -1525,16 +1534,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1525{ 1534{
1526 if (!flags) 1535 if (!flags)
1527 return; 1536 return;
1528 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1537 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1529 nfs41_handle_server_reboot(clp); 1538 nfs41_handle_server_reboot(clp);
1530 else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1539 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1531 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | 1540 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
1532 SEQ4_STATUS_ADMIN_STATE_REVOKED | 1541 SEQ4_STATUS_ADMIN_STATE_REVOKED |
1533 SEQ4_STATUS_LEASE_MOVED)) 1542 SEQ4_STATUS_LEASE_MOVED))
1534 nfs41_handle_state_revoked(clp); 1543 nfs41_handle_state_revoked(clp);
1535 else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1544 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1536 nfs41_handle_recallable_state_revoked(clp); 1545 nfs41_handle_recallable_state_revoked(clp);
1537 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1546 if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1538 SEQ4_STATUS_BACKCHANNEL_FAULT | 1547 SEQ4_STATUS_BACKCHANNEL_FAULT |
1539 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1548 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1540 nfs41_handle_cb_path_down(clp); 1549 nfs41_handle_cb_path_down(clp);
@@ -1662,10 +1671,10 @@ static void nfs4_state_manager(struct nfs_client *clp)
1662 1671
1663 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1672 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
1664 status = nfs4_check_lease(clp); 1673 status = nfs4_check_lease(clp);
1674 if (status < 0)
1675 goto out_error;
1665 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1676 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1666 continue; 1677 continue;
1667 if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN)
1668 goto out_error;
1669 } 1678 }
1670 1679
1671 /* Initialize or reset the session */ 1680 /* Initialize or reset the session */
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 41d6743d303c..ac258beeda3c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -625,6 +625,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
625 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) 625 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment)
626 goto out_free; 626 goto out_free;
627 627
628 if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size)
629 goto out_free;
630
628 len = argv[n].v_size * argv[n].v_nmembs; 631 len = argv[n].v_size * argv[n].v_nmembs;
629 base = (void __user *)(unsigned long)argv[n].v_base; 632 base = (void __user *)(unsigned long)argv[n].v_base;
630 if (len == 0) { 633 if (len == 0) {
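The new nilfs2 test is the classic multiplication-overflow guard: validate with a division so the later `v_size * v_nmembs` length computation cannot wrap. A self-contained sketch of the pattern (userspace C; the names are illustrative):

#include <errno.h>
#include <limits.h>
#include <stddef.h>

/* 0 and *len on success; -EINVAL if size * nmembs would wrap. */
static int checked_vec_len(unsigned int size, unsigned int nmembs,
			   size_t *len)
{
	if (size == 0)
		return -EINVAL;
	/* the division cannot overflow, so it safely bounds the product */
	if (nmembs >= UINT_MAX / size)
		return -EINVAL;	/* conservative: rejects the boundary too */
	*len = (size_t)size * nmembs;
	return 0;
}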
@@ -842,6 +845,19 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
842 case FS_IOC32_GETVERSION: 845 case FS_IOC32_GETVERSION:
843 cmd = FS_IOC_GETVERSION; 846 cmd = FS_IOC_GETVERSION;
844 break; 847 break;
848 case NILFS_IOCTL_CHANGE_CPMODE:
849 case NILFS_IOCTL_DELETE_CHECKPOINT:
850 case NILFS_IOCTL_GET_CPINFO:
851 case NILFS_IOCTL_GET_CPSTAT:
852 case NILFS_IOCTL_GET_SUINFO:
853 case NILFS_IOCTL_GET_SUSTAT:
854 case NILFS_IOCTL_GET_VINFO:
855 case NILFS_IOCTL_GET_BDESCS:
856 case NILFS_IOCTL_CLEAN_SEGMENTS:
857 case NILFS_IOCTL_SYNC:
858 case NILFS_IOCTL_RESIZE:
859 case NILFS_IOCTL_SET_ALLOC_RANGE:
860 break;
845 default: 861 default:
846 return -ENOIOCTLCMD; 862 return -ENOIOCTLCMD;
847 } 863 }
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 2527a68057fc..d76ca6ae2b1b 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -31,7 +31,7 @@ static u64 get_idle_time(int cpu)
31 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; 31 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
32 idle += arch_idle_time(cpu); 32 idle += arch_idle_time(cpu);
33 } else 33 } else
34 idle = nsecs_to_jiffies64(1000 * idle_time); 34 idle = usecs_to_cputime64(idle_time);
35 35
36 return idle; 36 return idle;
37} 37}
@@ -44,7 +44,7 @@ static u64 get_iowait_time(int cpu)
44 /* !NO_HZ so we can rely on cpustat.iowait */ 44 /* !NO_HZ so we can rely on cpustat.iowait */
45 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; 45 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
46 else 46 else
47 iowait = nsecs_to_jiffies64(1000 * iowait_time); 47 iowait = usecs_to_cputime64(iowait_time);
48 48
49 return iowait; 49 return iowait;
50} 50}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3eca58f51ae9..8a899496fd5f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -869,27 +869,6 @@ xfs_fs_dirty_inode(
869} 869}
870 870
871STATIC int 871STATIC int
872xfs_log_inode(
873 struct xfs_inode *ip)
874{
875 struct xfs_mount *mp = ip->i_mount;
876 struct xfs_trans *tp;
877 int error;
878
879 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
880 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
881 if (error) {
882 xfs_trans_cancel(tp, 0);
883 return error;
884 }
885
886 xfs_ilock(ip, XFS_ILOCK_EXCL);
887 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
888 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
889 return xfs_trans_commit(tp, 0);
890}
891
892STATIC int
893xfs_fs_write_inode( 872xfs_fs_write_inode(
894 struct inode *inode, 873 struct inode *inode,
895 struct writeback_control *wbc) 874 struct writeback_control *wbc)
@@ -902,10 +881,8 @@ xfs_fs_write_inode(
902 881
903 if (XFS_FORCED_SHUTDOWN(mp)) 882 if (XFS_FORCED_SHUTDOWN(mp))
904 return -XFS_ERROR(EIO); 883 return -XFS_ERROR(EIO);
905 if (!ip->i_update_core)
906 return 0;
907 884
908 if (wbc->sync_mode == WB_SYNC_ALL) { 885 if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
909 /* 886 /*
910 * Make sure the inode has made it into the log. Instead 887 * Make sure the inode has made it into the log. Instead
911 * of forcing it all the way to stable storage using a 888 * of forcing it all the way to stable storage using a
@@ -913,11 +890,14 @@ xfs_fs_write_inode(
913 * ->sync_fs call do that for us, which reduces the number 890 * ->sync_fs call do that for us, which reduces the number
914 * of synchronous log forces dramatically. 891 * of synchronous log forces dramatically.
915 */ 892 */
916 error = xfs_log_inode(ip); 893 error = xfs_log_dirty_inode(ip, NULL, 0);
917 if (error) 894 if (error)
918 goto out; 895 goto out;
919 return 0; 896 return 0;
920 } else { 897 } else {
898 if (!ip->i_update_core)
899 return 0;
900
921 /* 901 /*
922 * We make this non-blocking if the inode is contended, return 902 * We make this non-blocking if the inode is contended, return
923 * EAGAIN to indicate to the caller that they did not succeed. 903 * EAGAIN to indicate to the caller that they did not succeed.
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index be5c51d8f757..f0994aedcd15 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
336 return error; 336 return error;
337} 337}
338 338
339int
340xfs_log_dirty_inode(
341 struct xfs_inode *ip,
342 struct xfs_perag *pag,
343 int flags)
344{
345 struct xfs_mount *mp = ip->i_mount;
346 struct xfs_trans *tp;
347 int error;
348
349 if (!ip->i_update_core)
350 return 0;
351
352 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
353 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
354 if (error) {
355 xfs_trans_cancel(tp, 0);
356 return error;
357 }
358
359 xfs_ilock(ip, XFS_ILOCK_EXCL);
360 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
361 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
362 return xfs_trans_commit(tp, 0);
363}
364
339/* 365/*
340 * When remounting a filesystem read-only or freezing the filesystem, we have 366 * When remounting a filesystem read-only or freezing the filesystem, we have
341 * two phases to execute. This first phase is syncing the data before we 367 * two phases to execute. This first phase is syncing the data before we
@@ -359,6 +385,16 @@ xfs_quiesce_data(
359{ 385{
360 int error, error2 = 0; 386 int error, error2 = 0;
361 387
388 /*
389 * Log all pending size and timestamp updates. The vfs writeback
390 * code is supposed to do this, but due to its overaggressive
391 * livelock detection it will skip inodes where appending writes
392 * were written out in the first non-blocking sync phase if their
393 * completion took long enough that it happened after taking the
394 * timestamp for the cut-off in the blocking phase.
395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
397
362 xfs_qm_sync(mp, SYNC_TRYLOCK); 398 xfs_qm_sync(mp, SYNC_TRYLOCK);
363 xfs_qm_sync(mp, SYNC_WAIT); 399 xfs_qm_sync(mp, SYNC_WAIT);
364 400
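xfs_log_dirty_inode() is the old xfs_log_inode() moved here, grown an i_update_core short-circuit and an (ip, pag, flags) signature so it can double as the xfs_inode_ag_iterator() callback. An annotated copy of its transaction sequence, as a reading aid; the unlock-at-commit behaviour is what xfs_trans_ijoin() with lock flags is understood to provide:

	if (!ip->i_update_core)		/* nothing unlogged: cheap exit */
		return 0;

	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);	/* back out the allocation */
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);			/* take the inode lock */
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);	/* commit will drop it */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);	/* core fields only */
	return xfs_trans_commit(tp, 0);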
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e7ac6e..fa965479d788 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,6 +34,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
34 34
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
38
37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 39int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
38int xfs_reclaim_inodes_count(struct xfs_mount *mp); 40int xfs_reclaim_inodes_count(struct xfs_mount *mp);
39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 41void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 77202e2c9fc5..9a62937c56ca 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -24,9 +24,11 @@ typedef u64 __nocast cputime64_t;
24 * Convert cputime to microseconds and back. 24 * Convert cputime to microseconds and back.
25 */ 25 */
26#define cputime_to_usecs(__ct) \ 26#define cputime_to_usecs(__ct) \
27 jiffies_to_usecs(cputime_to_jiffies(__ct)); 27 jiffies_to_usecs(cputime_to_jiffies(__ct))
28#define usecs_to_cputime(__msecs) \ 28#define usecs_to_cputime(__usec) \
29 jiffies_to_cputime(usecs_to_jiffies(__msecs)); 29 jiffies_to_cputime(usecs_to_jiffies(__usec))
30#define usecs_to_cputime64(__usec) \
31 jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
30 32
31/* 33/*
32 * Convert cputime to seconds and back. 34 * Convert cputime to seconds and back.
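usecs_to_cputime64() goes through nsecs_to_jiffies64() because no usecs_to_jiffies64() helper exists; multiplying into nanoseconds first keeps the full 64-bit range, whereas the existing usecs_to_cputime() feeds usecs_to_jiffies(), whose unsigned int parameter would truncate large totals. Worked example, assuming HZ = 100 on this generic implementation: idle_time = 2,500,000 us * 1000 = 2.5e9 ns; nsecs_to_jiffies64() divides by 1e9/HZ = 1e7 ns per jiffy, giving 250 jiffies64, which jiffies64_to_cputime64() converts to a cputime64_t worth 2.5 s. Architectures with native cputime (the ia64, powerpc, and s390 headers touched by this same series) supply their own definitions instead.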
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a2a083..3c1063acb2ab 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -22,8 +22,14 @@ extern unsigned long __sw_hweight64(__u64 w);
22#include <asm/bitops.h> 22#include <asm/bitops.h>
23 23
24#define for_each_set_bit(bit, addr, size) \ 24#define for_each_set_bit(bit, addr, size) \
25 for ((bit) = find_first_bit((addr), (size)); \ 25 for ((bit) = find_first_bit((addr), (size)); \
26 (bit) < (size); \ 26 (bit) < (size); \
27 (bit) = find_next_bit((addr), (size), (bit) + 1))
28
29/* same as for_each_set_bit() but use bit as value to start with */
30#define for_each_set_bit_cont(bit, addr, size) \
31 for ((bit) = find_next_bit((addr), (size), (bit)); \
32 (bit) < (size); \
27 (bit) = find_next_bit((addr), (size), (bit) + 1)) 33 (bit) = find_next_bit((addr), (size), (bit) + 1))
28 34
29static __inline__ int get_bitmask_order(unsigned int count) 35static __inline__ int get_bitmask_order(unsigned int count)
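for_each_set_bit_cont() differs from the plain iterator only in its first step: find_next_bit() resumes from the caller's current bit (inclusive) instead of rescanning from zero, so a bitmap walk can be paused and continued. A hedged usage sketch (should_pause() and process() are hypothetical):

	unsigned long mask = 0xf0f0;	/* bits 4-7 and 12-15 set */
	int bit;

	for_each_set_bit(bit, &mask, BITS_PER_LONG) {
		if (should_pause(bit))	/* hypothetical predicate */
			break;
		process(bit);		/* hypothetical consumer */
	}

	/* later: resume at 'bit' (it is re-tested, not skipped) */
	for_each_set_bit_cont(bit, &mask, BITS_PER_LONG)
		process(bit);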
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7a6d3b5bc7b..94acd8172b5b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -805,9 +805,6 @@ extern void blk_unprep_request(struct request *);
805 */ 805 */
806extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 806extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
807 spinlock_t *lock, int node_id); 807 spinlock_t *lock, int node_id);
808extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *,
809 request_fn_proc *,
810 spinlock_t *, int node_id);
811extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 808extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
812extern struct request_queue *blk_init_allocated_queue(struct request_queue *, 809extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
813 request_fn_proc *, spinlock_t *); 810 request_fn_proc *, spinlock_t *);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index ab344a521105..66d3e954eb6c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -44,7 +44,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
44 unsigned long endpfn); 44 unsigned long endpfn);
45extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); 45extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
46 46
47unsigned long free_all_memory_core_early(int nodeid); 47extern unsigned long free_low_memory_core_early(int nodeid);
48extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); 48extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
49extern unsigned long free_all_bootmem(void); 49extern unsigned long free_all_bootmem(void);
50 50
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c86c940d1de3..081147da0564 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -71,7 +71,7 @@ struct timecounter {
71 71
72/** 72/**
73 * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds 73 * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds
74 * @tc: Pointer to cycle counter. 74 * @cc: Pointer to cycle counter.
75 * @cycles: Cycles 75 * @cycles: Cycles
76 * 76 *
77 * XXX - This could use some mult_lxl_ll() asm optimization. Same code 77 * XXX - This could use some mult_lxl_ll() asm optimization. Same code
@@ -114,7 +114,7 @@ extern u64 timecounter_read(struct timecounter *tc);
114 * time base as values returned by 114 * time base as values returned by
115 * timecounter_read() 115 * timecounter_read()
116 * @tc: Pointer to time counter. 116 * @tc: Pointer to time counter.
117 * @cycle: a value returned by tc->cc->read() 117 * @cycle_tstamp: a value returned by tc->cc->read()
118 * 118 *
119 * Cycle counts that are converted correctly as long as they 119 * Cycle counts that are converted correctly as long as they
120 * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], 120 * fall into the interval [-1/2 max cycle count, +1/2 max cycle count],
@@ -156,11 +156,12 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
156 * @mult: cycle to nanosecond multiplier 156 * @mult: cycle to nanosecond multiplier
157 * @shift: cycle to nanosecond divisor (power of two) 157 * @shift: cycle to nanosecond divisor (power of two)
158 * @max_idle_ns: max idle time permitted by the clocksource (nsecs) 158 * @max_idle_ns: max idle time permitted by the clocksource (nsecs)
159 * @maxadj maximum adjustment value to mult (~11%) 159 * @maxadj: maximum adjustment value to mult (~11%)
160 * @flags: flags describing special properties 160 * @flags: flags describing special properties
161 * @archdata: arch-specific data 161 * @archdata: arch-specific data
162 * @suspend: suspend function for the clocksource, if necessary 162 * @suspend: suspend function for the clocksource, if necessary
163 * @resume: resume function for the clocksource, if necessary 163 * @resume: resume function for the clocksource, if necessary
164 * @cycle_last: most recent cycle counter value seen by ::read()
164 */ 165 */
165struct clocksource { 166struct clocksource {
166 /* 167 /*
@@ -187,6 +188,7 @@ struct clocksource {
187 void (*suspend)(struct clocksource *cs); 188 void (*suspend)(struct clocksource *cs);
188 void (*resume)(struct clocksource *cs); 189 void (*resume)(struct clocksource *cs);
189 190
191 /* private: */
190#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 192#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
191 /* Watchdog related data, used by the framework */ 193 /* Watchdog related data, used by the framework */
192 struct list_head wd_list; 194 struct list_head wd_list;
@@ -261,6 +263,9 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
261 263
262/** 264/**
263 * clocksource_cyc2ns - converts clocksource cycles to nanoseconds 265 * clocksource_cyc2ns - converts clocksource cycles to nanoseconds
266 * @cycles: cycles
267 * @mult: cycle to nanosecond multiplier
268 * @shift: cycle to nanosecond divisor (power of two)
264 * 269 *
265 * Converts cycles to nanoseconds, using the given mult and shift. 270 * Converts cycles to nanoseconds, using the given mult and shift.
266 * 271 *
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6cb60fd2ea84..305c263021e7 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -27,6 +27,7 @@ struct cpu {
27 27
28extern int register_cpu(struct cpu *cpu, int num); 28extern int register_cpu(struct cpu *cpu, int num);
29extern struct sys_device *get_cpu_sysdev(unsigned cpu); 29extern struct sys_device *get_cpu_sysdev(unsigned cpu);
30extern bool cpu_is_hotpluggable(unsigned cpu);
30 31
31extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr); 32extern int cpu_add_sysdev_attr(struct sysdev_attribute *attr);
32extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); 33extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr);
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 65970b811e22..0e5f5785d9f2 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -46,6 +46,8 @@ struct debug_obj {
46 * fails 46 * fails
47 * @fixup_free: fixup function, which is called when the free check 47 * @fixup_free: fixup function, which is called when the free check
48 * fails 48 * fails
49 * @fixup_assert_init: fixup function, which is called when the assert_init
50 * check fails
49 */ 51 */
50struct debug_obj_descr { 52struct debug_obj_descr {
51 const char *name; 53 const char *name;
@@ -54,6 +56,7 @@ struct debug_obj_descr {
54 int (*fixup_activate) (void *addr, enum debug_obj_state state); 56 int (*fixup_activate) (void *addr, enum debug_obj_state state);
55 int (*fixup_destroy) (void *addr, enum debug_obj_state state); 57 int (*fixup_destroy) (void *addr, enum debug_obj_state state);
56 int (*fixup_free) (void *addr, enum debug_obj_state state); 58 int (*fixup_free) (void *addr, enum debug_obj_state state);
59 int (*fixup_assert_init)(void *addr, enum debug_obj_state state);
57}; 60};
58 61
59#ifdef CONFIG_DEBUG_OBJECTS 62#ifdef CONFIG_DEBUG_OBJECTS
@@ -64,6 +67,7 @@ extern void debug_object_activate (void *addr, struct debug_obj_descr *descr);
64extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr); 67extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr);
65extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr); 68extern void debug_object_destroy (void *addr, struct debug_obj_descr *descr);
66extern void debug_object_free (void *addr, struct debug_obj_descr *descr); 69extern void debug_object_free (void *addr, struct debug_obj_descr *descr);
70extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr);
67 71
68/* 72/*
69 * Active state: 73 * Active state:
@@ -89,6 +93,8 @@ static inline void
89debug_object_destroy (void *addr, struct debug_obj_descr *descr) { } 93debug_object_destroy (void *addr, struct debug_obj_descr *descr) { }
90static inline void 94static inline void
91debug_object_free (void *addr, struct debug_obj_descr *descr) { } 95debug_object_free (void *addr, struct debug_obj_descr *descr) { }
96static inline void
97debug_object_assert_init(void *addr, struct debug_obj_descr *descr) { }
92 98
93static inline void debug_objects_early_init(void) { } 99static inline void debug_objects_early_init(void) { }
94static inline void debug_objects_mem_init(void) { } 100static inline void debug_objects_mem_init(void) { }
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index ef90cbd8e173..57c9a8ae4f2d 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -31,6 +31,7 @@ extern void free_dmar_iommu(struct intel_iommu *iommu);
31extern int iommu_calculate_agaw(struct intel_iommu *iommu); 31extern int iommu_calculate_agaw(struct intel_iommu *iommu);
32extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); 32extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
33extern int dmar_disabled; 33extern int dmar_disabled;
34extern int intel_iommu_enabled;
34#else 35#else
35static inline int iommu_calculate_agaw(struct intel_iommu *iommu) 36static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
36{ 37{
@@ -44,6 +45,7 @@ static inline void free_dmar_iommu(struct intel_iommu *iommu)
44{ 45{
45} 46}
46#define dmar_disabled (1) 47#define dmar_disabled (1)
48#define intel_iommu_enabled (0)
47#endif 49#endif
48 50
49 51
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f743883f769e..bb7f30971858 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
139extern void account_system_vtime(struct task_struct *tsk); 139extern void account_system_vtime(struct task_struct *tsk);
140#endif 140#endif
141 141
142#if defined(CONFIG_NO_HZ)
143#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) 142#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
144extern void rcu_enter_nohz(void);
145extern void rcu_exit_nohz(void);
146
147static inline void rcu_irq_enter(void)
148{
149 rcu_exit_nohz();
150}
151
152static inline void rcu_irq_exit(void)
153{
154 rcu_enter_nohz();
155}
156 143
157static inline void rcu_nmi_enter(void) 144static inline void rcu_nmi_enter(void)
158{ 145{
@@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void)
163} 150}
164 151
165#else 152#else
166extern void rcu_irq_enter(void);
167extern void rcu_irq_exit(void);
168extern void rcu_nmi_enter(void); 153extern void rcu_nmi_enter(void);
169extern void rcu_nmi_exit(void); 154extern void rcu_nmi_exit(void);
170#endif 155#endif
171#else
172# define rcu_irq_enter() do { } while (0)
173# define rcu_irq_exit() do { } while (0)
174# define rcu_nmi_enter() do { } while (0)
175# define rcu_nmi_exit() do { } while (0)
176#endif /* #if defined(CONFIG_NO_HZ) */
177 156
178/* 157/*
179 * It is safe to do non-atomic ops on ->hardirq_context, 158 * It is safe to do non-atomic ops on ->hardirq_context,
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 388b0d425b50..5ce8b140428f 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/compiler.h> 5#include <linux/compiler.h>
6#include <linux/workqueue.h>
6 7
7#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) 8#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
8 9
@@ -14,6 +15,12 @@ struct jump_label_key {
14#endif 15#endif
15}; 16};
16 17
18struct jump_label_key_deferred {
19 struct jump_label_key key;
20 unsigned long timeout;
21 struct delayed_work work;
22};
23
17# include <asm/jump_label.h> 24# include <asm/jump_label.h>
18# define HAVE_JUMP_LABEL 25# define HAVE_JUMP_LABEL
19#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ 26#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
@@ -51,8 +58,11 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry,
51extern int jump_label_text_reserved(void *start, void *end); 58extern int jump_label_text_reserved(void *start, void *end);
52extern void jump_label_inc(struct jump_label_key *key); 59extern void jump_label_inc(struct jump_label_key *key);
53extern void jump_label_dec(struct jump_label_key *key); 60extern void jump_label_dec(struct jump_label_key *key);
61extern void jump_label_dec_deferred(struct jump_label_key_deferred *key);
54extern bool jump_label_enabled(struct jump_label_key *key); 62extern bool jump_label_enabled(struct jump_label_key *key);
55extern void jump_label_apply_nops(struct module *mod); 63extern void jump_label_apply_nops(struct module *mod);
64extern void jump_label_rate_limit(struct jump_label_key_deferred *key,
65 unsigned long rl);
56 66
57#else /* !HAVE_JUMP_LABEL */ 67#else /* !HAVE_JUMP_LABEL */
58 68
@@ -68,6 +78,10 @@ static __always_inline void jump_label_init(void)
68{ 78{
69} 79}
70 80
81struct jump_label_key_deferred {
82 struct jump_label_key key;
83};
84
71static __always_inline bool static_branch(struct jump_label_key *key) 85static __always_inline bool static_branch(struct jump_label_key *key)
72{ 86{
73 if (unlikely(atomic_read(&key->enabled))) 87 if (unlikely(atomic_read(&key->enabled)))
@@ -85,6 +99,11 @@ static inline void jump_label_dec(struct jump_label_key *key)
85 atomic_dec(&key->enabled); 99 atomic_dec(&key->enabled);
86} 100}
87 101
102static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key)
103{
104 jump_label_dec(&key->key);
105}
106
88static inline int jump_label_text_reserved(void *start, void *end) 107static inline int jump_label_text_reserved(void *start, void *end)
89{ 108{
90 return 0; 109 return 0;
@@ -102,6 +121,14 @@ static inline int jump_label_apply_nops(struct module *mod)
102{ 121{
103 return 0; 122 return 0;
104} 123}
124
125static inline void jump_label_rate_limit(struct jump_label_key_deferred *key,
126 unsigned long rl)
127{
128}
105#endif /* HAVE_JUMP_LABEL */ 129#endif /* HAVE_JUMP_LABEL */
106 130
131#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), })
132#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), })
133
107#endif /* _LINUX_JUMP_LABEL_H */ 134#endif /* _LINUX_JUMP_LABEL_H */
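The deferred-key API above fits together as follows; a minimal sketch with a hypothetical sample_key (the real consumer in this series is perf's perf_sched_events):

	/* Sketch only: a static branch whose disable side is rate limited. */
	static struct jump_label_key_deferred sample_key;

	static void sample_init(void)
	{
		/* Batch disables: wait at least HZ jiffies before patching out. */
		jump_label_rate_limit(&sample_key, HZ);
	}

	static bool sample_fastpath(void)
	{
		return static_branch(&sample_key.key);	/* patched jump when enabled */
	}

	static void sample_get(void)
	{
		jump_label_inc(&sample_key.key);	/* enable immediately */
	}

	static void sample_put(void)
	{
		jump_label_dec_deferred(&sample_key);	/* disable via delayed work */
	}

The point of the timeout is to avoid patching kernel text back and forth when a key is toggled rapidly, which is expensive on every CPU.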
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c3892fc1d538..68e67e50d028 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -557,6 +557,7 @@ struct kvm_ppc_pvinfo {
557#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ 557#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
558#define KVM_CAP_PPC_PAPR 68 558#define KVM_CAP_PPC_PAPR 68
559#define KVM_CAP_S390_GMAP 71 559#define KVM_CAP_S390_GMAP 71
560#define KVM_CAP_TSC_DEADLINE_TIMER 72
560 561
561#ifdef KVM_CAP_IRQ_ROUTING 562#ifdef KVM_CAP_IRQ_ROUTING
562 563
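Userspace discovers the new capability through the standard KVM_CHECK_EXTENSION ioctl; a hedged sketch (error handling elided):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int has_tsc_deadline_timer(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		int ret = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER);

		close(kvm);
		return ret > 0;	/* > 0: the TSC deadline timer can be exposed */
	}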
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index f549056fb20b..87f402ccec55 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -22,6 +22,7 @@
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/lockdep.h> 23#include <linux/lockdep.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/cpu.h>
25 26
26/* can make br locks by using local lock for read side, global lock for write */ 27/* can make br locks by using local lock for read side, global lock for write */
27#define br_lock_init(name) name##_lock_init() 28#define br_lock_init(name) name##_lock_init()
@@ -72,9 +73,31 @@
72 73
73#define DEFINE_LGLOCK(name) \ 74#define DEFINE_LGLOCK(name) \
74 \ 75 \
76 DEFINE_SPINLOCK(name##_cpu_lock); \
77 cpumask_t name##_cpus __read_mostly; \
75 DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ 78 DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
76 DEFINE_LGLOCK_LOCKDEP(name); \ 79 DEFINE_LGLOCK_LOCKDEP(name); \
77 \ 80 \
81 static int \
82 name##_lg_cpu_callback(struct notifier_block *nb, \
83 unsigned long action, void *hcpu) \
84 { \
85 switch (action & ~CPU_TASKS_FROZEN) { \
86 case CPU_UP_PREPARE: \
87 spin_lock(&name##_cpu_lock); \
88 cpu_set((unsigned long)hcpu, name##_cpus); \
89 spin_unlock(&name##_cpu_lock); \
90 break; \
91 case CPU_UP_CANCELED: case CPU_DEAD: \
92 spin_lock(&name##_cpu_lock); \
93 cpu_clear((unsigned long)hcpu, name##_cpus); \
94 spin_unlock(&name##_cpu_lock); \
95 } \
96 return NOTIFY_OK; \
97 } \
98 static struct notifier_block name##_lg_cpu_notifier = { \
99 .notifier_call = name##_lg_cpu_callback, \
100 }; \
78 void name##_lock_init(void) { \ 101 void name##_lock_init(void) { \
79 int i; \ 102 int i; \
80 LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ 103 LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
@@ -83,6 +106,11 @@
83 lock = &per_cpu(name##_lock, i); \ 106 lock = &per_cpu(name##_lock, i); \
84 *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ 107 *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \
85 } \ 108 } \
109 register_hotcpu_notifier(&name##_lg_cpu_notifier); \
110 get_online_cpus(); \
111 for_each_online_cpu(i) \
112 cpu_set(i, name##_cpus); \
113 put_online_cpus(); \
86 } \ 114 } \
87 EXPORT_SYMBOL(name##_lock_init); \ 115 EXPORT_SYMBOL(name##_lock_init); \
88 \ 116 \
@@ -124,9 +152,9 @@
124 \ 152 \
125 void name##_global_lock_online(void) { \ 153 void name##_global_lock_online(void) { \
126 int i; \ 154 int i; \
127 preempt_disable(); \ 155 spin_lock(&name##_cpu_lock); \
128 rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ 156 rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
129 for_each_online_cpu(i) { \ 157 for_each_cpu(i, &name##_cpus) { \
130 arch_spinlock_t *lock; \ 158 arch_spinlock_t *lock; \
131 lock = &per_cpu(name##_lock, i); \ 159 lock = &per_cpu(name##_lock, i); \
132 arch_spin_lock(lock); \ 160 arch_spin_lock(lock); \
@@ -137,12 +165,12 @@
137 void name##_global_unlock_online(void) { \ 165 void name##_global_unlock_online(void) { \
138 int i; \ 166 int i; \
139 rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ 167 rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
140 for_each_online_cpu(i) { \ 168 for_each_cpu(i, &name##_cpus) { \
141 arch_spinlock_t *lock; \ 169 arch_spinlock_t *lock; \
142 lock = &per_cpu(name##_lock, i); \ 170 lock = &per_cpu(name##_lock, i); \
143 arch_spin_unlock(lock); \ 171 arch_spin_unlock(lock); \
144 } \ 172 } \
145 preempt_enable(); \ 173 spin_unlock(&name##_cpu_lock); \
146 } \ 174 } \
147 EXPORT_SYMBOL(name##_global_unlock_online); \ 175 EXPORT_SYMBOL(name##_global_unlock_online); \
148 \ 176 \
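For reference, the macro family is consumed roughly as below (a sketch with an illustrative name; the br_* wrappers are defined near the top of this header):

	DEFINE_LGLOCK(example_lock);

	static int __init example_init(void)
	{
		br_lock_init(example_lock);	/* now also registers the hotplug notifier */
		return 0;
	}

	static void example_reader(void)
	{
		br_read_lock(example_lock);	/* this CPU's spinlock only */
		/* ... read-side critical section ... */
		br_read_unlock(example_lock);
	}

	static void example_writer(void)
	{
		br_write_lock(example_lock);	/* every covered CPU's spinlock */
		/* ... */
		br_write_unlock(example_lock);
	}

The switch from preempt_disable() to name##_cpu_lock in the *_global_*_online() pair is the substantive change: the set of CPUs covered is frozen by the spinlock across the lock/unlock pair, so a CPU hot-plugged in between can no longer be unlocked without having been locked.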
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b6a56e37284c..d36619ead3ba 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -343,6 +343,8 @@ extern void lockdep_trace_alloc(gfp_t mask);
343 343
344#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) 344#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l))
345 345
346#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
347
346#else /* !LOCKDEP */ 348#else /* !LOCKDEP */
347 349
348static inline void lockdep_off(void) 350static inline void lockdep_off(void)
@@ -392,6 +394,8 @@ struct lock_class_key { };
392 394
393#define lockdep_assert_held(l) do { } while (0) 395#define lockdep_assert_held(l) do { } while (0)
394 396
397#define lockdep_recursing(tsk) (0)
398
395#endif /* !LOCKDEP */ 399#endif /* !LOCKDEP */
396 400
397#ifdef CONFIG_LOCK_STAT 401#ifdef CONFIG_LOCK_STAT
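lockdep_recursing() lets diagnostic code bail out when lockdep is already active on the current task; a minimal sketch of the intended pattern (the caller in this series is RCU's lockdep plumbing):

	#include <linux/lockdep.h>
	#include <linux/debug_locks.h>
	#include <linux/sched.h>

	static void maybe_assert_held(struct lockdep_map *map)
	{
		/* Avoid recursing into lockdep from paths lockdep may itself take. */
		if (lockdep_recursing(current))
			return;
		WARN_ON(debug_locks && !lock_is_held(map));
	}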
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e6b843e16e81..a6bb10235148 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,8 +2,6 @@
2#define _LINUX_MEMBLOCK_H 2#define _LINUX_MEMBLOCK_H
3#ifdef __KERNEL__ 3#ifdef __KERNEL__
4 4
5#define MEMBLOCK_ERROR 0
6
7#ifdef CONFIG_HAVE_MEMBLOCK 5#ifdef CONFIG_HAVE_MEMBLOCK
8/* 6/*
9 * Logical memory blocks. 7 * Logical memory blocks.
@@ -19,81 +17,161 @@
19#include <linux/init.h> 17#include <linux/init.h>
20#include <linux/mm.h> 18#include <linux/mm.h>
21 19
22#include <asm/memblock.h>
23
24#define INIT_MEMBLOCK_REGIONS 128 20#define INIT_MEMBLOCK_REGIONS 128
25 21
26struct memblock_region { 22struct memblock_region {
27 phys_addr_t base; 23 phys_addr_t base;
28 phys_addr_t size; 24 phys_addr_t size;
25#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
26 int nid;
27#endif
29}; 28};
30 29
31struct memblock_type { 30struct memblock_type {
32 unsigned long cnt; /* number of regions */ 31 unsigned long cnt; /* number of regions */
33 unsigned long max; /* size of the allocated array */ 32 unsigned long max; /* size of the allocated array */
33 phys_addr_t total_size; /* size of all regions */
34 struct memblock_region *regions; 34 struct memblock_region *regions;
35}; 35};
36 36
37struct memblock { 37struct memblock {
38 phys_addr_t current_limit; 38 phys_addr_t current_limit;
39 phys_addr_t memory_size; /* Updated by memblock_analyze() */
40 struct memblock_type memory; 39 struct memblock_type memory;
41 struct memblock_type reserved; 40 struct memblock_type reserved;
42}; 41};
43 42
44extern struct memblock memblock; 43extern struct memblock memblock;
45extern int memblock_debug; 44extern int memblock_debug;
46extern int memblock_can_resize;
47 45
48#define memblock_dbg(fmt, ...) \ 46#define memblock_dbg(fmt, ...) \
49 if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) 47 if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
50 48
51u64 memblock_find_in_range(u64 start, u64 end, u64 size, u64 align); 49phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end,
50 phys_addr_t size, phys_addr_t align, int nid);
51phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
52 phys_addr_t size, phys_addr_t align);
52int memblock_free_reserved_regions(void); 53int memblock_free_reserved_regions(void);
53int memblock_reserve_reserved_regions(void); 54int memblock_reserve_reserved_regions(void);
54 55
55extern void memblock_init(void); 56void memblock_allow_resize(void);
56extern void memblock_analyze(void); 57int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
57extern long memblock_add(phys_addr_t base, phys_addr_t size); 58int memblock_add(phys_addr_t base, phys_addr_t size);
58extern long memblock_remove(phys_addr_t base, phys_addr_t size); 59int memblock_remove(phys_addr_t base, phys_addr_t size);
59extern long memblock_free(phys_addr_t base, phys_addr_t size); 60int memblock_free(phys_addr_t base, phys_addr_t size);
60extern long memblock_reserve(phys_addr_t base, phys_addr_t size); 61int memblock_reserve(phys_addr_t base, phys_addr_t size);
62
63#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
64void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
65 unsigned long *out_end_pfn, int *out_nid);
66
67/**
68 * for_each_mem_pfn_range - early memory pfn range iterator
69 * @i: an integer used as loop variable
70 * @nid: node selector, %MAX_NUMNODES for all nodes
71 * @p_start: ptr to ulong for start pfn of the range, can be %NULL
72 * @p_end: ptr to ulong for end pfn of the range, can be %NULL
73 * @p_nid: ptr to int for nid of the range, can be %NULL
74 *
75 * Walks over configured memory ranges. Available after early_node_map is
76 * populated.
77 */
78#define for_each_mem_pfn_range(i, nid, p_start, p_end, p_nid) \
79 for (i = -1, __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid); \
80 i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
81#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
82
83void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
84 phys_addr_t *out_end, int *out_nid);
85
86/**
87 * for_each_free_mem_range - iterate through free memblock areas
88 * @i: u64 used as loop variable
89 * @nid: node selector, %MAX_NUMNODES for all nodes
90 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
91 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
92 * @p_nid: ptr to int for nid of the range, can be %NULL
93 *
94 * Walks over free (memory && !reserved) areas of memblock. Available as
95 * soon as memblock is initialized.
96 */
97#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid) \
98 for (i = 0, \
99 __next_free_mem_range(&i, nid, p_start, p_end, p_nid); \
100 i != (u64)ULLONG_MAX; \
101 __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
102
103void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
104 phys_addr_t *out_end, int *out_nid);
61 105
62/* The numa aware allocator is only available if 106/**
63 * CONFIG_ARCH_POPULATES_NODE_MAP is set 107 * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
108 * @i: u64 used as loop variable
109 * @nid: node selector, %MAX_NUMNODES for all nodes
110 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
111 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
112 * @p_nid: ptr to int for nid of the range, can be %NULL
113 *
114 * Walks over free (memory && !reserved) areas of memblock in reverse
115 * order. Available as soon as memblock is initialized.
64 */ 116 */
65extern phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, 117#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid) \
66 int nid); 118 for (i = (u64)ULLONG_MAX, \
67extern phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, 119 __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid); \
68 int nid); 120 i != (u64)ULLONG_MAX; \
121 __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
69 122
70extern phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); 123#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
124int memblock_set_node(phys_addr_t base, phys_addr_t size, int nid);
125
126static inline void memblock_set_region_node(struct memblock_region *r, int nid)
127{
128 r->nid = nid;
129}
130
131static inline int memblock_get_region_node(const struct memblock_region *r)
132{
133 return r->nid;
134}
135#else
136static inline void memblock_set_region_node(struct memblock_region *r, int nid)
137{
138}
139
140static inline int memblock_get_region_node(const struct memblock_region *r)
141{
142 return 0;
143}
144#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
145
146phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
147phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
148
149phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
71 150
 72/* Flags for memblock_alloc_base() and __memblock_alloc_base() */ 151/* Flags for memblock_alloc_base() and __memblock_alloc_base() */
73#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) 152#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
74#define MEMBLOCK_ALLOC_ACCESSIBLE 0 153#define MEMBLOCK_ALLOC_ACCESSIBLE 0
75 154
76extern phys_addr_t memblock_alloc_base(phys_addr_t size, 155phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
77 phys_addr_t align, 156 phys_addr_t max_addr);
78 phys_addr_t max_addr); 157phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
79extern phys_addr_t __memblock_alloc_base(phys_addr_t size, 158 phys_addr_t max_addr);
80 phys_addr_t align, 159phys_addr_t memblock_phys_mem_size(void);
81 phys_addr_t max_addr); 160phys_addr_t memblock_start_of_DRAM(void);
82extern phys_addr_t memblock_phys_mem_size(void); 161phys_addr_t memblock_end_of_DRAM(void);
83extern phys_addr_t memblock_start_of_DRAM(void); 162void memblock_enforce_memory_limit(phys_addr_t memory_limit);
84extern phys_addr_t memblock_end_of_DRAM(void); 163int memblock_is_memory(phys_addr_t addr);
85extern void memblock_enforce_memory_limit(phys_addr_t memory_limit); 164int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
86extern int memblock_is_memory(phys_addr_t addr); 165int memblock_is_reserved(phys_addr_t addr);
87extern int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); 166int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
88extern int memblock_is_reserved(phys_addr_t addr); 167
89extern int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); 168extern void __memblock_dump_all(void);
90 169
91extern void memblock_dump_all(void); 170static inline void memblock_dump_all(void)
92 171{
93/* Provided by the architecture */ 172 if (memblock_debug)
94extern phys_addr_t memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid); 173 __memblock_dump_all();
95extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, 174}
96 phys_addr_t addr2, phys_addr_t size2);
97 175
98/** 176/**
99 * memblock_set_current_limit - Set the current allocation limit to allow 177 * memblock_set_current_limit - Set the current allocation limit to allow
@@ -101,7 +179,7 @@ extern int memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
101 * accessible during boot 179 * accessible during boot
102 * @limit: New limit value (physical address) 180 * @limit: New limit value (physical address)
103 */ 181 */
104extern void memblock_set_current_limit(phys_addr_t limit); 182void memblock_set_current_limit(phys_addr_t limit);
105 183
106 184
107/* 185/*
@@ -154,9 +232,9 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
154 region++) 232 region++)
155 233
156 234
157#ifdef ARCH_DISCARD_MEMBLOCK 235#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
158#define __init_memblock __init 236#define __init_memblock __meminit
159#define __initdata_memblock __initdata 237#define __initdata_memblock __meminitdata
160#else 238#else
161#define __init_memblock 239#define __init_memblock
162#define __initdata_memblock 240#define __initdata_memblock
@@ -165,7 +243,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
165#else 243#else
166static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) 244static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
167{ 245{
168 return MEMBLOCK_ERROR; 246 return 0;
169} 247}
170 248
171#endif /* CONFIG_HAVE_MEMBLOCK */ 249#endif /* CONFIG_HAVE_MEMBLOCK */
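With MEMBLOCK_ERROR gone, 0 is the failure value throughout, and the new iterators replace open-coded scans; a hedged sketch of an early-boot lookup using the interfaces declared above:

	#include <linux/memblock.h>

	/* Illustrative: first free range big enough for 'size', any node. */
	static phys_addr_t __init sketch_find_free(phys_addr_t size)
	{
		phys_addr_t start, end;
		u64 i;

		for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
			if (end - start >= size)
				return start;
		return 0;	/* 0 now signals failure; MEMBLOCK_ERROR is gone */
	}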
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4baadd18f4ad..5d9b4c9813bd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1253,41 +1253,34 @@ static inline void pgtable_page_dtor(struct page *page)
1253extern void free_area_init(unsigned long * zones_size); 1253extern void free_area_init(unsigned long * zones_size);
1254extern void free_area_init_node(int nid, unsigned long * zones_size, 1254extern void free_area_init_node(int nid, unsigned long * zones_size,
1255 unsigned long zone_start_pfn, unsigned long *zholes_size); 1255 unsigned long zone_start_pfn, unsigned long *zholes_size);
1256#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 1256#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
1257/* 1257/*
1258 * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its 1258 * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
1259 * zones, allocate the backing mem_map and account for memory holes in a more 1259 * zones, allocate the backing mem_map and account for memory holes in a more
1260 * architecture independent manner. This is a substitute for creating the 1260 * architecture independent manner. This is a substitute for creating the
1261 * zone_sizes[] and zholes_size[] arrays and passing them to 1261 * zone_sizes[] and zholes_size[] arrays and passing them to
1262 * free_area_init_node() 1262 * free_area_init_node()
1263 * 1263 *
1264 * An architecture is expected to register range of page frames backed by 1264 * An architecture is expected to register range of page frames backed by
1265 * physical memory with add_active_range() before calling 1265 * physical memory with memblock_add[_node]() before calling
1266 * free_area_init_nodes() passing in the PFN each zone ends at. At a basic 1266 * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
1267 * usage, an architecture is expected to do something like 1267 * usage, an architecture is expected to do something like
1268 * 1268 *
1269 * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, 1269 * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
1270 * max_highmem_pfn}; 1270 * max_highmem_pfn};
1271 * for_each_valid_physical_page_range() 1271 * for_each_valid_physical_page_range()
1272 * add_active_range(node_id, start_pfn, end_pfn) 1272 * memblock_add_node(base, size, nid)
1273 * free_area_init_nodes(max_zone_pfns); 1273 * free_area_init_nodes(max_zone_pfns);
1274 * 1274 *
1275 * If the architecture guarantees that there are no holes in the ranges 1275 * free_bootmem_with_active_regions() calls free_bootmem_node() for each
1276 * registered with add_active_range(), free_bootmem_active_regions() 1276 * registered physical page range. Similarly
1277 * will call free_bootmem_node() for each registered physical page range. 1277 * sparse_memory_present_with_active_regions() calls memory_present() for
1278 * Similarly sparse_memory_present_with_active_regions() calls 1278 * each range when SPARSEMEM is enabled.
1279 * memory_present() for each range when SPARSEMEM is enabled.
1280 * 1279 *
1281 * See mm/page_alloc.c for more information on each function exposed by 1280 * See mm/page_alloc.c for more information on each function exposed by
1282 * CONFIG_ARCH_POPULATES_NODE_MAP 1281 * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
1283 */ 1282 */
1284extern void free_area_init_nodes(unsigned long *max_zone_pfn); 1283extern void free_area_init_nodes(unsigned long *max_zone_pfn);
1285extern void add_active_range(unsigned int nid, unsigned long start_pfn,
1286 unsigned long end_pfn);
1287extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
1288 unsigned long end_pfn);
1289extern void remove_all_active_ranges(void);
1290void sort_node_map(void);
1291unsigned long node_map_pfn_alignment(void); 1284unsigned long node_map_pfn_alignment(void);
1292unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, 1285unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
1293 unsigned long end_pfn); 1286 unsigned long end_pfn);
@@ -1300,14 +1293,11 @@ extern void free_bootmem_with_active_regions(int nid,
1300 unsigned long max_low_pfn); 1293 unsigned long max_low_pfn);
1301int add_from_early_node_map(struct range *range, int az, 1294int add_from_early_node_map(struct range *range, int az,
1302 int nr_range, int nid); 1295 int nr_range, int nid);
1303u64 __init find_memory_core_early(int nid, u64 size, u64 align,
1304 u64 goal, u64 limit);
1305typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1306extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1307extern void sparse_memory_present_with_active_regions(int nid); 1296extern void sparse_memory_present_with_active_regions(int nid);
1308#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
1309 1297
1310#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ 1298#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
1299
1300#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
1311 !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) 1301 !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
1312static inline int __early_pfn_to_nid(unsigned long pfn) 1302static inline int __early_pfn_to_nid(unsigned long pfn)
1313{ 1303{
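Rendered as code, the revised init sequence the comment describes looks roughly like this (a sketch: base, size, nid and the pfn limits stand in for architecture-detected values):

	static void __init sketch_zones_init(void)
	{
		unsigned long max_zone_pfns[MAX_NR_ZONES];

		/* Once per detected RAM range: replaces add_active_range(). */
		memblock_add_node(base, size, nid);

		max_zone_pfns[ZONE_DMA]    = max_dma_pfn;
		max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
		free_area_init_nodes(max_zone_pfns);
	}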
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 188cb2ffe8db..3ac040f19369 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -598,13 +598,13 @@ struct zonelist {
598#endif 598#endif
599}; 599};
600 600
601#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 601#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
602struct node_active_region { 602struct node_active_region {
603 unsigned long start_pfn; 603 unsigned long start_pfn;
604 unsigned long end_pfn; 604 unsigned long end_pfn;
605 int nid; 605 int nid;
606}; 606};
607#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 607#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
608 608
609#ifndef CONFIG_DISCONTIGMEM 609#ifndef CONFIG_DISCONTIGMEM
610/* The array of struct pages - for discontigmem use pgdat->lmem_map */ 610/* The array of struct pages - for discontigmem use pgdat->lmem_map */
@@ -720,7 +720,7 @@ extern int movable_zone;
720 720
721static inline int zone_movable_is_highmem(void) 721static inline int zone_movable_is_highmem(void)
722{ 722{
723#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP) 723#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
724 return movable_zone == ZONE_HIGHMEM; 724 return movable_zone == ZONE_HIGHMEM;
725#else 725#else
726 return 0; 726 return 0;
@@ -938,7 +938,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
938#endif 938#endif
939 939
940#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \ 940#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
941 !defined(CONFIG_ARCH_POPULATES_NODE_MAP) 941 !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
942static inline unsigned long early_pfn_to_nid(unsigned long pfn) 942static inline unsigned long early_pfn_to_nid(unsigned long pfn)
943{ 943{
944 return 0; 944 return 0;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b1f89122bf6a..08855613ceb3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -54,6 +54,7 @@ enum perf_hw_id {
54 PERF_COUNT_HW_BUS_CYCLES = 6, 54 PERF_COUNT_HW_BUS_CYCLES = 6,
55 PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, 55 PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
56 PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, 56 PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
57 PERF_COUNT_HW_REF_CPU_CYCLES = 9,
57 58
58 PERF_COUNT_HW_MAX, /* non-ABI */ 59 PERF_COUNT_HW_MAX, /* non-ABI */
59}; 60};
@@ -890,6 +891,7 @@ struct perf_event_context {
890 int nr_active; 891 int nr_active;
891 int is_active; 892 int is_active;
892 int nr_stat; 893 int nr_stat;
894 int nr_freq;
893 int rotate_disable; 895 int rotate_disable;
894 atomic_t refcount; 896 atomic_t refcount;
895 struct task_struct *task; 897 struct task_struct *task;
@@ -1063,12 +1065,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1063 } 1065 }
1064} 1066}
1065 1067
1066extern struct jump_label_key perf_sched_events; 1068extern struct jump_label_key_deferred perf_sched_events;
1067 1069
1068static inline void perf_event_task_sched_in(struct task_struct *prev, 1070static inline void perf_event_task_sched_in(struct task_struct *prev,
1069 struct task_struct *task) 1071 struct task_struct *task)
1070{ 1072{
1071 if (static_branch(&perf_sched_events)) 1073 if (static_branch(&perf_sched_events.key))
1072 __perf_event_task_sched_in(prev, task); 1074 __perf_event_task_sched_in(prev, task);
1073} 1075}
1074 1076
@@ -1077,7 +1079,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
1077{ 1079{
1078 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); 1080 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
1079 1081
1080 if (static_branch(&perf_sched_events)) 1082 if (static_branch(&perf_sched_events.key))
1081 __perf_event_task_sched_out(prev, next); 1083 __perf_event_task_sched_out(prev, next);
1082} 1084}
1083 1085
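perf_sched_events becoming a jump_label_key_deferred is what motivates the jump_label_rate_limit() interface above: destroying the last per-task event no longer patches the scheduler hooks out immediately. Presumably the key is armed once at init, along the lines of:

	/* Sketch; the actual call site is in kernel/events/core.c. */
	jump_label_rate_limit(&perf_sched_events, HZ);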
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 79159de0e341..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -40,12 +40,6 @@
40#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ 40#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
41#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ 41#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
42 42
43#ifdef CONFIG_PHYS_ADDR_T_64BIT
44#define MEMBLOCK_INACTIVE 0x3a84fb0144c9e71bULL
45#else
46#define MEMBLOCK_INACTIVE 0x44c9e71bUL
47#endif
48
49#define SLUB_RED_INACTIVE 0xbb 43#define SLUB_RED_INACTIVE 0xbb
50#define SLUB_RED_ACTIVE 0xcc 44#define SLUB_RED_ACTIVE 0xcc
51 45
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2cf4226ade7e..81c04f4348ec 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -51,6 +51,8 @@ extern int rcutorture_runnable; /* for sysctl */
51#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 51#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
52extern void rcutorture_record_test_transition(void); 52extern void rcutorture_record_test_transition(void);
53extern void rcutorture_record_progress(unsigned long vernum); 53extern void rcutorture_record_progress(unsigned long vernum);
54extern void do_trace_rcu_torture_read(char *rcutorturename,
55 struct rcu_head *rhp);
54#else 56#else
55static inline void rcutorture_record_test_transition(void) 57static inline void rcutorture_record_test_transition(void)
56{ 58{
@@ -58,6 +60,12 @@ static inline void rcutorture_record_test_transition(void)
58static inline void rcutorture_record_progress(unsigned long vernum) 60static inline void rcutorture_record_progress(unsigned long vernum)
59{ 61{
60} 62}
63#ifdef CONFIG_RCU_TRACE
64extern void do_trace_rcu_torture_read(char *rcutorturename,
65 struct rcu_head *rhp);
66#else
67#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
68#endif
61#endif 69#endif
62 70
63#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) 71#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
@@ -177,23 +185,10 @@ extern void rcu_sched_qs(int cpu);
177extern void rcu_bh_qs(int cpu); 185extern void rcu_bh_qs(int cpu);
178extern void rcu_check_callbacks(int cpu, int user); 186extern void rcu_check_callbacks(int cpu, int user);
179struct notifier_block; 187struct notifier_block;
180 188extern void rcu_idle_enter(void);
181#ifdef CONFIG_NO_HZ 189extern void rcu_idle_exit(void);
182 190extern void rcu_irq_enter(void);
183extern void rcu_enter_nohz(void); 191extern void rcu_irq_exit(void);
184extern void rcu_exit_nohz(void);
185
186#else /* #ifdef CONFIG_NO_HZ */
187
188static inline void rcu_enter_nohz(void)
189{
190}
191
192static inline void rcu_exit_nohz(void)
193{
194}
195
196#endif /* #else #ifdef CONFIG_NO_HZ */
197 192
198/* 193/*
199 * Infrastructure to implement the synchronize_() primitives in 194 * Infrastructure to implement the synchronize_() primitives in
@@ -233,22 +228,30 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
233 228
234#ifdef CONFIG_DEBUG_LOCK_ALLOC 229#ifdef CONFIG_DEBUG_LOCK_ALLOC
235 230
236extern struct lockdep_map rcu_lock_map; 231#ifdef CONFIG_PROVE_RCU
237# define rcu_read_acquire() \ 232extern int rcu_is_cpu_idle(void);
238 lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) 233#else /* !CONFIG_PROVE_RCU */
239# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) 234static inline int rcu_is_cpu_idle(void)
235{
236 return 0;
237}
238#endif /* else !CONFIG_PROVE_RCU */
240 239
241extern struct lockdep_map rcu_bh_lock_map; 240static inline void rcu_lock_acquire(struct lockdep_map *map)
242# define rcu_read_acquire_bh() \ 241{
243 lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) 242 WARN_ON_ONCE(rcu_is_cpu_idle());
244# define rcu_read_release_bh() lock_release(&rcu_bh_lock_map, 1, _THIS_IP_) 243 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
244}
245 245
246extern struct lockdep_map rcu_sched_lock_map; 246static inline void rcu_lock_release(struct lockdep_map *map)
247# define rcu_read_acquire_sched() \ 247{
248 lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) 248 WARN_ON_ONCE(rcu_is_cpu_idle());
249# define rcu_read_release_sched() \ 249 lock_release(map, 1, _THIS_IP_);
250 lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) 250}
251 251
252extern struct lockdep_map rcu_lock_map;
253extern struct lockdep_map rcu_bh_lock_map;
254extern struct lockdep_map rcu_sched_lock_map;
252extern int debug_lockdep_rcu_enabled(void); 255extern int debug_lockdep_rcu_enabled(void);
253 256
254/** 257/**
@@ -262,11 +265,18 @@ extern int debug_lockdep_rcu_enabled(void);
262 * 265 *
263 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot 266 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
264 * and while lockdep is disabled. 267 * and while lockdep is disabled.
268 *
269 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
270 * occur in the same context, for example, it is illegal to invoke
271 * rcu_read_unlock() in process context if the matching rcu_read_lock()
272 * was invoked from within an irq handler.
265 */ 273 */
266static inline int rcu_read_lock_held(void) 274static inline int rcu_read_lock_held(void)
267{ 275{
268 if (!debug_lockdep_rcu_enabled()) 276 if (!debug_lockdep_rcu_enabled())
269 return 1; 277 return 1;
278 if (rcu_is_cpu_idle())
279 return 0;
270 return lock_is_held(&rcu_lock_map); 280 return lock_is_held(&rcu_lock_map);
271} 281}
272 282
@@ -290,6 +300,19 @@ extern int rcu_read_lock_bh_held(void);
290 * 300 *
291 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot 301 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
292 * and while lockdep is disabled. 302 * and while lockdep is disabled.
303 *
304 * Note that if the CPU is in the idle loop from an RCU point of
 305 * view (i.e., that we are in the section between rcu_idle_enter() and
 306 * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
 307 * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
 308 * that are in such a section, considering these as in extended quiescent
 309 * state, so such a CPU is effectively never in an RCU read-side critical
 310 * section regardless of what RCU primitives it invokes. This state of
 311 * affairs is required: we need to keep an RCU-free window in idle
 312 * where the CPU may possibly enter into low power mode. This way, CPUs
 313 * that started a grace period can observe our extended quiescent state.
 314 * Otherwise we would delay any grace period for as long as we run in
 315 * the idle task.
293 */ 316 */
294#ifdef CONFIG_PREEMPT_COUNT 317#ifdef CONFIG_PREEMPT_COUNT
295static inline int rcu_read_lock_sched_held(void) 318static inline int rcu_read_lock_sched_held(void)
@@ -298,6 +321,8 @@ static inline int rcu_read_lock_sched_held(void)
298 321
299 if (!debug_lockdep_rcu_enabled()) 322 if (!debug_lockdep_rcu_enabled())
300 return 1; 323 return 1;
324 if (rcu_is_cpu_idle())
325 return 0;
301 if (debug_locks) 326 if (debug_locks)
302 lockdep_opinion = lock_is_held(&rcu_sched_lock_map); 327 lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
303 return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); 328 return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
@@ -311,12 +336,8 @@ static inline int rcu_read_lock_sched_held(void)
311 336
312#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 337#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
313 338
314# define rcu_read_acquire() do { } while (0) 339# define rcu_lock_acquire(a) do { } while (0)
315# define rcu_read_release() do { } while (0) 340# define rcu_lock_release(a) do { } while (0)
316# define rcu_read_acquire_bh() do { } while (0)
317# define rcu_read_release_bh() do { } while (0)
318# define rcu_read_acquire_sched() do { } while (0)
319# define rcu_read_release_sched() do { } while (0)
320 341
321static inline int rcu_read_lock_held(void) 342static inline int rcu_read_lock_held(void)
322{ 343{
@@ -637,7 +658,7 @@ static inline void rcu_read_lock(void)
637{ 658{
638 __rcu_read_lock(); 659 __rcu_read_lock();
639 __acquire(RCU); 660 __acquire(RCU);
640 rcu_read_acquire(); 661 rcu_lock_acquire(&rcu_lock_map);
641} 662}
642 663
643/* 664/*
@@ -657,7 +678,7 @@ static inline void rcu_read_lock(void)
657 */ 678 */
658static inline void rcu_read_unlock(void) 679static inline void rcu_read_unlock(void)
659{ 680{
660 rcu_read_release(); 681 rcu_lock_release(&rcu_lock_map);
661 __release(RCU); 682 __release(RCU);
662 __rcu_read_unlock(); 683 __rcu_read_unlock();
663} 684}
@@ -673,12 +694,17 @@ static inline void rcu_read_unlock(void)
673 * critical sections in interrupt context can use just rcu_read_lock(), 694 * critical sections in interrupt context can use just rcu_read_lock(),
674 * though this should at least be commented to avoid confusing people 695 * though this should at least be commented to avoid confusing people
675 * reading the code. 696 * reading the code.
697 *
698 * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
699 * must occur in the same context, for example, it is illegal to invoke
700 * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
701 * was invoked from some other task.
676 */ 702 */
677static inline void rcu_read_lock_bh(void) 703static inline void rcu_read_lock_bh(void)
678{ 704{
679 local_bh_disable(); 705 local_bh_disable();
680 __acquire(RCU_BH); 706 __acquire(RCU_BH);
681 rcu_read_acquire_bh(); 707 rcu_lock_acquire(&rcu_bh_lock_map);
682} 708}
683 709
684/* 710/*
@@ -688,7 +714,7 @@ static inline void rcu_read_lock_bh(void)
688 */ 714 */
689static inline void rcu_read_unlock_bh(void) 715static inline void rcu_read_unlock_bh(void)
690{ 716{
691 rcu_read_release_bh(); 717 rcu_lock_release(&rcu_bh_lock_map);
692 __release(RCU_BH); 718 __release(RCU_BH);
693 local_bh_enable(); 719 local_bh_enable();
694} 720}
@@ -700,12 +726,17 @@ static inline void rcu_read_unlock_bh(void)
700 * are being done using call_rcu_sched() or synchronize_rcu_sched(). 726 * are being done using call_rcu_sched() or synchronize_rcu_sched().
701 * Read-side critical sections can also be introduced by anything that 727 * Read-side critical sections can also be introduced by anything that
702 * disables preemption, including local_irq_disable() and friends. 728 * disables preemption, including local_irq_disable() and friends.
729 *
730 * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
731 * must occur in the same context, for example, it is illegal to invoke
732 * rcu_read_unlock_sched() from process context if the matching
733 * rcu_read_lock_sched() was invoked from an NMI handler.
703 */ 734 */
704static inline void rcu_read_lock_sched(void) 735static inline void rcu_read_lock_sched(void)
705{ 736{
706 preempt_disable(); 737 preempt_disable();
707 __acquire(RCU_SCHED); 738 __acquire(RCU_SCHED);
708 rcu_read_acquire_sched(); 739 rcu_lock_acquire(&rcu_sched_lock_map);
709} 740}
710 741
711/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ 742/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -722,7 +753,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
722 */ 753 */
723static inline void rcu_read_unlock_sched(void) 754static inline void rcu_read_unlock_sched(void)
724{ 755{
725 rcu_read_release_sched(); 756 rcu_lock_release(&rcu_sched_lock_map);
726 __release(RCU_SCHED); 757 __release(RCU_SCHED);
727 preempt_enable(); 758 preempt_enable();
728} 759}
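In code form, the rule the new rcu_lock_acquire()/rcu_is_cpu_idle() plumbing enforces (a sketch; gp and p are hypothetical RCU-protected names):

	/* Fine: reader runs outside the idle extended quiescent state. */
	rcu_read_lock();
	p = rcu_dereference(gp);
	/* ... use p ... */
	rcu_read_unlock();

	/* Broken: between rcu_idle_enter() and rcu_idle_exit() RCU ignores
	 * this CPU, so the reader is unprotected.  Under
	 * CONFIG_DEBUG_LOCK_ALLOC the WARN_ON_ONCE(rcu_is_cpu_idle()) in
	 * rcu_lock_acquire() now flags it. */
	rcu_idle_enter();
	rcu_read_lock();	/* lockdep-RCU splat */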
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a2ab3c2757d..cf0eb342bcba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2085,6 +2085,14 @@ extern int sched_setscheduler(struct task_struct *, int,
2085extern int sched_setscheduler_nocheck(struct task_struct *, int, 2085extern int sched_setscheduler_nocheck(struct task_struct *, int,
2086 const struct sched_param *); 2086 const struct sched_param *);
2087extern struct task_struct *idle_task(int cpu); 2087extern struct task_struct *idle_task(int cpu);
2088/**
2089 * is_idle_task - is the specified task an idle task?
2090 * @p: the task in question.
2091 */
2092static inline bool is_idle_task(struct task_struct *p)
2093{
2094 return p->pid == 0;
2095}
2088extern struct task_struct *curr_task(int cpu); 2096extern struct task_struct *curr_task(int cpu);
2089extern void set_curr_task(int cpu, struct task_struct *p); 2097extern void set_curr_task(int cpu, struct task_struct *p);
2090 2098
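is_idle_task() replaces open-coded pid == 0 tests; for instance, the RCU idle-entry checks in this series use it roughly as:

	/* Sketch: complain if a non-idle task reaches the idle path. */
	WARN_ON_ONCE(!is_idle_task(current));	/* "Error on entry: not idle task" */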
diff --git a/include/linux/security.h b/include/linux/security.h
index 19d8e04e1688..e8c619d39291 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2056,7 +2056,7 @@ static inline int security_old_inode_init_security(struct inode *inode,
2056 char **name, void **value, 2056 char **name, void **value,
2057 size_t *len) 2057 size_t *len)
2058{ 2058{
2059 return 0; 2059 return -EOPNOTSUPP;
2060} 2060}
2061 2061
2062static inline int security_inode_create(struct inode *dir, 2062static inline int security_inode_create(struct inode *dir,
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 58971e891f48..e1b005918bbb 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -28,6 +28,7 @@
28#define _LINUX_SRCU_H 28#define _LINUX_SRCU_H
29 29
30#include <linux/mutex.h> 30#include <linux/mutex.h>
31#include <linux/rcupdate.h>
31 32
32struct srcu_struct_array { 33struct srcu_struct_array {
33 int c[2]; 34 int c[2];
@@ -60,18 +61,10 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
60 __init_srcu_struct((sp), #sp, &__srcu_key); \ 61 __init_srcu_struct((sp), #sp, &__srcu_key); \
61}) 62})
62 63
63# define srcu_read_acquire(sp) \
64 lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_)
65# define srcu_read_release(sp) \
66 lock_release(&(sp)->dep_map, 1, _THIS_IP_)
67
68#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 64#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
69 65
70int init_srcu_struct(struct srcu_struct *sp); 66int init_srcu_struct(struct srcu_struct *sp);
71 67
72# define srcu_read_acquire(sp) do { } while (0)
73# define srcu_read_release(sp) do { } while (0)
74
75#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 68#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
76 69
77void cleanup_srcu_struct(struct srcu_struct *sp); 70void cleanup_srcu_struct(struct srcu_struct *sp);
@@ -90,12 +83,32 @@ long srcu_batches_completed(struct srcu_struct *sp);
90 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, 83 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
91 * this assumes we are in an SRCU read-side critical section unless it can 84 * this assumes we are in an SRCU read-side critical section unless it can
92 * prove otherwise. 85 * prove otherwise.
86 *
87 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
88 * and while lockdep is disabled.
89 *
90 * Note that if the CPU is in the idle loop from an RCU point of view
 91 * (i.e., that we are in the section between rcu_idle_enter() and
 92 * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
 93 * the CPU did an srcu_read_lock(). The reason for this is that RCU
 94 * ignores CPUs that are in such a section, considering these as in
 95 * extended quiescent state, so such a CPU is effectively never in an
 96 * RCU read-side critical section regardless of what RCU primitives it
 97 * invokes. This state of affairs is required: we need to keep an
 98 * RCU-free window in idle where the CPU may possibly enter into low
 99 * power mode. This way, CPUs that started a grace period can observe
 100 * our extended quiescent state. Otherwise we would delay any grace
 101 * period for as long as we run in the idle task.
93 */ 102 */
94static inline int srcu_read_lock_held(struct srcu_struct *sp) 103static inline int srcu_read_lock_held(struct srcu_struct *sp)
95{ 104{
96 if (debug_locks) 105 if (rcu_is_cpu_idle())
97 return lock_is_held(&sp->dep_map); 106 return 0;
98 return 1; 107
108 if (!debug_lockdep_rcu_enabled())
109 return 1;
110
111 return lock_is_held(&sp->dep_map);
99} 112}
100 113
101#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 114#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -145,12 +158,17 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
145 * one way to indirectly wait on an SRCU grace period is to acquire 158 * one way to indirectly wait on an SRCU grace period is to acquire
146 * a mutex that is held elsewhere while calling synchronize_srcu() or 159 * a mutex that is held elsewhere while calling synchronize_srcu() or
147 * synchronize_srcu_expedited(). 160 * synchronize_srcu_expedited().
161 *
162 * Note that srcu_read_lock() and the matching srcu_read_unlock() must
163 * occur in the same context, for example, it is illegal to invoke
164 * srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
165 * was invoked in process context.
148 */ 166 */
149static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) 167static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
150{ 168{
151 int retval = __srcu_read_lock(sp); 169 int retval = __srcu_read_lock(sp);
152 170
153 srcu_read_acquire(sp); 171 rcu_lock_acquire(&(sp)->dep_map);
154 return retval; 172 return retval;
155} 173}
156 174
@@ -164,8 +182,51 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
164static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) 182static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
165 __releases(sp) 183 __releases(sp)
166{ 184{
167 srcu_read_release(sp); 185 rcu_lock_release(&(sp)->dep_map);
186 __srcu_read_unlock(sp, idx);
187}
188
189/**
190 * srcu_read_lock_raw - register a new reader for an SRCU-protected structure.
191 * @sp: srcu_struct in which to register the new reader.
192 *
193 * Enter an SRCU read-side critical section. Similar to srcu_read_lock(),
194 * but avoids the RCU-lockdep checking. This means that it is legal to
195 * use srcu_read_lock_raw() in one context, for example, in an exception
196 * handler, and then have the matching srcu_read_unlock_raw() in another
197 * context, for example in the task that took the exception.
198 *
199 * However, the entire SRCU read-side critical section must reside within a
200 * single task. For example, beware of using srcu_read_lock_raw() in
 201 * a device interrupt handler and srcu_read_unlock_raw() in the interrupted
202 * task: This will not work if interrupts are threaded.
203 */
204static inline int srcu_read_lock_raw(struct srcu_struct *sp)
205{
206 unsigned long flags;
207 int ret;
208
209 local_irq_save(flags);
210 ret = __srcu_read_lock(sp);
211 local_irq_restore(flags);
212 return ret;
213}
214
215/**
216 * srcu_read_unlock_raw - unregister reader from an SRCU-protected structure.
217 * @sp: srcu_struct in which to unregister the old reader.
218 * @idx: return value from corresponding srcu_read_lock_raw().
219 *
220 * Exit an SRCU read-side critical section without lockdep-RCU checking.
221 * See srcu_read_lock_raw() for more details.
222 */
223static inline void srcu_read_unlock_raw(struct srcu_struct *sp, int idx)
224{
225 unsigned long flags;
226
227 local_irq_save(flags);
168 __srcu_read_unlock(sp, idx); 228 __srcu_read_unlock(sp, idx);
229 local_irq_restore(flags);
169} 230}
170 231
171#endif 232#endif
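The split-context usage srcu_read_lock_raw() permits, sketched with illustrative names (stash->saved_idx is a hypothetical handoff location; only the single-task rule is real):

	/* In the exception handler: */
	int idx = srcu_read_lock_raw(&example_srcu);
	stash->saved_idx = idx;		/* hand the index to the task */

	/* Later, in the task that took the exception: */
	srcu_read_unlock_raw(&example_srcu, stash->saved_idx);

Because the raw variants disable interrupts around __srcu_read_lock()/__srcu_read_unlock(), they are usable in contexts where the lockdep-checked variants are not, at the cost of skipping RCU-lockdep entirely.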
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc0ee29..ab8be90b5cc9 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -7,6 +7,7 @@
7#define _LINUX_TICK_H 7#define _LINUX_TICK_H
8 8
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/irqflags.h>
10 11
11#ifdef CONFIG_GENERIC_CLOCKEVENTS 12#ifdef CONFIG_GENERIC_CLOCKEVENTS
12 13
@@ -121,14 +122,16 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
121#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ 122#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
122 123
123# ifdef CONFIG_NO_HZ 124# ifdef CONFIG_NO_HZ
124extern void tick_nohz_stop_sched_tick(int inidle); 125extern void tick_nohz_idle_enter(void);
125extern void tick_nohz_restart_sched_tick(void); 126extern void tick_nohz_idle_exit(void);
127extern void tick_nohz_irq_exit(void);
126extern ktime_t tick_nohz_get_sleep_length(void); 128extern ktime_t tick_nohz_get_sleep_length(void);
127extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 129extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
128extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); 130extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
129# else 131# else
130static inline void tick_nohz_stop_sched_tick(int inidle) { } 132static inline void tick_nohz_idle_enter(void) { }
131static inline void tick_nohz_restart_sched_tick(void) { } 133static inline void tick_nohz_idle_exit(void) { }
134
132static inline ktime_t tick_nohz_get_sleep_length(void) 135static inline ktime_t tick_nohz_get_sleep_length(void)
133{ 136{
134 ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; 137 ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
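Arch idle loops convert accordingly; the arch/*/kernel/process.c hunks in this series follow roughly this shape (cpu_sleep() stands in for the arch-specific wait primitive):

	/* Sketch of a converted idle loop. */
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();
		while (!need_resched())
			cpu_sleep();
		rcu_idle_exit();
		tick_nohz_idle_exit();
		schedule();
	}

The old tick_nohz_stop_sched_tick(1)/tick_nohz_restart_sched_tick() pair folded the RCU transitions in; they are now explicit, so an architecture can order them around its low-power entry.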
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3efc9f3f43a0..a9ce45e8501c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,13 +77,13 @@ struct task_struct;
77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
78 { .flags = word, .bit_nr = bit, } 78 { .flags = word, .bit_nr = bit, }
79 79
80extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *); 80extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
81 81
82#define init_waitqueue_head(q) \ 82#define init_waitqueue_head(q) \
83 do { \ 83 do { \
84 static struct lock_class_key __key; \ 84 static struct lock_class_key __key; \
85 \ 85 \
86 __init_waitqueue_head((q), &__key); \ 86 __init_waitqueue_head((q), #q, &__key); \
87 } while (0) 87 } while (0)
88 88
89#ifdef CONFIG_LOCKDEP 89#ifdef CONFIG_LOCKDEP
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index b1377b931eb7..5fb2c3d10c05 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -254,7 +254,7 @@ unsigned long soc_camera_apply_board_flags(struct soc_camera_link *icl,
254static inline struct video_device *soc_camera_i2c_to_vdev(const struct i2c_client *client) 254static inline struct video_device *soc_camera_i2c_to_vdev(const struct i2c_client *client)
255{ 255{
256 struct v4l2_subdev *sd = i2c_get_clientdata(client); 256 struct v4l2_subdev *sd = i2c_get_clientdata(client);
257 struct soc_camera_device *icd = (struct soc_camera_device *)sd->grp_id; 257 struct soc_camera_device *icd = v4l2_get_subdev_hostdata(sd);
258 return icd ? icd->vdev : NULL; 258 return icd ? icd->vdev : NULL;
259} 259}
260 260
@@ -279,6 +279,11 @@ static inline struct soc_camera_device *soc_camera_from_vbq(const struct videobu
279 return container_of(vq, struct soc_camera_device, vb_vidq); 279 return container_of(vq, struct soc_camera_device, vb_vidq);
280} 280}
281 281
282static inline u32 soc_camera_grp_id(const struct soc_camera_device *icd)
283{
284 return (icd->iface << 8) | (icd->devnum + 1);
285}
286
282void soc_camera_lock(struct vb2_queue *vq); 287void soc_camera_lock(struct vb2_queue *vq);
283void soc_camera_unlock(struct vb2_queue *vq); 288void soc_camera_unlock(struct vb2_queue *vq);
284 289
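A quick check of the packing in soc_camera_grp_id():

	/* iface = 2, devnum = 0  ->  (2 << 8) | (0 + 1) = 0x201 */

The +1 presumably keeps a valid group id from ever being zero, so a zero grp_id can still mean "unset".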
diff --git a/include/net/dst.h b/include/net/dst.h
index 6faec1a60216..75766b42660e 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -53,6 +53,7 @@ struct dst_entry {
53#define DST_NOHASH 0x0008 53#define DST_NOHASH 0x0008
54#define DST_NOCACHE 0x0010 54#define DST_NOCACHE 0x0010
55#define DST_NOCOUNT 0x0020 55#define DST_NOCOUNT 0x0020
56#define DST_NOPEER 0x0040
56 57
57 short error; 58 short error;
58 short obsolete; 59 short obsolete;
diff --git a/include/net/flow.h b/include/net/flow.h
index a09447749e2d..57f15a7f1cdd 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -207,6 +207,7 @@ extern struct flow_cache_object *flow_cache_lookup(
207 u8 dir, flow_resolve_t resolver, void *ctx); 207 u8 dir, flow_resolve_t resolver, void *ctx);
208 208
209extern void flow_cache_flush(void); 209extern void flow_cache_flush(void);
210extern void flow_cache_flush_deferred(void);
210extern atomic_t flow_cache_genid; 211extern atomic_t flow_cache_genid;
211 212
212#endif 213#endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 873d5be7926c..e5a7b9aaf552 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1207,7 +1207,7 @@ extern void ip_vs_control_cleanup(void);
1207extern struct ip_vs_dest * 1207extern struct ip_vs_dest *
1208ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, 1208ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
1209 __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, 1209 __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
1210 __u16 protocol, __u32 fwmark); 1210 __u16 protocol, __u32 fwmark, __u32 flags);
1211extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); 1211extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
1212 1212
1213 1213
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e90e7a9935dd..a15432da27c3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -241,6 +241,9 @@ extern struct sctp_globals {
241 * bits is an indicator of when to send and window update SACK. 241 * bits is an indicator of when to send and window update SACK.
242 */ 242 */
243 int rwnd_update_shift; 243 int rwnd_update_shift;
244
245 /* Threshold for autoclose timeout, in seconds. */
246 unsigned long max_autoclose;
244} sctp_globals; 247} sctp_globals;
245 248
246#define sctp_rto_initial (sctp_globals.rto_initial) 249#define sctp_rto_initial (sctp_globals.rto_initial)
@@ -281,6 +284,7 @@ extern struct sctp_globals {
281#define sctp_auth_enable (sctp_globals.auth_enable) 284#define sctp_auth_enable (sctp_globals.auth_enable)
282#define sctp_checksum_disable (sctp_globals.checksum_disable) 285#define sctp_checksum_disable (sctp_globals.checksum_disable)
283#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) 286#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift)
287#define sctp_max_autoclose (sctp_globals.max_autoclose)
284 288
285/* SCTP Socket type: UDP or TCP style. */ 289/* SCTP Socket type: UDP or TCP style. */
286typedef enum { 290typedef enum {
diff --git a/include/net/sock.h b/include/net/sock.h
index abb6e0f0c3c3..32e39371fba6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -637,12 +637,14 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
637 637
638/* 638/*
639 * Take into account size of receive queue and backlog queue 639 * Take into account size of receive queue and backlog queue
 640 * Do not take this skb's truesize into account,
 641 * so that even a single big packet can be accepted.
640 */ 642 */
641static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb) 643static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
642{ 644{
643 unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc); 645 unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
644 646
645 return qsize + skb->truesize > sk->sk_rcvbuf; 647 return qsize > sk->sk_rcvbuf;
646} 648}
647 649
648/* The per-socket spinlock must be held here. */ 650/* The per-socket spinlock must be held here. */
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index d1e95c6ac776..5a35a2a2d3c5 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -147,6 +147,7 @@ struct fcoe_ctlr {
147 u8 map_dest; 147 u8 map_dest;
148 u8 spma; 148 u8 spma;
149 u8 probe_tries; 149 u8 probe_tries;
150 u8 priority;
150 u8 dest_addr[ETH_ALEN]; 151 u8 dest_addr[ETH_ALEN];
151 u8 ctl_src_addr[ETH_ALEN]; 152 u8 ctl_src_addr[ETH_ALEN];
152 153
@@ -301,6 +302,7 @@ struct fcoe_percpu_s {
301 * @lport: The associated local port 302 * @lport: The associated local port
302 * @fcoe_pending_queue: The pending Rx queue of skbs 303 * @fcoe_pending_queue: The pending Rx queue of skbs
303 * @fcoe_pending_queue_active: Indicates if the pending queue is active 304 * @fcoe_pending_queue_active: Indicates if the pending queue is active
305 * @priority: Packet priority (DCB)
304 * @max_queue_depth: Max queue depth of pending queue 306 * @max_queue_depth: Max queue depth of pending queue
305 * @min_queue_depth: Min queue depth of pending queue 307 * @min_queue_depth: Min queue depth of pending queue
306 * @timer: The queue timer 308 * @timer: The queue timer
@@ -316,6 +318,7 @@ struct fcoe_port {
316 struct fc_lport *lport; 318 struct fc_lport *lport;
317 struct sk_buff_head fcoe_pending_queue; 319 struct sk_buff_head fcoe_pending_queue;
318 u8 fcoe_pending_queue_active; 320 u8 fcoe_pending_queue_active;
321 u8 priority;
319 u32 max_queue_depth; 322 u32 max_queue_depth;
320 u32 min_queue_depth; 323 u32 min_queue_depth;
321 struct timer_list timer; 324 struct timer_list timer;
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 669fbd62ec25..d2d88bed891b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -241,24 +241,73 @@ TRACE_EVENT(rcu_fqs,
241 241
242/* 242/*
243 * Tracepoint for dyntick-idle entry/exit events. These take a string 243 * Tracepoint for dyntick-idle entry/exit events. These take a string
244 * as argument: "Start" for entering dyntick-idle mode and "End" for 244 * as argument: "Start" for entering dyntick-idle mode, "End" for
245 * leaving it. 245 * leaving it, "--=" for events moving towards idle, and "++=" for events
246 * moving away from idle. "Error on entry: not idle task" and "Error on
247 * exit: not idle task" indicate that a non-idle task is erroneously
248 * toying with the idle loop.
249 *
250 * These events also take a pair of numbers, which indicate the nesting
251 * depth before and after the event of interest. Note that task-related
252 * events use the upper bits of each number, while interrupt-related
253 * events use the lower bits.
246 */ 254 */
247TRACE_EVENT(rcu_dyntick, 255TRACE_EVENT(rcu_dyntick,
248 256
249 TP_PROTO(char *polarity), 257 TP_PROTO(char *polarity, long long oldnesting, long long newnesting),
250 258
251 TP_ARGS(polarity), 259 TP_ARGS(polarity, oldnesting, newnesting),
252 260
253 TP_STRUCT__entry( 261 TP_STRUCT__entry(
254 __field(char *, polarity) 262 __field(char *, polarity)
263 __field(long long, oldnesting)
264 __field(long long, newnesting)
255 ), 265 ),
256 266
257 TP_fast_assign( 267 TP_fast_assign(
258 __entry->polarity = polarity; 268 __entry->polarity = polarity;
269 __entry->oldnesting = oldnesting;
270 __entry->newnesting = newnesting;
271 ),
272
273 TP_printk("%s %llx %llx", __entry->polarity,
274 __entry->oldnesting, __entry->newnesting)
275);
276
277/*
278 * Tracepoint for RCU preparation for idle, the goal being to get RCU
279 * processing done so that the current CPU can shut off its scheduling
280 * clock and enter dyntick-idle mode. One way to accomplish this is
281 * to drain all RCU callbacks from this CPU, and the other is to have
282 * done everything RCU requires for the current grace period. In this
283 * latter case, the CPU will be awakened at the end of the current grace
284 * period in order to process the remainder of its callbacks.
285 *
286 * These tracepoints take a string as argument:
287 *
288 * "No callbacks": Nothing to do, no callbacks on this CPU.
289 * "In holdoff": Nothing to do, holding off after unsuccessful attempt.
290 * "Begin holdoff": Attempt failed, don't retry until next jiffy.
291 * "Dyntick with callbacks": Entering dyntick-idle despite callbacks.
292 * "More callbacks": Still more callbacks, try again to clear them out.
293 * "Callbacks drained": All callbacks processed, off to dyntick idle!
294 * "Timer": Timer fired to cause CPU to continue processing callbacks.
295 */
296TRACE_EVENT(rcu_prep_idle,
297
298 TP_PROTO(char *reason),
299
300 TP_ARGS(reason),
301
302 TP_STRUCT__entry(
303 __field(char *, reason)
304 ),
305
306 TP_fast_assign(
307 __entry->reason = reason;
259 ), 308 ),
260 309
261 TP_printk("%s", __entry->polarity) 310 TP_printk("%s", __entry->reason)
262); 311);
263 312
264/* 313/*
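As the comment above notes, the extended rcu_dyntick event encodes task-level nesting in the upper bits and interrupt nesting in the lower bits of each counter. A minimal sketch of a call site for the new three-argument form, assuming a simplified stand-in for the RCU core's per-CPU nesting counter (the function and its parameter are illustrative, not the kernel's actual code):

#include <trace/events/rcu.h>

/* Sketch only: emit "Start" as the task nesting level drops to idle. */
static void example_idle_enter(long long *dynticks_nesting)
{
        long long oldval = *dynticks_nesting;

        *dynticks_nesting = 0;                  /* entering dyntick-idle */
        trace_rcu_dyntick("Start", oldval, *dynticks_nesting);
}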
@@ -412,27 +461,71 @@ TRACE_EVENT(rcu_invoke_kfree_callback,
412 461
413/* 462/*
414 * Tracepoint for exiting rcu_do_batch after RCU callbacks have been 463 * Tracepoint for exiting rcu_do_batch after RCU callbacks have been
415 * invoked. The first argument is the name of the RCU flavor and 464 * invoked. The first argument is the name of the RCU flavor,
416 * the second argument is number of callbacks actually invoked. 465 * the second argument is the number of callbacks actually invoked,
466 * the third argument (cb) is whether or not any of the callbacks that
467 * were ready to invoke at the beginning of this batch are still
468 * queued, the fourth argument (nr) is the return value of need_resched(),
469 * the fifth argument (iit) is 1 if the current task is the idle task,
470 * and the sixth argument (risk) is the return value from
471 * rcu_is_callbacks_kthread().
417 */ 472 */
418TRACE_EVENT(rcu_batch_end, 473TRACE_EVENT(rcu_batch_end,
419 474
420 TP_PROTO(char *rcuname, int callbacks_invoked), 475 TP_PROTO(char *rcuname, int callbacks_invoked,
476 bool cb, bool nr, bool iit, bool risk),
421 477
422 TP_ARGS(rcuname, callbacks_invoked), 478 TP_ARGS(rcuname, callbacks_invoked, cb, nr, iit, risk),
423 479
424 TP_STRUCT__entry( 480 TP_STRUCT__entry(
425 __field(char *, rcuname) 481 __field(char *, rcuname)
426 __field(int, callbacks_invoked) 482 __field(int, callbacks_invoked)
483 __field(bool, cb)
484 __field(bool, nr)
485 __field(bool, iit)
486 __field(bool, risk)
427 ), 487 ),
428 488
429 TP_fast_assign( 489 TP_fast_assign(
430 __entry->rcuname = rcuname; 490 __entry->rcuname = rcuname;
431 __entry->callbacks_invoked = callbacks_invoked; 491 __entry->callbacks_invoked = callbacks_invoked;
492 __entry->cb = cb;
493 __entry->nr = nr;
494 __entry->iit = iit;
495 __entry->risk = risk;
496 ),
497
498 TP_printk("%s CBs-invoked=%d idle=%c%c%c%c",
499 __entry->rcuname, __entry->callbacks_invoked,
500 __entry->cb ? 'C' : '.',
501 __entry->nr ? 'S' : '.',
502 __entry->iit ? 'I' : '.',
503 __entry->risk ? 'R' : '.')
504);
505
506/*
507 * Tracepoint for rcutorture readers. The first argument is the name
508 * of the RCU flavor from rcutorture's viewpoint and the second argument
509 * is the callback address.
510 */
511TRACE_EVENT(rcu_torture_read,
512
513 TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
514
515 TP_ARGS(rcutorturename, rhp),
516
517 TP_STRUCT__entry(
518 __field(char *, rcutorturename)
519 __field(struct rcu_head *, rhp)
520 ),
521
522 TP_fast_assign(
523 __entry->rcutorturename = rcutorturename;
524 __entry->rhp = rhp;
432 ), 525 ),
433 526
434 TP_printk("%s CBs-invoked=%d", 527 TP_printk("%s torture read %p",
435 __entry->rcuname, __entry->callbacks_invoked) 528 __entry->rcutorturename, __entry->rhp)
436); 529);
437 530
438#else /* #ifdef CONFIG_RCU_TRACE */ 531#else /* #ifdef CONFIG_RCU_TRACE */
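The four new booleans summarize why rcu_do_batch() stopped invoking callbacks, and the TP_printk() above renders them as a compact flag string ('C', 'S', 'I', 'R', or '.' for each cleared flag). A sketch of the expected call site, with hypothetical local variables standing in for the batch loop's state (rcu_is_callbacks_kthread() is assumed to be the RCU-internal helper named in the comment):

#include <linux/sched.h>
#include <trace/events/rcu.h>

/* Sketch only: report batch-end state from a rcu_do_batch()-style loop. */
static void example_batch_end(int count, bool cbs_remaining)
{
        trace_rcu_batch_end("rcu_sched", count,
                            cbs_remaining,              /* cb: ready CBs still queued */
                            need_resched(),             /* nr */
                            is_idle_task(current),      /* iit */
                            rcu_is_callbacks_kthread()); /* risk */
}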
@@ -443,13 +536,16 @@ TRACE_EVENT(rcu_batch_end,
443#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) 536#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
444#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) 537#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
445#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) 538#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
446#define trace_rcu_dyntick(polarity) do { } while (0) 539#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
540#define trace_rcu_prep_idle(reason) do { } while (0)
447#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) 541#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
448#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) 542#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
449#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) 543#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
450#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) 544#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
451#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) 545#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
452#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) 546#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
547 do { } while (0)
548#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
453 549
454#endif /* #else #ifdef CONFIG_RCU_TRACE */ 550#endif /* #else #ifdef CONFIG_RCU_TRACE */
455 551
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index b99caa8b780c..99d1d0decf88 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -21,6 +21,16 @@
21 {I_REFERENCED, "I_REFERENCED"} \ 21 {I_REFERENCED, "I_REFERENCED"} \
22 ) 22 )
23 23
24#define WB_WORK_REASON \
25 {WB_REASON_BACKGROUND, "background"}, \
26 {WB_REASON_TRY_TO_FREE_PAGES, "try_to_free_pages"}, \
27 {WB_REASON_SYNC, "sync"}, \
28 {WB_REASON_PERIODIC, "periodic"}, \
29 {WB_REASON_LAPTOP_TIMER, "laptop_timer"}, \
30 {WB_REASON_FREE_MORE_MEM, "free_more_memory"}, \
31 {WB_REASON_FS_FREE_SPACE, "fs_free_space"}, \
32 {WB_REASON_FORKER_THREAD, "forker_thread"}
33
24struct wb_writeback_work; 34struct wb_writeback_work;
25 35
26DECLARE_EVENT_CLASS(writeback_work_class, 36DECLARE_EVENT_CLASS(writeback_work_class,
@@ -55,7 +65,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
55 __entry->for_kupdate, 65 __entry->for_kupdate,
56 __entry->range_cyclic, 66 __entry->range_cyclic,
57 __entry->for_background, 67 __entry->for_background,
58 wb_reason_name[__entry->reason] 68 __print_symbolic(__entry->reason, WB_WORK_REASON)
59 ) 69 )
60); 70);
61#define DEFINE_WRITEBACK_WORK_EVENT(name) \ 71#define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -184,7 +194,8 @@ TRACE_EVENT(writeback_queue_io,
184 __entry->older, /* older_than_this in jiffies */ 194 __entry->older, /* older_than_this in jiffies */
185 __entry->age, /* older_than_this in relative milliseconds */ 195 __entry->age, /* older_than_this in relative milliseconds */
186 __entry->moved, 196 __entry->moved,
187 wb_reason_name[__entry->reason]) 197 __print_symbolic(__entry->reason, WB_WORK_REASON)
198 )
188); 199);
189 200
190TRACE_EVENT(global_dirty_state, 201TRACE_EVENT(global_dirty_state,
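Unlike an array lookup such as wb_reason_name[], __print_symbolic() resolves the numeric reason at trace-output time and also embeds the mapping table in the event's format file, so user-space decoders such as perf and trace-cmd can translate binary records without extra kernel knowledge. The call shape, sketched for a hypothetical event with a reason field:

/* Sketch only: symbolic decoding of a numeric trace field. */
TP_printk("reason=%s",
          __print_symbolic(__entry->reason, WB_WORK_REASON))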
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
index f0b6890370be..f6f07aa35af5 100644
--- a/include/xen/interface/io/xs_wire.h
+++ b/include/xen/interface/io/xs_wire.h
@@ -29,8 +29,7 @@ enum xsd_sockmsg_type
29 XS_IS_DOMAIN_INTRODUCED, 29 XS_IS_DOMAIN_INTRODUCED,
30 XS_RESUME, 30 XS_RESUME,
31 XS_SET_TARGET, 31 XS_SET_TARGET,
32 XS_RESTRICT, 32 XS_RESTRICT
33 XS_RESET_WATCHES
34}; 33};
35 34
36#define XS_WRITE_NONE "NONE" 35#define XS_WRITE_NONE "NONE"
diff --git a/init/Kconfig b/init/Kconfig
index 43298f9810fb..82b6a4c675b2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -469,14 +469,14 @@ config RCU_FANOUT_EXACT
469 469
470config RCU_FAST_NO_HZ 470config RCU_FAST_NO_HZ
471 bool "Accelerate last non-dyntick-idle CPU's grace periods" 471 bool "Accelerate last non-dyntick-idle CPU's grace periods"
472 depends on TREE_RCU && NO_HZ && SMP 472 depends on NO_HZ && SMP
473 default n 473 default n
474 help 474 help
475 This option causes RCU to attempt to accelerate grace periods 475 This option causes RCU to attempt to accelerate grace periods
476 in order to allow the final CPU to enter dynticks-idle state 476 in order to allow CPUs to enter dynticks-idle state more
477 more quickly. On the other hand, this option increases the 477 quickly. On the other hand, this option increases the overhead
478 overhead of the dynticks-idle checking, particularly on systems 478 of the dynticks-idle checking, particularly on systems with
479 with large numbers of CPUs. 479 large numbers of CPUs.
480 480
481 Say Y if energy efficiency is critically important, particularly 481 Say Y if energy efficiency is critically important, particularly
482 if you have relatively few CPUs. 482 if you have relatively few CPUs.
diff --git a/init/main.c b/init/main.c
index 217ed23e9487..2c76efb513c2 100644
--- a/init/main.c
+++ b/init/main.c
@@ -469,13 +469,12 @@ asmlinkage void __init start_kernel(void)
469 char * command_line; 469 char * command_line;
470 extern const struct kernel_param __start___param[], __stop___param[]; 470 extern const struct kernel_param __start___param[], __stop___param[];
471 471
472 smp_setup_processor_id();
473
474 /* 472 /*
475 * Need to run as early as possible, to initialize the 473 * Need to run as early as possible, to initialize the
476 * lockdep hash: 474 * lockdep hash:
477 */ 475 */
478 lockdep_init(); 476 lockdep_init();
477 smp_setup_processor_id();
479 debug_objects_early_init(); 478 debug_objects_early_init();
480 479
481 /* 480 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d9d5648f3cdc..a184470cf9b5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2098,11 +2098,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2098 continue; 2098 continue;
2099 /* get old css_set pointer */ 2099 /* get old css_set pointer */
2100 task_lock(tsk); 2100 task_lock(tsk);
2101 if (tsk->flags & PF_EXITING) {
2102 /* ignore this task if it's going away */
2103 task_unlock(tsk);
2104 continue;
2105 }
2106 oldcg = tsk->cgroups; 2101 oldcg = tsk->cgroups;
2107 get_css_set(oldcg); 2102 get_css_set(oldcg);
2108 task_unlock(tsk); 2103 task_unlock(tsk);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3f8ee8a138c4..5ca38d5d238a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -379,6 +379,7 @@ out:
379 cpu_maps_update_done(); 379 cpu_maps_update_done();
380 return err; 380 return err;
381} 381}
382EXPORT_SYMBOL_GPL(cpu_up);
382 383
383#ifdef CONFIG_PM_SLEEP_SMP 384#ifdef CONFIG_PM_SLEEP_SMP
384static cpumask_var_t frozen_cpus; 385static cpumask_var_t frozen_cpus;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9fe58c46a426..0b1712dba587 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -123,6 +123,19 @@ static inline struct cpuset *task_cs(struct task_struct *task)
123 struct cpuset, css); 123 struct cpuset, css);
124} 124}
125 125
126#ifdef CONFIG_NUMA
127static inline bool task_has_mempolicy(struct task_struct *task)
128{
129 return task->mempolicy;
130}
131#else
132static inline bool task_has_mempolicy(struct task_struct *task)
133{
134 return false;
135}
136#endif
137
138
126/* bits in struct cpuset flags field */ 139/* bits in struct cpuset flags field */
127typedef enum { 140typedef enum {
128 CS_CPU_EXCLUSIVE, 141 CS_CPU_EXCLUSIVE,
@@ -949,7 +962,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
949static void cpuset_change_task_nodemask(struct task_struct *tsk, 962static void cpuset_change_task_nodemask(struct task_struct *tsk,
950 nodemask_t *newmems) 963 nodemask_t *newmems)
951{ 964{
952 bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed); 965 bool need_loop;
953 966
954repeat: 967repeat:
955 /* 968 /*
@@ -962,6 +975,14 @@ repeat:
962 return; 975 return;
963 976
964 task_lock(tsk); 977 task_lock(tsk);
978 /*
979 * Determine if a loop is necessary if another thread is doing
980 * get_mems_allowed(). If at least one node remains unchanged and
981 * tsk does not have a mempolicy, then an empty nodemask will not be
982 * possible when mems_allowed is larger than a word.
983 */
984 need_loop = task_has_mempolicy(tsk) ||
985 !nodes_intersects(*newmems, tsk->mems_allowed);
965 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); 986 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
966 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); 987 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
967 988
@@ -981,11 +1002,9 @@ repeat:
981 1002
982 /* 1003 /*
983 * Allocation of memory is very fast, we needn't sleep when waiting 1004 * Allocation of memory is very fast, we needn't sleep when waiting
984 * for the read-side. No wait is necessary, however, if at least one 1005 * for the read-side.
985 * node remains unchanged.
986 */ 1006 */
987 while (masks_disjoint && 1007 while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
988 ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
989 task_unlock(tsk); 1008 task_unlock(tsk);
990 if (!task_curr(tsk)) 1009 if (!task_curr(tsk))
991 yield(); 1010 yield();
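The retry is only needed when a concurrent reader could observe an empty mask mid-update: if the task has no mempolicy and at least one node is common to the old and new masks, nodes_or() keeps that node set throughout the rebind. A sketch of the decision, using a word-sized mask as an illustrative stand-in for nodemask_t:

/* Sketch only: when must cpuset_change_task_nodemask() loop? */
static bool example_need_loop(unsigned long newmems, unsigned long oldmems,
                              bool has_mempolicy)
{
        /* mempolicy rebinding, or disjoint masks, forces the slow path */
        return has_mempolicy || !(newmems & oldmems);
}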
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 5532dd37aa86..7d6fb40d2188 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -636,7 +636,7 @@ char kdb_task_state_char (const struct task_struct *p)
636 (p->exit_state & EXIT_ZOMBIE) ? 'Z' : 636 (p->exit_state & EXIT_ZOMBIE) ? 'Z' :
637 (p->exit_state & EXIT_DEAD) ? 'E' : 637 (p->exit_state & EXIT_DEAD) ? 'E' :
638 (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; 638 (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
639 if (p->pid == 0) { 639 if (is_idle_task(p)) {
640 /* Idle task. Is it really idle, apart from the kdb 640 /* Idle task. Is it really idle, apart from the kdb
641 * interrupt? */ 641 * interrupt? */
642 if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) { 642 if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 89e5e8aa4c36..22d901f9caf4 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
2CFLAGS_REMOVE_core.o = -pg 2CFLAGS_REMOVE_core.o = -pg
3endif 3endif
4 4
5obj-y := core.o ring_buffer.o 5obj-y := core.o ring_buffer.o callchain.o
6obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 6obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
new file mode 100644
index 000000000000..057e24b665cf
--- /dev/null
+++ b/kernel/events/callchain.c
@@ -0,0 +1,191 @@
1/*
2 * Performance events callchain code, extracted from core.c:
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
7 * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
8 *
9 * For licensing details see kernel-base/COPYING
10 */
11
12#include <linux/perf_event.h>
13#include <linux/slab.h>
14#include "internal.h"
15
16struct callchain_cpus_entries {
17 struct rcu_head rcu_head;
18 struct perf_callchain_entry *cpu_entries[0];
19};
20
21static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
22static atomic_t nr_callchain_events;
23static DEFINE_MUTEX(callchain_mutex);
24static struct callchain_cpus_entries *callchain_cpus_entries;
25
26
27__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
28 struct pt_regs *regs)
29{
30}
31
32__weak void perf_callchain_user(struct perf_callchain_entry *entry,
33 struct pt_regs *regs)
34{
35}
36
37static void release_callchain_buffers_rcu(struct rcu_head *head)
38{
39 struct callchain_cpus_entries *entries;
40 int cpu;
41
42 entries = container_of(head, struct callchain_cpus_entries, rcu_head);
43
44 for_each_possible_cpu(cpu)
45 kfree(entries->cpu_entries[cpu]);
46
47 kfree(entries);
48}
49
50static void release_callchain_buffers(void)
51{
52 struct callchain_cpus_entries *entries;
53
54 entries = callchain_cpus_entries;
55 rcu_assign_pointer(callchain_cpus_entries, NULL);
56 call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
57}
58
59static int alloc_callchain_buffers(void)
60{
61 int cpu;
62 int size;
63 struct callchain_cpus_entries *entries;
64
65 /*
66 * We can't use the percpu allocation API for data that can be
67 * accessed from NMI. Use a temporary manual per cpu allocation
68 * until that gets sorted out.
69 */
70 size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
71
72 entries = kzalloc(size, GFP_KERNEL);
73 if (!entries)
74 return -ENOMEM;
75
76 size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
77
78 for_each_possible_cpu(cpu) {
79 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
80 cpu_to_node(cpu));
81 if (!entries->cpu_entries[cpu])
82 goto fail;
83 }
84
85 rcu_assign_pointer(callchain_cpus_entries, entries);
86
87 return 0;
88
89fail:
90 for_each_possible_cpu(cpu)
91 kfree(entries->cpu_entries[cpu]);
92 kfree(entries);
93
94 return -ENOMEM;
95}
96
97int get_callchain_buffers(void)
98{
99 int err = 0;
100 int count;
101
102 mutex_lock(&callchain_mutex);
103
104 count = atomic_inc_return(&nr_callchain_events);
105 if (WARN_ON_ONCE(count < 1)) {
106 err = -EINVAL;
107 goto exit;
108 }
109
110 if (count > 1) {
111 /* If the allocation failed, give up */
112 if (!callchain_cpus_entries)
113 err = -ENOMEM;
114 goto exit;
115 }
116
117 err = alloc_callchain_buffers();
118 if (err)
119 release_callchain_buffers();
120exit:
121 mutex_unlock(&callchain_mutex);
122
123 return err;
124}
125
126void put_callchain_buffers(void)
127{
128 if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
129 release_callchain_buffers();
130 mutex_unlock(&callchain_mutex);
131 }
132}
133
134static struct perf_callchain_entry *get_callchain_entry(int *rctx)
135{
136 int cpu;
137 struct callchain_cpus_entries *entries;
138
139 *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
140 if (*rctx == -1)
141 return NULL;
142
143 entries = rcu_dereference(callchain_cpus_entries);
144 if (!entries)
145 return NULL;
146
147 cpu = smp_processor_id();
148
149 return &entries->cpu_entries[cpu][*rctx];
150}
151
152static void
153put_callchain_entry(int rctx)
154{
155 put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
156}
157
158struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
159{
160 int rctx;
161 struct perf_callchain_entry *entry;
162
163
164 entry = get_callchain_entry(&rctx);
165 if (rctx == -1)
166 return NULL;
167
168 if (!entry)
169 goto exit_put;
170
171 entry->nr = 0;
172
173 if (!user_mode(regs)) {
174 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
175 perf_callchain_kernel(entry, regs);
176 if (current->mm)
177 regs = task_pt_regs(current);
178 else
179 regs = NULL;
180 }
181
182 if (regs) {
183 perf_callchain_store(entry, PERF_CONTEXT_USER);
184 perf_callchain_user(entry, regs);
185 }
186
187exit_put:
188 put_callchain_entry(rctx);
189
190 return entry;
191}
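The buffers are reference-counted: the first event that samples callchains allocates the per-CPU entries, and the last one releases them via RCU. A sketch of the expected pairing from the perf core (the function names here are hypothetical; core.c does the equivalent in its event allocation and free paths):

/* Sketch only: callchain buffer lifetime follows event lifetime. */
static int example_event_init(struct perf_event *event)
{
        if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
                return get_callchain_buffers(); /* first user allocates */
        return 0;
}

static void example_event_destroy(struct perf_event *event)
{
        if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
                put_callchain_buffers();        /* last user frees via RCU */
}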
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3b9df5962c2..890eb02c2f21 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -128,7 +128,7 @@ enum event_type_t {
128 * perf_sched_events : >0 events exist 128 * perf_sched_events : >0 events exist
129 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu 129 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
130 */ 130 */
131struct jump_label_key perf_sched_events __read_mostly; 131struct jump_label_key_deferred perf_sched_events __read_mostly;
132static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); 132static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
133 133
134static atomic_t nr_mmap_events __read_mostly; 134static atomic_t nr_mmap_events __read_mostly;
@@ -1130,6 +1130,8 @@ event_sched_out(struct perf_event *event,
1130 if (!is_software_event(event)) 1130 if (!is_software_event(event))
1131 cpuctx->active_oncpu--; 1131 cpuctx->active_oncpu--;
1132 ctx->nr_active--; 1132 ctx->nr_active--;
1133 if (event->attr.freq && event->attr.sample_freq)
1134 ctx->nr_freq--;
1133 if (event->attr.exclusive || !cpuctx->active_oncpu) 1135 if (event->attr.exclusive || !cpuctx->active_oncpu)
1134 cpuctx->exclusive = 0; 1136 cpuctx->exclusive = 0;
1135} 1137}
@@ -1325,6 +1327,7 @@ retry:
1325 } 1327 }
1326 raw_spin_unlock_irq(&ctx->lock); 1328 raw_spin_unlock_irq(&ctx->lock);
1327} 1329}
1330EXPORT_SYMBOL_GPL(perf_event_disable);
1328 1331
1329static void perf_set_shadow_time(struct perf_event *event, 1332static void perf_set_shadow_time(struct perf_event *event,
1330 struct perf_event_context *ctx, 1333 struct perf_event_context *ctx,
@@ -1406,6 +1409,8 @@ event_sched_in(struct perf_event *event,
1406 if (!is_software_event(event)) 1409 if (!is_software_event(event))
1407 cpuctx->active_oncpu++; 1410 cpuctx->active_oncpu++;
1408 ctx->nr_active++; 1411 ctx->nr_active++;
1412 if (event->attr.freq && event->attr.sample_freq)
1413 ctx->nr_freq++;
1409 1414
1410 if (event->attr.exclusive) 1415 if (event->attr.exclusive)
1411 cpuctx->exclusive = 1; 1416 cpuctx->exclusive = 1;
@@ -1662,8 +1667,7 @@ retry:
1662 * Note: this works for group members as well as group leaders 1667 * Note: this works for group members as well as group leaders
1663 * since the non-leader members' sibling_lists will be empty. 1668 * since the non-leader members' sibling_lists will be empty.
1664 */ 1669 */
1665static void __perf_event_mark_enabled(struct perf_event *event, 1670static void __perf_event_mark_enabled(struct perf_event *event)
1666 struct perf_event_context *ctx)
1667{ 1671{
1668 struct perf_event *sub; 1672 struct perf_event *sub;
1669 u64 tstamp = perf_event_time(event); 1673 u64 tstamp = perf_event_time(event);
@@ -1701,7 +1705,7 @@ static int __perf_event_enable(void *info)
1701 */ 1705 */
1702 perf_cgroup_set_timestamp(current, ctx); 1706 perf_cgroup_set_timestamp(current, ctx);
1703 1707
1704 __perf_event_mark_enabled(event, ctx); 1708 __perf_event_mark_enabled(event);
1705 1709
1706 if (!event_filter_match(event)) { 1710 if (!event_filter_match(event)) {
1707 if (is_cgroup_event(event)) 1711 if (is_cgroup_event(event))
@@ -1782,7 +1786,7 @@ void perf_event_enable(struct perf_event *event)
1782 1786
1783retry: 1787retry:
1784 if (!ctx->is_active) { 1788 if (!ctx->is_active) {
1785 __perf_event_mark_enabled(event, ctx); 1789 __perf_event_mark_enabled(event);
1786 goto out; 1790 goto out;
1787 } 1791 }
1788 1792
@@ -1809,6 +1813,7 @@ retry:
1809out: 1813out:
1810 raw_spin_unlock_irq(&ctx->lock); 1814 raw_spin_unlock_irq(&ctx->lock);
1811} 1815}
1816EXPORT_SYMBOL_GPL(perf_event_enable);
1812 1817
1813int perf_event_refresh(struct perf_event *event, int refresh) 1818int perf_event_refresh(struct perf_event *event, int refresh)
1814{ 1819{
@@ -2327,6 +2332,9 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
2327 u64 interrupts, now; 2332 u64 interrupts, now;
2328 s64 delta; 2333 s64 delta;
2329 2334
2335 if (!ctx->nr_freq)
2336 return;
2337
2330 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 2338 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
2331 if (event->state != PERF_EVENT_STATE_ACTIVE) 2339 if (event->state != PERF_EVENT_STATE_ACTIVE)
2332 continue; 2340 continue;
@@ -2382,12 +2390,14 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2382{ 2390{
2383 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; 2391 u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
2384 struct perf_event_context *ctx = NULL; 2392 struct perf_event_context *ctx = NULL;
2385 int rotate = 0, remove = 1; 2393 int rotate = 0, remove = 1, freq = 0;
2386 2394
2387 if (cpuctx->ctx.nr_events) { 2395 if (cpuctx->ctx.nr_events) {
2388 remove = 0; 2396 remove = 0;
2389 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 2397 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
2390 rotate = 1; 2398 rotate = 1;
2399 if (cpuctx->ctx.nr_freq)
2400 freq = 1;
2391 } 2401 }
2392 2402
2393 ctx = cpuctx->task_ctx; 2403 ctx = cpuctx->task_ctx;
@@ -2395,33 +2405,40 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2395 remove = 0; 2405 remove = 0;
2396 if (ctx->nr_events != ctx->nr_active) 2406 if (ctx->nr_events != ctx->nr_active)
2397 rotate = 1; 2407 rotate = 1;
2408 if (ctx->nr_freq)
2409 freq = 1;
2398 } 2410 }
2399 2411
2412 if (!rotate && !freq)
2413 goto done;
2414
2400 perf_ctx_lock(cpuctx, cpuctx->task_ctx); 2415 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
2401 perf_pmu_disable(cpuctx->ctx.pmu); 2416 perf_pmu_disable(cpuctx->ctx.pmu);
2402 perf_ctx_adjust_freq(&cpuctx->ctx, interval);
2403 if (ctx)
2404 perf_ctx_adjust_freq(ctx, interval);
2405 2417
2406 if (!rotate) 2418 if (freq) {
2407 goto done; 2419 perf_ctx_adjust_freq(&cpuctx->ctx, interval);
2420 if (ctx)
2421 perf_ctx_adjust_freq(ctx, interval);
2422 }
2408 2423
2409 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2424 if (rotate) {
2410 if (ctx) 2425 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2411 ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); 2426 if (ctx)
2427 ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
2412 2428
2413 rotate_ctx(&cpuctx->ctx); 2429 rotate_ctx(&cpuctx->ctx);
2414 if (ctx) 2430 if (ctx)
2415 rotate_ctx(ctx); 2431 rotate_ctx(ctx);
2416 2432
2417 perf_event_sched_in(cpuctx, ctx, current); 2433 perf_event_sched_in(cpuctx, ctx, current);
2434 }
2435
2436 perf_pmu_enable(cpuctx->ctx.pmu);
2437 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2418 2438
2419done: 2439done:
2420 if (remove) 2440 if (remove)
2421 list_del_init(&cpuctx->rotation_list); 2441 list_del_init(&cpuctx->rotation_list);
2422
2423 perf_pmu_enable(cpuctx->ctx.pmu);
2424 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2425} 2442}
2426 2443
2427void perf_event_task_tick(void) 2444void perf_event_task_tick(void)
@@ -2448,7 +2465,7 @@ static int event_enable_on_exec(struct perf_event *event,
2448 if (event->state >= PERF_EVENT_STATE_INACTIVE) 2465 if (event->state >= PERF_EVENT_STATE_INACTIVE)
2449 return 0; 2466 return 0;
2450 2467
2451 __perf_event_mark_enabled(event, ctx); 2468 __perf_event_mark_enabled(event);
2452 2469
2453 return 1; 2470 return 1;
2454} 2471}
@@ -2480,13 +2497,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
2480 raw_spin_lock(&ctx->lock); 2497 raw_spin_lock(&ctx->lock);
2481 task_ctx_sched_out(ctx); 2498 task_ctx_sched_out(ctx);
2482 2499
2483 list_for_each_entry(event, &ctx->pinned_groups, group_entry) { 2500 list_for_each_entry(event, &ctx->event_list, event_entry) {
2484 ret = event_enable_on_exec(event, ctx);
2485 if (ret)
2486 enabled = 1;
2487 }
2488
2489 list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
2490 ret = event_enable_on_exec(event, ctx); 2501 ret = event_enable_on_exec(event, ctx);
2491 if (ret) 2502 if (ret)
2492 enabled = 1; 2503 enabled = 1;
@@ -2574,215 +2585,6 @@ static u64 perf_event_read(struct perf_event *event)
2574} 2585}
2575 2586
2576/* 2587/*
2577 * Callchain support
2578 */
2579
2580struct callchain_cpus_entries {
2581 struct rcu_head rcu_head;
2582 struct perf_callchain_entry *cpu_entries[0];
2583};
2584
2585static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
2586static atomic_t nr_callchain_events;
2587static DEFINE_MUTEX(callchain_mutex);
2588struct callchain_cpus_entries *callchain_cpus_entries;
2589
2590
2591__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
2592 struct pt_regs *regs)
2593{
2594}
2595
2596__weak void perf_callchain_user(struct perf_callchain_entry *entry,
2597 struct pt_regs *regs)
2598{
2599}
2600
2601static void release_callchain_buffers_rcu(struct rcu_head *head)
2602{
2603 struct callchain_cpus_entries *entries;
2604 int cpu;
2605
2606 entries = container_of(head, struct callchain_cpus_entries, rcu_head);
2607
2608 for_each_possible_cpu(cpu)
2609 kfree(entries->cpu_entries[cpu]);
2610
2611 kfree(entries);
2612}
2613
2614static void release_callchain_buffers(void)
2615{
2616 struct callchain_cpus_entries *entries;
2617
2618 entries = callchain_cpus_entries;
2619 rcu_assign_pointer(callchain_cpus_entries, NULL);
2620 call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
2621}
2622
2623static int alloc_callchain_buffers(void)
2624{
2625 int cpu;
2626 int size;
2627 struct callchain_cpus_entries *entries;
2628
2629 /*
2630 * We can't use the percpu allocation API for data that can be
2631 * accessed from NMI. Use a temporary manual per cpu allocation
2632 * until that gets sorted out.
2633 */
2634 size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
2635
2636 entries = kzalloc(size, GFP_KERNEL);
2637 if (!entries)
2638 return -ENOMEM;
2639
2640 size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
2641
2642 for_each_possible_cpu(cpu) {
2643 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
2644 cpu_to_node(cpu));
2645 if (!entries->cpu_entries[cpu])
2646 goto fail;
2647 }
2648
2649 rcu_assign_pointer(callchain_cpus_entries, entries);
2650
2651 return 0;
2652
2653fail:
2654 for_each_possible_cpu(cpu)
2655 kfree(entries->cpu_entries[cpu]);
2656 kfree(entries);
2657
2658 return -ENOMEM;
2659}
2660
2661static int get_callchain_buffers(void)
2662{
2663 int err = 0;
2664 int count;
2665
2666 mutex_lock(&callchain_mutex);
2667
2668 count = atomic_inc_return(&nr_callchain_events);
2669 if (WARN_ON_ONCE(count < 1)) {
2670 err = -EINVAL;
2671 goto exit;
2672 }
2673
2674 if (count > 1) {
2675 /* If the allocation failed, give up */
2676 if (!callchain_cpus_entries)
2677 err = -ENOMEM;
2678 goto exit;
2679 }
2680
2681 err = alloc_callchain_buffers();
2682 if (err)
2683 release_callchain_buffers();
2684exit:
2685 mutex_unlock(&callchain_mutex);
2686
2687 return err;
2688}
2689
2690static void put_callchain_buffers(void)
2691{
2692 if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
2693 release_callchain_buffers();
2694 mutex_unlock(&callchain_mutex);
2695 }
2696}
2697
2698static int get_recursion_context(int *recursion)
2699{
2700 int rctx;
2701
2702 if (in_nmi())
2703 rctx = 3;
2704 else if (in_irq())
2705 rctx = 2;
2706 else if (in_softirq())
2707 rctx = 1;
2708 else
2709 rctx = 0;
2710
2711 if (recursion[rctx])
2712 return -1;
2713
2714 recursion[rctx]++;
2715 barrier();
2716
2717 return rctx;
2718}
2719
2720static inline void put_recursion_context(int *recursion, int rctx)
2721{
2722 barrier();
2723 recursion[rctx]--;
2724}
2725
2726static struct perf_callchain_entry *get_callchain_entry(int *rctx)
2727{
2728 int cpu;
2729 struct callchain_cpus_entries *entries;
2730
2731 *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
2732 if (*rctx == -1)
2733 return NULL;
2734
2735 entries = rcu_dereference(callchain_cpus_entries);
2736 if (!entries)
2737 return NULL;
2738
2739 cpu = smp_processor_id();
2740
2741 return &entries->cpu_entries[cpu][*rctx];
2742}
2743
2744static void
2745put_callchain_entry(int rctx)
2746{
2747 put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
2748}
2749
2750static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2751{
2752 int rctx;
2753 struct perf_callchain_entry *entry;
2754
2755
2756 entry = get_callchain_entry(&rctx);
2757 if (rctx == -1)
2758 return NULL;
2759
2760 if (!entry)
2761 goto exit_put;
2762
2763 entry->nr = 0;
2764
2765 if (!user_mode(regs)) {
2766 perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
2767 perf_callchain_kernel(entry, regs);
2768 if (current->mm)
2769 regs = task_pt_regs(current);
2770 else
2771 regs = NULL;
2772 }
2773
2774 if (regs) {
2775 perf_callchain_store(entry, PERF_CONTEXT_USER);
2776 perf_callchain_user(entry, regs);
2777 }
2778
2779exit_put:
2780 put_callchain_entry(rctx);
2781
2782 return entry;
2783}
2784
2785/*
2786 * Initialize the perf_event context in a task_struct: 2588 * Initialize the perf_event context in a task_struct:
2787 */ 2589 */
2788static void __perf_event_init_context(struct perf_event_context *ctx) 2590static void __perf_event_init_context(struct perf_event_context *ctx)
@@ -2946,7 +2748,7 @@ static void free_event(struct perf_event *event)
2946 2748
2947 if (!event->parent) { 2749 if (!event->parent) {
2948 if (event->attach_state & PERF_ATTACH_TASK) 2750 if (event->attach_state & PERF_ATTACH_TASK)
2949 jump_label_dec(&perf_sched_events); 2751 jump_label_dec_deferred(&perf_sched_events);
2950 if (event->attr.mmap || event->attr.mmap_data) 2752 if (event->attr.mmap || event->attr.mmap_data)
2951 atomic_dec(&nr_mmap_events); 2753 atomic_dec(&nr_mmap_events);
2952 if (event->attr.comm) 2754 if (event->attr.comm)
@@ -2957,7 +2759,7 @@ static void free_event(struct perf_event *event)
2957 put_callchain_buffers(); 2759 put_callchain_buffers();
2958 if (is_cgroup_event(event)) { 2760 if (is_cgroup_event(event)) {
2959 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); 2761 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
2960 jump_label_dec(&perf_sched_events); 2762 jump_label_dec_deferred(&perf_sched_events);
2961 } 2763 }
2962 } 2764 }
2963 2765
@@ -3558,9 +3360,13 @@ static void ring_buffer_wakeup(struct perf_event *event)
3558 3360
3559 rcu_read_lock(); 3361 rcu_read_lock();
3560 rb = rcu_dereference(event->rb); 3362 rb = rcu_dereference(event->rb);
3561 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { 3363 if (!rb)
3364 goto unlock;
3365
3366 list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
3562 wake_up_all(&event->waitq); 3367 wake_up_all(&event->waitq);
3563 } 3368
3369unlock:
3564 rcu_read_unlock(); 3370 rcu_read_unlock();
3565} 3371}
3566 3372
@@ -4816,7 +4622,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
4816 struct hw_perf_event *hwc = &event->hw; 4622 struct hw_perf_event *hwc = &event->hw;
4817 int throttle = 0; 4623 int throttle = 0;
4818 4624
4819 data->period = event->hw.last_period;
4820 if (!overflow) 4625 if (!overflow)
4821 overflow = perf_swevent_set_period(event); 4626 overflow = perf_swevent_set_period(event);
4822 4627
@@ -4850,6 +4655,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
4850 if (!is_sampling_event(event)) 4655 if (!is_sampling_event(event))
4851 return; 4656 return;
4852 4657
4658 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
4659 data->period = nr;
4660 return perf_swevent_overflow(event, 1, data, regs);
4661 } else
4662 data->period = event->hw.last_period;
4663
4853 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4664 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
4854 return perf_swevent_overflow(event, 1, data, regs); 4665 return perf_swevent_overflow(event, 1, data, regs);
4855 4666
@@ -5362,7 +5173,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
5362 regs = get_irq_regs(); 5173 regs = get_irq_regs();
5363 5174
5364 if (regs && !perf_exclude_event(event, regs)) { 5175 if (regs && !perf_exclude_event(event, regs)) {
5365 if (!(event->attr.exclude_idle && current->pid == 0)) 5176 if (!(event->attr.exclude_idle && is_idle_task(current)))
5366 if (perf_event_overflow(event, &data, regs)) 5177 if (perf_event_overflow(event, &data, regs))
5367 ret = HRTIMER_NORESTART; 5178 ret = HRTIMER_NORESTART;
5368 } 5179 }
@@ -5977,7 +5788,7 @@ done:
5977 5788
5978 if (!event->parent) { 5789 if (!event->parent) {
5979 if (event->attach_state & PERF_ATTACH_TASK) 5790 if (event->attach_state & PERF_ATTACH_TASK)
5980 jump_label_inc(&perf_sched_events); 5791 jump_label_inc(&perf_sched_events.key);
5981 if (event->attr.mmap || event->attr.mmap_data) 5792 if (event->attr.mmap || event->attr.mmap_data)
5982 atomic_inc(&nr_mmap_events); 5793 atomic_inc(&nr_mmap_events);
5983 if (event->attr.comm) 5794 if (event->attr.comm)
@@ -6215,7 +6026,7 @@ SYSCALL_DEFINE5(perf_event_open,
6215 * - that may need work on context switch 6026 * - that may need work on context switch
6216 */ 6027 */
6217 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); 6028 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
6218 jump_label_inc(&perf_sched_events); 6029 jump_label_inc(&perf_sched_events.key);
6219 } 6030 }
6220 6031
6221 /* 6032 /*
@@ -7061,6 +6872,9 @@ void __init perf_event_init(void)
7061 6872
7062 ret = init_hw_breakpoint(); 6873 ret = init_hw_breakpoint();
7063 WARN(ret, "hw_breakpoint initialization failed with: %d", ret); 6874 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6875
6876 /* do not patch jump label more than once per second */
6877 jump_label_rate_limit(&perf_sched_events, HZ);
7064} 6878}
7065 6879
7066static int __init perf_event_sysfs_init(void) 6880static int __init perf_event_sysfs_init(void)
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 64568a699375..b0b107f90afc 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -1,6 +1,10 @@
1#ifndef _KERNEL_EVENTS_INTERNAL_H 1#ifndef _KERNEL_EVENTS_INTERNAL_H
2#define _KERNEL_EVENTS_INTERNAL_H 2#define _KERNEL_EVENTS_INTERNAL_H
3 3
4#include <linux/hardirq.h>
5
6/* Buffer handling */
7
4#define RING_BUFFER_WRITABLE 0x01 8#define RING_BUFFER_WRITABLE 0x01
5 9
6struct ring_buffer { 10struct ring_buffer {
@@ -67,7 +71,7 @@ static inline int page_order(struct ring_buffer *rb)
67} 71}
68#endif 72#endif
69 73
70static unsigned long perf_data_size(struct ring_buffer *rb) 74static inline unsigned long perf_data_size(struct ring_buffer *rb)
71{ 75{
72 return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); 76 return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
73} 77}
@@ -96,4 +100,37 @@ __output_copy(struct perf_output_handle *handle,
96 } while (len); 100 } while (len);
97} 101}
98 102
103/* Callchain handling */
104extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
105extern int get_callchain_buffers(void);
106extern void put_callchain_buffers(void);
107
108static inline int get_recursion_context(int *recursion)
109{
110 int rctx;
111
112 if (in_nmi())
113 rctx = 3;
114 else if (in_irq())
115 rctx = 2;
116 else if (in_softirq())
117 rctx = 1;
118 else
119 rctx = 0;
120
121 if (recursion[rctx])
122 return -1;
123
124 recursion[rctx]++;
125 barrier();
126
127 return rctx;
128}
129
130static inline void put_recursion_context(int *recursion, int rctx)
131{
132 barrier();
133 recursion[rctx]--;
134}
135
99#endif /* _KERNEL_EVENTS_INTERNAL_H */ 136#endif /* _KERNEL_EVENTS_INTERNAL_H */
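get_recursion_context() reserves one slot per execution context (task, softirq, hardirq, NMI), so a capture interrupted by a higher-priority context never reuses the slot of the one it preempted. A sketch of the intended pattern, mirroring the get_callchain_entry()/put_callchain_entry() pair in callchain.c (the per-CPU array and function are illustrative):

/* Sketch only: recursion-safe use of a per-CPU, per-context scratch slot. */
static DEFINE_PER_CPU(int, example_recursion[PERF_NR_CONTEXTS]);

static void example_capture(void)
{
        int rctx = get_recursion_context(__get_cpu_var(example_recursion));

        if (rctx == -1)
                return;                 /* already active in this context */
        /* ... use per-context scratch slot rctx ... */
        put_recursion_context(__get_cpu_var(example_recursion), rctx);
}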
diff --git a/kernel/exit.c b/kernel/exit.c
index 5e0d1f4c696e..d579a459309d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1530,8 +1530,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
1530 } 1530 }
1531 1531
1532 /* dead body doesn't have much to contribute */ 1532 /* dead body doesn't have much to contribute */
1533 if (p->exit_state == EXIT_DEAD) 1533 if (unlikely(p->exit_state == EXIT_DEAD)) {
1534 /*
1535 * But do not ignore this task until the tracer does
1536 * wait_task_zombie()->do_notify_parent().
1537 */
1538 if (likely(!ptrace) && unlikely(ptrace_reparented(p)))
1539 wo->notask_error = 0;
1534 return 0; 1540 return 0;
1541 }
1535 1542
1536 /* slay zombie? */ 1543 /* slay zombie? */
1537 if (p->exit_state == EXIT_ZOMBIE) { 1544 if (p->exit_state == EXIT_ZOMBIE) {
diff --git a/kernel/futex.c b/kernel/futex.c
index ea87f4d2f455..1614be20173d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -314,17 +314,29 @@ again:
314#endif 314#endif
315 315
316 lock_page(page_head); 316 lock_page(page_head);
317
318 /*
319 * If page_head->mapping is NULL, then it cannot be a PageAnon
320 * page; but it might be the ZERO_PAGE or in the gate area or
321 * in a special mapping (all cases which we are happy to fail);
322 * or it may have been a good file page when get_user_pages_fast
323 * found it, but truncated or holepunched or subjected to
324 * invalidate_complete_page2 before we got the page lock (also
325 * cases which we are happy to fail). And we hold a reference,
326 * so refcount care in invalidate_complete_page's remove_mapping
327 * prevents drop_caches from setting mapping to NULL beneath us.
328 *
329 * The case we do have to guard against is when memory pressure made
330 * shmem_writepage move it from filecache to swapcache beneath us:
331 * an unlikely race, but we do need to retry for page_head->mapping.
332 */
317 if (!page_head->mapping) { 333 if (!page_head->mapping) {
334 int shmem_swizzled = PageSwapCache(page_head);
318 unlock_page(page_head); 335 unlock_page(page_head);
319 put_page(page_head); 336 put_page(page_head);
320 /* 337 if (shmem_swizzled)
321 * ZERO_PAGE pages don't have a mapping. Avoid a busy loop 338 goto again;
322 * trying to find one. RW mapping would have COW'd (and thus 339 return -EFAULT;
323 * have a mapping) so this page is RO and won't ever change.
324 */
325 if ((page_head == ZERO_PAGE(address)))
326 return -EFAULT;
327 goto again;
328 } 340 }
329 341
330 /* 342 /*
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 8b1748d0172c..2e48ec0c2e91 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -74,11 +74,17 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
74 74
75 /* 75 /*
76 * Ensure the task is not frozen. 76 * Ensure the task is not frozen.
77 * Also, when a freshly created task is scheduled once, changes 77 * Also, skip vfork and any other user process that freezer should skip.
78 * its state to TASK_UNINTERRUPTIBLE without having ever been
79 * switched out once, it musn't be checked.
80 */ 78 */
81 if (unlikely(t->flags & PF_FROZEN || !switch_count)) 79 if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
80 return;
81
82 /*
83 * When a freshly created task is scheduled once, changes its state to
84 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
85 * mustn't be checked.
86 */
87 if (unlikely(!switch_count))
82 return; 88 return;
83 89
84 if (switch_count != t->last_switch_count) { 90 if (switch_count != t->last_switch_count) {
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 66ff7109f697..30c3c7708132 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -72,15 +72,46 @@ void jump_label_inc(struct jump_label_key *key)
72 jump_label_unlock(); 72 jump_label_unlock();
73} 73}
74 74
75void jump_label_dec(struct jump_label_key *key) 75static void __jump_label_dec(struct jump_label_key *key,
76 unsigned long rate_limit, struct delayed_work *work)
76{ 77{
77 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) 78 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
78 return; 79 return;
79 80
80 jump_label_update(key, JUMP_LABEL_DISABLE); 81 if (rate_limit) {
82 atomic_inc(&key->enabled);
83 schedule_delayed_work(work, rate_limit);
84 } else
85 jump_label_update(key, JUMP_LABEL_DISABLE);
86
81 jump_label_unlock(); 87 jump_label_unlock();
82} 88}
83 89
90static void jump_label_update_timeout(struct work_struct *work)
91{
92 struct jump_label_key_deferred *key =
93 container_of(work, struct jump_label_key_deferred, work.work);
94 __jump_label_dec(&key->key, 0, NULL);
95}
96
97void jump_label_dec(struct jump_label_key *key)
98{
99 __jump_label_dec(key, 0, NULL);
100}
101
102void jump_label_dec_deferred(struct jump_label_key_deferred *key)
103{
104 __jump_label_dec(&key->key, key->timeout, &key->work);
105}
106
107
108void jump_label_rate_limit(struct jump_label_key_deferred *key,
109 unsigned long rl)
110{
111 key->timeout = rl;
112 INIT_DELAYED_WORK(&key->work, jump_label_update_timeout);
113}
114
84static int addr_conflict(struct jump_entry *entry, void *start, void *end) 115static int addr_conflict(struct jump_entry *entry, void *start, void *end)
85{ 116{
86 if (entry->code <= (unsigned long)end && 117 if (entry->code <= (unsigned long)end &&
@@ -111,7 +142,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
111 * running code can override this to make the non-live update case 142 * running code can override this to make the non-live update case
112 * cheaper. 143 * cheaper.
113 */ 144 */
114void __weak arch_jump_label_transform_static(struct jump_entry *entry, 145void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
115 enum jump_label_type type) 146 enum jump_label_type type)
116{ 147{
117 arch_jump_label_transform(entry, type); 148 arch_jump_label_transform(entry, type);
@@ -217,8 +248,13 @@ void jump_label_apply_nops(struct module *mod)
217 if (iter_start == iter_stop) 248 if (iter_start == iter_stop)
218 return; 249 return;
219 250
220 for (iter = iter_start; iter < iter_stop; iter++) 251 for (iter = iter_start; iter < iter_stop; iter++) {
221 arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); 252 struct jump_label_key *iterk;
253
254 iterk = (struct jump_label_key *)(unsigned long)iter->key;
255 arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
256 JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
257 }
222} 258}
223 259
224static int jump_label_add_module(struct module *mod) 260static int jump_label_add_module(struct module *mod)
@@ -258,8 +294,7 @@ static int jump_label_add_module(struct module *mod)
258 key->next = jlm; 294 key->next = jlm;
259 295
260 if (jump_label_enabled(key)) 296 if (jump_label_enabled(key))
261 __jump_label_update(key, iter, iter_stop, 297 __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
262 JUMP_LABEL_ENABLE);
263 } 298 }
264 299
265 return 0; 300 return 0;
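A deferred key turns the disable side into batched work: jump_label_dec_deferred() re-raises the count and arms delayed work, so rapid enable/disable cycles patch the code at most once per timeout. A sketch of consumer setup, modeled on the perf_sched_events usage elsewhere in this series (the example names are hypothetical):

/* Sketch only: a rate-limited jump label consumer. */
static struct jump_label_key_deferred example_key;

static void example_init(void)
{
        /* patch out the branch at most once per second */
        jump_label_rate_limit(&example_key, HZ);
}

static void example_get(void)
{
        jump_label_inc(&example_key.key);
}

static void example_put(void)
{
        jump_label_dec_deferred(&example_key);
}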
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b2e08c932d91..8889f7dd7c46 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -431,6 +431,7 @@ unsigned int max_lockdep_depth;
431 * about it later on, in lockdep_info(). 431 * about it later on, in lockdep_info().
432 */ 432 */
433static int lockdep_init_error; 433static int lockdep_init_error;
434static const char *lock_init_error;
434static unsigned long lockdep_init_trace_data[20]; 435static unsigned long lockdep_init_trace_data[20];
435static struct stack_trace lockdep_init_trace = { 436static struct stack_trace lockdep_init_trace = {
436 .max_entries = ARRAY_SIZE(lockdep_init_trace_data), 437 .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
@@ -499,36 +500,32 @@ void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS])
499 usage[i] = '\0'; 500 usage[i] = '\0';
500} 501}
501 502
502static int __print_lock_name(struct lock_class *class) 503static void __print_lock_name(struct lock_class *class)
503{ 504{
504 char str[KSYM_NAME_LEN]; 505 char str[KSYM_NAME_LEN];
505 const char *name; 506 const char *name;
506 507
507 name = class->name; 508 name = class->name;
508 if (!name)
509 name = __get_key_name(class->key, str);
510
511 return printk("%s", name);
512}
513
514static void print_lock_name(struct lock_class *class)
515{
516 char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS];
517 const char *name;
518
519 get_usage_chars(class, usage);
520
521 name = class->name;
522 if (!name) { 509 if (!name) {
523 name = __get_key_name(class->key, str); 510 name = __get_key_name(class->key, str);
524 printk(" (%s", name); 511 printk("%s", name);
525 } else { 512 } else {
526 printk(" (%s", name); 513 printk("%s", name);
527 if (class->name_version > 1) 514 if (class->name_version > 1)
528 printk("#%d", class->name_version); 515 printk("#%d", class->name_version);
529 if (class->subclass) 516 if (class->subclass)
530 printk("/%d", class->subclass); 517 printk("/%d", class->subclass);
531 } 518 }
519}
520
521static void print_lock_name(struct lock_class *class)
522{
523 char usage[LOCK_USAGE_CHARS];
524
525 get_usage_chars(class, usage);
526
527 printk(" (");
528 __print_lock_name(class);
532 printk("){%s}", usage); 529 printk("){%s}", usage);
533} 530}
534 531
@@ -568,11 +565,12 @@ static void lockdep_print_held_locks(struct task_struct *curr)
568 } 565 }
569} 566}
570 567
571static void print_kernel_version(void) 568static void print_kernel_ident(void)
572{ 569{
573 printk("%s %.*s\n", init_utsname()->release, 570 printk("%s %.*s %s\n", init_utsname()->release,
574 (int)strcspn(init_utsname()->version, " "), 571 (int)strcspn(init_utsname()->version, " "),
575 init_utsname()->version); 572 init_utsname()->version,
573 print_tainted());
576} 574}
577 575
578static int very_verbose(struct lock_class *class) 576static int very_verbose(struct lock_class *class)
@@ -656,6 +654,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
656 if (unlikely(!lockdep_initialized)) { 654 if (unlikely(!lockdep_initialized)) {
657 lockdep_init(); 655 lockdep_init();
658 lockdep_init_error = 1; 656 lockdep_init_error = 1;
657 lock_init_error = lock->name;
659 save_stack_trace(&lockdep_init_trace); 658 save_stack_trace(&lockdep_init_trace);
660 } 659 }
661#endif 660#endif
@@ -723,7 +722,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
723 722
724 class = look_up_lock_class(lock, subclass); 723 class = look_up_lock_class(lock, subclass);
725 if (likely(class)) 724 if (likely(class))
726 return class; 725 goto out_set_class_cache;
727 726
728 /* 727 /*
729 * Debug-check: all keys must be persistent! 728 * Debug-check: all keys must be persistent!
@@ -808,6 +807,7 @@ out_unlock_set:
808 graph_unlock(); 807 graph_unlock();
809 raw_local_irq_restore(flags); 808 raw_local_irq_restore(flags);
810 809
810out_set_class_cache:
811 if (!subclass || force) 811 if (!subclass || force)
812 lock->class_cache[0] = class; 812 lock->class_cache[0] = class;
813 else if (subclass < NR_LOCKDEP_CACHING_CLASSES) 813 else if (subclass < NR_LOCKDEP_CACHING_CLASSES)
@@ -1149,7 +1149,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1149 printk("\n"); 1149 printk("\n");
1150 printk("======================================================\n"); 1150 printk("======================================================\n");
1151 printk("[ INFO: possible circular locking dependency detected ]\n"); 1151 printk("[ INFO: possible circular locking dependency detected ]\n");
1152 print_kernel_version(); 1152 print_kernel_ident();
1153 printk("-------------------------------------------------------\n"); 1153 printk("-------------------------------------------------------\n");
1154 printk("%s/%d is trying to acquire lock:\n", 1154 printk("%s/%d is trying to acquire lock:\n",
1155 curr->comm, task_pid_nr(curr)); 1155 curr->comm, task_pid_nr(curr));
@@ -1488,7 +1488,7 @@ print_bad_irq_dependency(struct task_struct *curr,
1488 printk("======================================================\n"); 1488 printk("======================================================\n");
1489 printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", 1489 printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
1490 irqclass, irqclass); 1490 irqclass, irqclass);
1491 print_kernel_version(); 1491 print_kernel_ident();
1492 printk("------------------------------------------------------\n"); 1492 printk("------------------------------------------------------\n");
1493 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", 1493 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
1494 curr->comm, task_pid_nr(curr), 1494 curr->comm, task_pid_nr(curr),
@@ -1717,7 +1717,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
1717 printk("\n"); 1717 printk("\n");
1718 printk("=============================================\n"); 1718 printk("=============================================\n");
1719 printk("[ INFO: possible recursive locking detected ]\n"); 1719 printk("[ INFO: possible recursive locking detected ]\n");
1720 print_kernel_version(); 1720 print_kernel_ident();
1721 printk("---------------------------------------------\n"); 1721 printk("---------------------------------------------\n");
1722 printk("%s/%d is trying to acquire lock:\n", 1722 printk("%s/%d is trying to acquire lock:\n",
1723 curr->comm, task_pid_nr(curr)); 1723 curr->comm, task_pid_nr(curr));
@@ -2224,7 +2224,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
2224 printk("\n"); 2224 printk("\n");
2225 printk("=================================\n"); 2225 printk("=================================\n");
2226 printk("[ INFO: inconsistent lock state ]\n"); 2226 printk("[ INFO: inconsistent lock state ]\n");
2227 print_kernel_version(); 2227 print_kernel_ident();
2228 printk("---------------------------------\n"); 2228 printk("---------------------------------\n");
2229 2229
2230 printk("inconsistent {%s} -> {%s} usage.\n", 2230 printk("inconsistent {%s} -> {%s} usage.\n",
@@ -2289,7 +2289,7 @@ print_irq_inversion_bug(struct task_struct *curr,
2289 printk("\n"); 2289 printk("\n");
2290 printk("=========================================================\n"); 2290 printk("=========================================================\n");
2291 printk("[ INFO: possible irq lock inversion dependency detected ]\n"); 2291 printk("[ INFO: possible irq lock inversion dependency detected ]\n");
2292 print_kernel_version(); 2292 print_kernel_ident();
2293 printk("---------------------------------------------------------\n"); 2293 printk("---------------------------------------------------------\n");
2294 printk("%s/%d just changed the state of lock:\n", 2294 printk("%s/%d just changed the state of lock:\n",
2295 curr->comm, task_pid_nr(curr)); 2295 curr->comm, task_pid_nr(curr));
@@ -3175,6 +3175,7 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3175 printk("\n"); 3175 printk("\n");
3176 printk("=====================================\n"); 3176 printk("=====================================\n");
3177 printk("[ BUG: bad unlock balance detected! ]\n"); 3177 printk("[ BUG: bad unlock balance detected! ]\n");
3178 print_kernel_ident();
3178 printk("-------------------------------------\n"); 3179 printk("-------------------------------------\n");
3179 printk("%s/%d is trying to release lock (", 3180 printk("%s/%d is trying to release lock (",
3180 curr->comm, task_pid_nr(curr)); 3181 curr->comm, task_pid_nr(curr));
@@ -3619,6 +3620,7 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
3619 printk("\n"); 3620 printk("\n");
3620 printk("=================================\n"); 3621 printk("=================================\n");
3621 printk("[ BUG: bad contention detected! ]\n"); 3622 printk("[ BUG: bad contention detected! ]\n");
3623 print_kernel_ident();
3622 printk("---------------------------------\n"); 3624 printk("---------------------------------\n");
3623 printk("%s/%d is trying to contend lock (", 3625 printk("%s/%d is trying to contend lock (",
3624 curr->comm, task_pid_nr(curr)); 3626 curr->comm, task_pid_nr(curr));
@@ -3974,7 +3976,8 @@ void __init lockdep_info(void)
3974 3976
3975#ifdef CONFIG_DEBUG_LOCKDEP 3977#ifdef CONFIG_DEBUG_LOCKDEP
3976 if (lockdep_init_error) { 3978 if (lockdep_init_error) {
3977 printk("WARNING: lockdep init error! Arch code didn't call lockdep_init() early enough?\n"); 3979 printk("WARNING: lockdep init error! lock-%s was acquired"
3980 "before lockdep_init\n", lock_init_error);
3978 printk("Call stack leading to lockdep invocation was:\n"); 3981 printk("Call stack leading to lockdep invocation was:\n");
3979 print_stack_trace(&lockdep_init_trace, 0); 3982 print_stack_trace(&lockdep_init_trace, 0);
3980 } 3983 }
@@ -3993,6 +3996,7 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3993 printk("\n"); 3996 printk("\n");
3994 printk("=========================\n"); 3997 printk("=========================\n");
3995 printk("[ BUG: held lock freed! ]\n"); 3998 printk("[ BUG: held lock freed! ]\n");
3999 print_kernel_ident();
3996 printk("-------------------------\n"); 4000 printk("-------------------------\n");
3997 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", 4001 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
3998 curr->comm, task_pid_nr(curr), mem_from, mem_to-1); 4002 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
@@ -4050,6 +4054,7 @@ static void print_held_locks_bug(struct task_struct *curr)
4050 printk("\n"); 4054 printk("\n");
4051 printk("=====================================\n"); 4055 printk("=====================================\n");
4052 printk("[ BUG: lock held at task exit time! ]\n"); 4056 printk("[ BUG: lock held at task exit time! ]\n");
4057 print_kernel_ident();
4053 printk("-------------------------------------\n"); 4058 printk("-------------------------------------\n");
4054 printk("%s/%d is exiting with locks still held!\n", 4059 printk("%s/%d is exiting with locks still held!\n",
4055 curr->comm, task_pid_nr(curr)); 4060 curr->comm, task_pid_nr(curr));
@@ -4147,6 +4152,7 @@ void lockdep_sys_exit(void)
4147 printk("\n"); 4152 printk("\n");
4148 printk("================================================\n"); 4153 printk("================================================\n");
4149 printk("[ BUG: lock held when returning to user space! ]\n"); 4154 printk("[ BUG: lock held when returning to user space! ]\n");
4155 print_kernel_ident();
4150 printk("------------------------------------------------\n"); 4156 printk("------------------------------------------------\n");
4151 printk("%s/%d is leaving the kernel with locks still held!\n", 4157 printk("%s/%d is leaving the kernel with locks still held!\n",
4152 curr->comm, curr->pid); 4158 curr->comm, curr->pid);
@@ -4166,10 +4172,33 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4166 printk("\n"); 4172 printk("\n");
4167 printk("===============================\n"); 4173 printk("===============================\n");
4168 printk("[ INFO: suspicious RCU usage. ]\n"); 4174 printk("[ INFO: suspicious RCU usage. ]\n");
4175 print_kernel_ident();
4169 printk("-------------------------------\n"); 4176 printk("-------------------------------\n");
4170 printk("%s:%d %s!\n", file, line, s); 4177 printk("%s:%d %s!\n", file, line, s);
4171 printk("\nother info that might help us debug this:\n\n"); 4178 printk("\nother info that might help us debug this:\n\n");
4172 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); 4179 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
4180
4181 /*
 4182	 * If a CPU is in the RCU-free window in idle (i.e., in the section
 4183	 * between rcu_idle_enter() and rcu_idle_exit()), then RCU
4184 * considers that CPU to be in an "extended quiescent state",
4185 * which means that RCU will be completely ignoring that CPU.
4186 * Therefore, rcu_read_lock() and friends have absolutely no
4187 * effect on a CPU running in that state. In other words, even if
4188 * such an RCU-idle CPU has called rcu_read_lock(), RCU might well
4189 * delete data structures out from under it. RCU really has no
4190 * choice here: we need to keep an RCU-free window in idle where
 4191	 * the CPU may possibly enter into low power mode. This way, other
 4192	 * CPUs that started a grace period can notice the extended quiescent
 4193	 * state. Otherwise we would delay every grace period for as long as
 4194	 * we run in the idle task.
4195 *
4196 * So complain bitterly if someone does call rcu_read_lock(),
4197 * rcu_read_lock_bh() and so on from extended quiescent states.
4198 */
4199 if (rcu_is_cpu_idle())
4200 printk("RCU used illegally from extended quiescent state!\n");
4201
4173 lockdep_print_held_locks(curr); 4202 lockdep_print_held_locks(curr);
4174 printk("\nstack backtrace:\n"); 4203 printk("\nstack backtrace:\n");
4175 dump_stack(); 4204 dump_stack();
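
The hunks above thread print_kernel_ident() into every lockdep splat header so that pasted bug reports identify the kernel that produced them. A minimal sketch of such a helper, assuming it only needs the release and build strings from utsname (the real helper's exact output may differ):

	#include <linux/utsname.h>

	/* Sketch: one line of kernel identification for splat headers. */
	static void print_kernel_ident(void)
	{
		/* e.g. "3.2.0-rc1 #42": release plus the build counter. */
		printk("%s %.*s\n", init_utsname()->release,
		       (int)strcspn(init_utsname()->version, " "),
		       init_utsname()->version);
	}
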
diff --git a/kernel/panic.c b/kernel/panic.c
index b26593604214..3458469eb7c3 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -237,11 +237,20 @@ void add_taint(unsigned flag)
237 * Can't trust the integrity of the kernel anymore. 237 * Can't trust the integrity of the kernel anymore.
 238	 * We don't directly call debug_locks_off() because the issue	 238	 * We don't directly call debug_locks_off() because the issue
239 * is not necessarily serious enough to set oops_in_progress to 1 239 * is not necessarily serious enough to set oops_in_progress to 1
240 * Also we want to keep up lockdep for staging development and 240 * Also we want to keep up lockdep for staging/out-of-tree
 241	 * post-warning case.	 241	 * development and the post-warning case.
242 */ 242 */
243 if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off()) 243 switch (flag) {
244 printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n"); 244 case TAINT_CRAP:
245 case TAINT_OOT_MODULE:
246 case TAINT_WARN:
247 case TAINT_FIRMWARE_WORKAROUND:
248 break;
249
250 default:
251 if (__debug_locks_off())
252 printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
253 }
245 254
246 set_bit(flag, &tainted_mask); 255 set_bit(flag, &tainted_mask);
247} 256}
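
The rewritten add_taint() whitelists the taint flags that are benign from lockdep's point of view; everything else still turns lock debugging off. Restated as a standalone predicate (illustration only, not code from the patch):

	/* Illustrative: taints that should NOT disable lock debugging. */
	static bool taint_keeps_lockdep(unsigned flag)
	{
		switch (flag) {
		case TAINT_CRAP:		/* staging driver loaded */
		case TAINT_OOT_MODULE:		/* out-of-tree module loaded */
		case TAINT_WARN:		/* a WARN() already fired */
		case TAINT_FIRMWARE_WORKAROUND:	/* firmware quirk applied */
			return true;
		default:
			return false;
		}
	}
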
diff --git a/kernel/printk.c b/kernel/printk.c
index 7982a0a841ea..989e4a52da76 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -199,7 +199,7 @@ void __init setup_log_buf(int early)
199 unsigned long mem; 199 unsigned long mem;
200 200
201 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE); 201 mem = memblock_alloc(new_log_buf_len, PAGE_SIZE);
202 if (mem == MEMBLOCK_ERROR) 202 if (!mem)
203 return; 203 return;
204 new_log_buf = __va(mem); 204 new_log_buf = __va(mem);
205 } else { 205 } else {
@@ -688,6 +688,7 @@ static void zap_locks(void)
688 688
689 oops_timestamp = jiffies; 689 oops_timestamp = jiffies;
690 690
691 debug_locks_off();
691 /* If a crash is occurring, make sure we can't deadlock */ 692 /* If a crash is occurring, make sure we can't deadlock */
692 raw_spin_lock_init(&logbuf_lock); 693 raw_spin_lock_init(&logbuf_lock);
693 /* And make sure that we print immediately */ 694 /* And make sure that we print immediately */
@@ -840,9 +841,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
840 boot_delay_msec(); 841 boot_delay_msec();
841 printk_delay(); 842 printk_delay();
842 843
843 preempt_disable();
844 /* This stops the holder of console_sem just where we want him */ 844 /* This stops the holder of console_sem just where we want him */
845 raw_local_irq_save(flags); 845 local_irq_save(flags);
846 this_cpu = smp_processor_id(); 846 this_cpu = smp_processor_id();
847 847
848 /* 848 /*
@@ -856,7 +856,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
856 * recursion and return - but flag the recursion so that 856 * recursion and return - but flag the recursion so that
857 * it can be printed at the next appropriate moment: 857 * it can be printed at the next appropriate moment:
858 */ 858 */
859 if (!oops_in_progress) { 859 if (!oops_in_progress && !lockdep_recursing(current)) {
860 recursion_bug = 1; 860 recursion_bug = 1;
861 goto out_restore_irqs; 861 goto out_restore_irqs;
862 } 862 }
@@ -962,9 +962,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
962 962
963 lockdep_on(); 963 lockdep_on();
964out_restore_irqs: 964out_restore_irqs:
965 raw_local_irq_restore(flags); 965 local_irq_restore(flags);
966 966
967 preempt_enable();
968 return printed_len; 967 return printed_len;
969} 968}
970EXPORT_SYMBOL(printk); 969EXPORT_SYMBOL(printk);
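
vprintk() now tolerates recursion that originates inside lockdep itself rather than flagging it as a printk recursion bug. lockdep_recursing() is, to a first approximation, a per-task flag check (a sketch; the real accessor lives in <linux/lockdep.h>):

	/* Sketch: nonzero while @tsk is executing lockdep internals. */
	#define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)

The switch from raw_local_irq_save() to local_irq_save() appears deliberate: with lockdep recursion handled explicitly, printk no longer needs to hide its irq-state changes from lockdep, and the preempt_disable() pair becomes redundant because interrupts stay disabled across the same region.
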
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 24d04477b257..78ab24a7b0e4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -96,9 +96,20 @@ void __ptrace_unlink(struct task_struct *child)
96 */ 96 */
97 if (!(child->flags & PF_EXITING) && 97 if (!(child->flags & PF_EXITING) &&
98 (child->signal->flags & SIGNAL_STOP_STOPPED || 98 (child->signal->flags & SIGNAL_STOP_STOPPED ||
99 child->signal->group_stop_count)) 99 child->signal->group_stop_count)) {
100 child->jobctl |= JOBCTL_STOP_PENDING; 100 child->jobctl |= JOBCTL_STOP_PENDING;
101 101
102 /*
103 * This is only possible if this thread was cloned by the
 104	 * traced task running in the stopped group; set the signal
 105	 * for future reports.
106 * FIXME: we should change ptrace_init_task() to handle this
107 * case.
108 */
109 if (!(child->jobctl & JOBCTL_STOP_SIGMASK))
110 child->jobctl |= SIGSTOP;
111 }
112
102 /* 113 /*
103 * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick 114 * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
104 * @child in the butt. Note that @resume should be used iff @child 115 * @child in the butt. Note that @resume should be used iff @child
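
The __ptrace_unlink() fix depends on the group-stop signal being encoded in the low bits of task->jobctl. A sketch of that encoding, assuming the JOBCTL_STOP_SIGMASK layout from <linux/sched.h>:

	#define JOBCTL_STOP_SIGMASK	0xffff	/* signr of a pending group stop */

	/* Which signal should a future group-stop report carry? */
	static inline int jobctl_stop_signr(struct task_struct *task)
	{
		return task->jobctl & JOBCTL_STOP_SIGMASK;	/* 0: not set yet */
	}

OR-ing in SIGSTOP, as the hunk does, therefore installs a default stop signal only when none was inherited.
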
diff --git a/kernel/rcu.h b/kernel/rcu.h
index f600868d550d..aa88baab5f78 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -30,6 +30,13 @@
30#endif /* #else #ifdef CONFIG_RCU_TRACE */ 30#endif /* #else #ifdef CONFIG_RCU_TRACE */
31 31
32/* 32/*
33 * Process-level increment to ->dynticks_nesting field. This allows for
34 * architectures that use half-interrupts and half-exceptions from
35 * process context.
36 */
37#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1)
38
39/*
33 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally 40 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
34 * by call_rcu() and rcu callback execution, and are therefore not part of the 41 * by call_rcu() and rcu callback execution, and are therefore not part of the
35 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. 42 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
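
The huge bias is the point: process context runs with ->dynticks_nesting near LLONG_MAX/2, so an architecture whose half-interrupts leave an unbalanced decrement can never accidentally drive the counter to zero, the one value that means "idle". A user-space sketch of the invariant:

	#include <assert.h>
	#include <limits.h>

	#define DYNTICK_TASK_NESTING	(LLONG_MAX / 2 - 1)

	int main(void)
	{
		long long nesting = DYNTICK_TASK_NESTING; /* process context */

		nesting++;		/* irq_enter() */
		nesting--;		/* irq_exit() */
		nesting--;		/* half-interrupt: exit never paired */
		assert(nesting > 0);	/* still unmistakably non-idle */

		nesting = 0;		/* rcu_idle_enter() crowbars to idle */
		assert(nesting == 0);	/* zero, and only zero, means idle */
		return 0;
	}
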
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c5b98e565aee..2bc4e135ff23 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -93,6 +93,8 @@ int rcu_read_lock_bh_held(void)
93{ 93{
94 if (!debug_lockdep_rcu_enabled()) 94 if (!debug_lockdep_rcu_enabled())
95 return 1; 95 return 1;
96 if (rcu_is_cpu_idle())
97 return 0;
96 return in_softirq() || irqs_disabled(); 98 return in_softirq() || irqs_disabled();
97} 99}
98EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); 100EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
@@ -316,3 +318,13 @@ struct debug_obj_descr rcuhead_debug_descr = {
316}; 318};
317EXPORT_SYMBOL_GPL(rcuhead_debug_descr); 319EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
318#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 320#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
321
322#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
323void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
324{
325 trace_rcu_torture_read(rcutorturename, rhp);
326}
327EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
328#else
329#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
330#endif
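
rcu_read_lock_bh_held() exists mostly to feed the condition argument of rcu_dereference_check(); the new rcu_is_cpu_idle() test makes that condition fail on idle CPUs even when softirqs happen to be disabled. A sketch of a typical caller, with gp a hypothetical RCU-protected pointer:

	struct foo __rcu *gp;	/* hypothetical global, written under a lock */

	static struct foo *get_foo(void)
	{
		/* Splats unless in an RCU-BH read-side critical section. */
		return rcu_dereference_check(gp, rcu_read_lock_bh_held());
	}
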
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 636af6d9c6e5..977296dca0a4 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -53,31 +53,137 @@ static void __call_rcu(struct rcu_head *head,
53 53
54#include "rcutiny_plugin.h" 54#include "rcutiny_plugin.h"
55 55
56#ifdef CONFIG_NO_HZ 56static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
57 57
58static long rcu_dynticks_nesting = 1; 58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
59static void rcu_idle_enter_common(long long oldval)
60{
61 if (rcu_dynticks_nesting) {
62 RCU_TRACE(trace_rcu_dyntick("--=",
63 oldval, rcu_dynticks_nesting));
64 return;
65 }
66 RCU_TRACE(trace_rcu_dyntick("Start", oldval, rcu_dynticks_nesting));
67 if (!is_idle_task(current)) {
68 struct task_struct *idle = idle_task(smp_processor_id());
69
70 RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
71 oldval, rcu_dynticks_nesting));
72 ftrace_dump(DUMP_ALL);
73 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
74 current->pid, current->comm,
75 idle->pid, idle->comm); /* must be idle task! */
76 }
77 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
78}
59 79
60/* 80/*
61 * Enter dynticks-idle mode, which is an extended quiescent state 81 * Enter idle, which is an extended quiescent state if we have fully
62 * if we have fully entered that mode (i.e., if the new value of 82 * entered that mode (i.e., if the new value of dynticks_nesting is zero).
63 * dynticks_nesting is zero).
64 */ 83 */
65void rcu_enter_nohz(void) 84void rcu_idle_enter(void)
66{ 85{
67 if (--rcu_dynticks_nesting == 0) 86 unsigned long flags;
68 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 87 long long oldval;
88
89 local_irq_save(flags);
90 oldval = rcu_dynticks_nesting;
91 rcu_dynticks_nesting = 0;
92 rcu_idle_enter_common(oldval);
93 local_irq_restore(flags);
69} 94}
70 95
71/* 96/*
72 * Exit dynticks-idle mode, so that we are no longer in an extended 97 * Exit an interrupt handler towards idle.
73 * quiescent state.
74 */ 98 */
75void rcu_exit_nohz(void) 99void rcu_irq_exit(void)
100{
101 unsigned long flags;
102 long long oldval;
103
104 local_irq_save(flags);
105 oldval = rcu_dynticks_nesting;
106 rcu_dynticks_nesting--;
107 WARN_ON_ONCE(rcu_dynticks_nesting < 0);
108 rcu_idle_enter_common(oldval);
109 local_irq_restore(flags);
110}
111
112/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
113static void rcu_idle_exit_common(long long oldval)
76{ 114{
115 if (oldval) {
116 RCU_TRACE(trace_rcu_dyntick("++=",
117 oldval, rcu_dynticks_nesting));
118 return;
119 }
120 RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
121 if (!is_idle_task(current)) {
122 struct task_struct *idle = idle_task(smp_processor_id());
123
124 RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
125 oldval, rcu_dynticks_nesting));
126 ftrace_dump(DUMP_ALL);
127 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
128 current->pid, current->comm,
129 idle->pid, idle->comm); /* must be idle task! */
130 }
131}
132
133/*
134 * Exit idle, so that we are no longer in an extended quiescent state.
135 */
136void rcu_idle_exit(void)
137{
138 unsigned long flags;
139 long long oldval;
140
141 local_irq_save(flags);
142 oldval = rcu_dynticks_nesting;
143 WARN_ON_ONCE(oldval != 0);
144 rcu_dynticks_nesting = DYNTICK_TASK_NESTING;
145 rcu_idle_exit_common(oldval);
146 local_irq_restore(flags);
147}
148
149/*
150 * Enter an interrupt handler, moving away from idle.
151 */
152void rcu_irq_enter(void)
153{
154 unsigned long flags;
155 long long oldval;
156
157 local_irq_save(flags);
158 oldval = rcu_dynticks_nesting;
77 rcu_dynticks_nesting++; 159 rcu_dynticks_nesting++;
160 WARN_ON_ONCE(rcu_dynticks_nesting == 0);
161 rcu_idle_exit_common(oldval);
162 local_irq_restore(flags);
163}
164
165#ifdef CONFIG_PROVE_RCU
166
167/*
168 * Test whether RCU thinks that the current CPU is idle.
169 */
170int rcu_is_cpu_idle(void)
171{
172 return !rcu_dynticks_nesting;
78} 173}
174EXPORT_SYMBOL(rcu_is_cpu_idle);
175
176#endif /* #ifdef CONFIG_PROVE_RCU */
79 177
80#endif /* #ifdef CONFIG_NO_HZ */ 178/*
179 * Test whether the current CPU was interrupted from idle. Nested
 180 * interrupts don't count; we must be running at the first interrupt
181 * level.
182 */
183int rcu_is_cpu_rrupt_from_idle(void)
184{
185 return rcu_dynticks_nesting <= 0;
186}
81 187
82/* 188/*
83 * Helper function for rcu_sched_qs() and rcu_bh_qs(). 189 * Helper function for rcu_sched_qs() and rcu_bh_qs().
@@ -126,14 +232,13 @@ void rcu_bh_qs(int cpu)
126 232
127/* 233/*
128 * Check to see if the scheduling-clock interrupt came from an extended 234 * Check to see if the scheduling-clock interrupt came from an extended
129 * quiescent state, and, if so, tell RCU about it. 235 * quiescent state, and, if so, tell RCU about it. This function must
236 * be called from hardirq context. It is normally called from the
237 * scheduling-clock interrupt.
130 */ 238 */
131void rcu_check_callbacks(int cpu, int user) 239void rcu_check_callbacks(int cpu, int user)
132{ 240{
133 if (user || 241 if (user || rcu_is_cpu_rrupt_from_idle())
134 (idle_cpu(cpu) &&
135 !in_softirq() &&
136 hardirq_count() <= (1 << HARDIRQ_SHIFT)))
137 rcu_sched_qs(cpu); 242 rcu_sched_qs(cpu);
138 else if (!in_softirq()) 243 else if (!in_softirq())
139 rcu_bh_qs(cpu); 244 rcu_bh_qs(cpu);
@@ -154,7 +259,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
154 /* If no RCU callbacks ready to invoke, just return. */ 259 /* If no RCU callbacks ready to invoke, just return. */
155 if (&rcp->rcucblist == rcp->donetail) { 260 if (&rcp->rcucblist == rcp->donetail) {
156 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); 261 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
157 RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); 262 RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
263 ACCESS_ONCE(rcp->rcucblist),
264 need_resched(),
265 is_idle_task(current),
266 rcu_is_callbacks_kthread()));
158 return; 267 return;
159 } 268 }
160 269
@@ -183,7 +292,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
183 RCU_TRACE(cb_count++); 292 RCU_TRACE(cb_count++);
184 } 293 }
185 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); 294 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
186 RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); 295 RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
296 is_idle_task(current),
297 rcu_is_callbacks_kthread()));
187} 298}
188 299
189static void rcu_process_callbacks(struct softirq_action *unused) 300static void rcu_process_callbacks(struct softirq_action *unused)
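
All of the new tracing in rcutiny.c is wrapped in RCU_TRACE() so that !CONFIG_RCU_TRACE builds compile it away entirely. The wrapper, paraphrased from kernel/rcu.h:

	#ifdef CONFIG_RCU_TRACE
	#define RCU_TRACE(stmt)	stmt
	#else
	#define RCU_TRACE(stmt)
	#endif /* #else #ifdef CONFIG_RCU_TRACE */
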
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 2b0484a5dc28..9cb1ae4aabdd 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -312,8 +312,8 @@ static int rcu_boost(void)
312 rt_mutex_lock(&mtx); 312 rt_mutex_lock(&mtx);
313 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 313 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
314 314
315 return rcu_preempt_ctrlblk.boost_tasks != NULL || 315 return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
316 rcu_preempt_ctrlblk.exp_tasks != NULL; 316 ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
317} 317}
318 318
319/* 319/*
@@ -885,6 +885,19 @@ static void invoke_rcu_callbacks(void)
885 wake_up(&rcu_kthread_wq); 885 wake_up(&rcu_kthread_wq);
886} 886}
887 887
888#ifdef CONFIG_RCU_TRACE
889
890/*
891 * Is the current CPU running the RCU-callbacks kthread?
892 * Caller must have preemption disabled.
893 */
894static bool rcu_is_callbacks_kthread(void)
895{
896 return rcu_kthread_task == current;
897}
898
899#endif /* #ifdef CONFIG_RCU_TRACE */
900
888/* 901/*
889 * This kthread invokes RCU callbacks whose grace periods have 902 * This kthread invokes RCU callbacks whose grace periods have
890 * elapsed. It is awakened as needed, and takes the place of the 903 * elapsed. It is awakened as needed, and takes the place of the
@@ -938,6 +951,18 @@ void invoke_rcu_callbacks(void)
938 raise_softirq(RCU_SOFTIRQ); 951 raise_softirq(RCU_SOFTIRQ);
939} 952}
940 953
954#ifdef CONFIG_RCU_TRACE
955
956/*
957 * There is no callback kthread, so this thread is never it.
958 */
959static bool rcu_is_callbacks_kthread(void)
960{
961 return false;
962}
963
964#endif /* #ifdef CONFIG_RCU_TRACE */
965
941void rcu_init(void) 966void rcu_init(void)
942{ 967{
943 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 968 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
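
The rcu_boost() change wraps both pointer reads in ACCESS_ONCE() so the compiler can neither fuse them with earlier loads nor re-read them after the rt_mutex dance. ACCESS_ONCE() is the classic volatile cast from <linux/compiler.h>:

	#define ACCESS_ONCE(x)	(*(volatile typeof(x) *)&(x))

	/* Usage: force exactly one load of each boost-list pointer. */
	return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
	       ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
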
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 764825c2685c..88f17b8a3b1d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -61,9 +61,11 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
61static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ 61static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
62static int stutter = 5; /* Start/stop testing interval (in sec) */ 62static int stutter = 5; /* Start/stop testing interval (in sec) */
63static int irqreader = 1; /* RCU readers from irq (timers). */ 63static int irqreader = 1; /* RCU readers from irq (timers). */
64static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ 64static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
65static int fqs_holdoff = 0; /* Hold time within burst (us). */ 65static int fqs_holdoff; /* Hold time within burst (us). */
66static int fqs_stutter = 3; /* Wait time between bursts (s). */ 66static int fqs_stutter = 3; /* Wait time between bursts (s). */
67static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
68static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
67static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ 69static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
68static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ 70static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
69static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ 71static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
@@ -91,6 +93,10 @@ module_param(fqs_holdoff, int, 0444);
91MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); 93MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
92module_param(fqs_stutter, int, 0444); 94module_param(fqs_stutter, int, 0444);
93MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); 95MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
96module_param(onoff_interval, int, 0444);
97MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
98module_param(shutdown_secs, int, 0444);
99MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable.");
94module_param(test_boost, int, 0444); 100module_param(test_boost, int, 0444);
95MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); 101MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
96module_param(test_boost_interval, int, 0444); 102module_param(test_boost_interval, int, 0444);
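
The two new knobs follow rcutorture's usual pattern for boot-time-only parameters: a static int, module_param() with mode 0444 (readable in sysfs, not writable), and a MODULE_PARM_DESC(). In isolation:

	static int onoff_interval;	/* seconds between hotplug ops, 0 = off */
	module_param(onoff_interval, int, 0444);
	MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
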
@@ -119,6 +125,10 @@ static struct task_struct *shuffler_task;
119static struct task_struct *stutter_task; 125static struct task_struct *stutter_task;
120static struct task_struct *fqs_task; 126static struct task_struct *fqs_task;
121static struct task_struct *boost_tasks[NR_CPUS]; 127static struct task_struct *boost_tasks[NR_CPUS];
128static struct task_struct *shutdown_task;
129#ifdef CONFIG_HOTPLUG_CPU
130static struct task_struct *onoff_task;
131#endif /* #ifdef CONFIG_HOTPLUG_CPU */
122 132
123#define RCU_TORTURE_PIPE_LEN 10 133#define RCU_TORTURE_PIPE_LEN 10
124 134
@@ -149,6 +159,10 @@ static long n_rcu_torture_boost_rterror;
149static long n_rcu_torture_boost_failure; 159static long n_rcu_torture_boost_failure;
150static long n_rcu_torture_boosts; 160static long n_rcu_torture_boosts;
151static long n_rcu_torture_timers; 161static long n_rcu_torture_timers;
162static long n_offline_attempts;
163static long n_offline_successes;
164static long n_online_attempts;
165static long n_online_successes;
152static struct list_head rcu_torture_removed; 166static struct list_head rcu_torture_removed;
153static cpumask_var_t shuffle_tmp_mask; 167static cpumask_var_t shuffle_tmp_mask;
154 168
@@ -160,6 +174,8 @@ static int stutter_pause_test;
160#define RCUTORTURE_RUNNABLE_INIT 0 174#define RCUTORTURE_RUNNABLE_INIT 0
161#endif 175#endif
162int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 176int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
177module_param(rcutorture_runnable, int, 0444);
178MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
163 179
164#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) 180#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
165#define rcu_can_boost() 1 181#define rcu_can_boost() 1
@@ -167,6 +183,7 @@ int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
167#define rcu_can_boost() 0 183#define rcu_can_boost() 0
168#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ 184#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
169 185
186static unsigned long shutdown_time; /* jiffies to system shutdown. */
170static unsigned long boost_starttime; /* jiffies of next boost test start. */ 187static unsigned long boost_starttime; /* jiffies of next boost test start. */
171DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 188DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
172 /* and boost task create/destroy. */ 189 /* and boost task create/destroy. */
@@ -182,6 +199,9 @@ static int fullstop = FULLSTOP_RMMOD;
182 */ 199 */
183static DEFINE_MUTEX(fullstop_mutex); 200static DEFINE_MUTEX(fullstop_mutex);
184 201
202/* Forward reference. */
203static void rcu_torture_cleanup(void);
204
185/* 205/*
186 * Detect and respond to a system shutdown. 206 * Detect and respond to a system shutdown.
187 */ 207 */
@@ -612,6 +632,30 @@ static struct rcu_torture_ops srcu_ops = {
612 .name = "srcu" 632 .name = "srcu"
613}; 633};
614 634
635static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl)
636{
637 return srcu_read_lock_raw(&srcu_ctl);
638}
639
640static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
641{
642 srcu_read_unlock_raw(&srcu_ctl, idx);
643}
644
645static struct rcu_torture_ops srcu_raw_ops = {
646 .init = srcu_torture_init,
647 .cleanup = srcu_torture_cleanup,
648 .readlock = srcu_torture_read_lock_raw,
649 .read_delay = srcu_read_delay,
650 .readunlock = srcu_torture_read_unlock_raw,
651 .completed = srcu_torture_completed,
652 .deferred_free = rcu_sync_torture_deferred_free,
653 .sync = srcu_torture_synchronize,
654 .cb_barrier = NULL,
655 .stats = srcu_torture_stats,
656 .name = "srcu_raw"
657};
658
615static void srcu_torture_synchronize_expedited(void) 659static void srcu_torture_synchronize_expedited(void)
616{ 660{
617 synchronize_srcu_expedited(&srcu_ctl); 661 synchronize_srcu_expedited(&srcu_ctl);
@@ -913,6 +957,18 @@ rcu_torture_fakewriter(void *arg)
913 return 0; 957 return 0;
914} 958}
915 959
960void rcutorture_trace_dump(void)
961{
962 static atomic_t beenhere = ATOMIC_INIT(0);
963
964 if (atomic_read(&beenhere))
965 return;
966 if (atomic_xchg(&beenhere, 1) != 0)
967 return;
968 do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
969 ftrace_dump(DUMP_ALL);
970}
971
916/* 972/*
917 * RCU torture reader from timer handler. Dereferences rcu_torture_current, 973 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
918 * incrementing the corresponding element of the pipeline array. The 974 * incrementing the corresponding element of the pipeline array. The
@@ -934,6 +990,7 @@ static void rcu_torture_timer(unsigned long unused)
934 rcu_read_lock_bh_held() || 990 rcu_read_lock_bh_held() ||
935 rcu_read_lock_sched_held() || 991 rcu_read_lock_sched_held() ||
936 srcu_read_lock_held(&srcu_ctl)); 992 srcu_read_lock_held(&srcu_ctl));
993 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
937 if (p == NULL) { 994 if (p == NULL) {
938 /* Leave because rcu_torture_writer is not yet underway */ 995 /* Leave because rcu_torture_writer is not yet underway */
939 cur_ops->readunlock(idx); 996 cur_ops->readunlock(idx);
@@ -951,6 +1008,8 @@ static void rcu_torture_timer(unsigned long unused)
951 /* Should not happen, but... */ 1008 /* Should not happen, but... */
952 pipe_count = RCU_TORTURE_PIPE_LEN; 1009 pipe_count = RCU_TORTURE_PIPE_LEN;
953 } 1010 }
1011 if (pipe_count > 1)
1012 rcutorture_trace_dump();
954 __this_cpu_inc(rcu_torture_count[pipe_count]); 1013 __this_cpu_inc(rcu_torture_count[pipe_count]);
955 completed = cur_ops->completed() - completed; 1014 completed = cur_ops->completed() - completed;
956 if (completed > RCU_TORTURE_PIPE_LEN) { 1015 if (completed > RCU_TORTURE_PIPE_LEN) {
@@ -994,6 +1053,7 @@ rcu_torture_reader(void *arg)
994 rcu_read_lock_bh_held() || 1053 rcu_read_lock_bh_held() ||
995 rcu_read_lock_sched_held() || 1054 rcu_read_lock_sched_held() ||
996 srcu_read_lock_held(&srcu_ctl)); 1055 srcu_read_lock_held(&srcu_ctl));
1056 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
997 if (p == NULL) { 1057 if (p == NULL) {
998 /* Wait for rcu_torture_writer to get underway */ 1058 /* Wait for rcu_torture_writer to get underway */
999 cur_ops->readunlock(idx); 1059 cur_ops->readunlock(idx);
@@ -1009,6 +1069,8 @@ rcu_torture_reader(void *arg)
1009 /* Should not happen, but... */ 1069 /* Should not happen, but... */
1010 pipe_count = RCU_TORTURE_PIPE_LEN; 1070 pipe_count = RCU_TORTURE_PIPE_LEN;
1011 } 1071 }
1072 if (pipe_count > 1)
1073 rcutorture_trace_dump();
1012 __this_cpu_inc(rcu_torture_count[pipe_count]); 1074 __this_cpu_inc(rcu_torture_count[pipe_count]);
1013 completed = cur_ops->completed() - completed; 1075 completed = cur_ops->completed() - completed;
1014 if (completed > RCU_TORTURE_PIPE_LEN) { 1076 if (completed > RCU_TORTURE_PIPE_LEN) {
@@ -1056,7 +1118,8 @@ rcu_torture_printk(char *page)
1056 cnt += sprintf(&page[cnt], 1118 cnt += sprintf(&page[cnt],
1057 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " 1119 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d "
1058 "rtmbe: %d rtbke: %ld rtbre: %ld " 1120 "rtmbe: %d rtbke: %ld rtbre: %ld "
1059 "rtbf: %ld rtb: %ld nt: %ld", 1121 "rtbf: %ld rtb: %ld nt: %ld "
1122 "onoff: %ld/%ld:%ld/%ld",
1060 rcu_torture_current, 1123 rcu_torture_current,
1061 rcu_torture_current_version, 1124 rcu_torture_current_version,
1062 list_empty(&rcu_torture_freelist), 1125 list_empty(&rcu_torture_freelist),
@@ -1068,7 +1131,11 @@ rcu_torture_printk(char *page)
1068 n_rcu_torture_boost_rterror, 1131 n_rcu_torture_boost_rterror,
1069 n_rcu_torture_boost_failure, 1132 n_rcu_torture_boost_failure,
1070 n_rcu_torture_boosts, 1133 n_rcu_torture_boosts,
1071 n_rcu_torture_timers); 1134 n_rcu_torture_timers,
1135 n_online_successes,
1136 n_online_attempts,
1137 n_offline_successes,
1138 n_offline_attempts);
1072 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1139 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1073 n_rcu_torture_boost_ktrerror != 0 || 1140 n_rcu_torture_boost_ktrerror != 0 ||
1074 n_rcu_torture_boost_rterror != 0 || 1141 n_rcu_torture_boost_rterror != 0 ||
@@ -1232,12 +1299,14 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
1232 "shuffle_interval=%d stutter=%d irqreader=%d " 1299 "shuffle_interval=%d stutter=%d irqreader=%d "
1233 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " 1300 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
1234 "test_boost=%d/%d test_boost_interval=%d " 1301 "test_boost=%d/%d test_boost_interval=%d "
1235 "test_boost_duration=%d\n", 1302 "test_boost_duration=%d shutdown_secs=%d "
1303 "onoff_interval=%d\n",
1236 torture_type, tag, nrealreaders, nfakewriters, 1304 torture_type, tag, nrealreaders, nfakewriters,
1237 stat_interval, verbose, test_no_idle_hz, shuffle_interval, 1305 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
1238 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, 1306 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
1239 test_boost, cur_ops->can_boost, 1307 test_boost, cur_ops->can_boost,
1240 test_boost_interval, test_boost_duration); 1308 test_boost_interval, test_boost_duration, shutdown_secs,
1309 onoff_interval);
1241} 1310}
1242 1311
1243static struct notifier_block rcutorture_shutdown_nb = { 1312static struct notifier_block rcutorture_shutdown_nb = {
@@ -1287,6 +1356,131 @@ static int rcutorture_booster_init(int cpu)
1287 return 0; 1356 return 0;
1288} 1357}
1289 1358
1359/*
1360 * Cause the rcutorture test to shut down the system after the test has
1361 * run for the time specified by the shutdown_secs module parameter.
1362 */
1363static int
1364rcu_torture_shutdown(void *arg)
1365{
1366 long delta;
1367 unsigned long jiffies_snap;
1368
1369 VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started");
1370 jiffies_snap = ACCESS_ONCE(jiffies);
1371 while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
1372 !kthread_should_stop()) {
1373 delta = shutdown_time - jiffies_snap;
1374 if (verbose)
1375 printk(KERN_ALERT "%s" TORTURE_FLAG
1376 "rcu_torture_shutdown task: %lu "
1377 "jiffies remaining\n",
1378 torture_type, delta);
1379 schedule_timeout_interruptible(delta);
1380 jiffies_snap = ACCESS_ONCE(jiffies);
1381 }
1382 if (kthread_should_stop()) {
1383 VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping");
1384 return 0;
1385 }
1386
1387 /* OK, shut down the system. */
1388
1389 VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system");
1390 shutdown_task = NULL; /* Avoid self-kill deadlock. */
1391 rcu_torture_cleanup(); /* Get the success/failure message. */
1392 kernel_power_off(); /* Shut down the system. */
1393 return 0;
1394}
1395
1396#ifdef CONFIG_HOTPLUG_CPU
1397
1398/*
1399 * Execute random CPU-hotplug operations at the interval specified
1400 * by the onoff_interval module parameter.
1401 */
1402static int
1403rcu_torture_onoff(void *arg)
1404{
1405 int cpu;
1406 int maxcpu = -1;
1407 DEFINE_RCU_RANDOM(rand);
1408
1409 VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
1410 for_each_online_cpu(cpu)
1411 maxcpu = cpu;
1412 WARN_ON(maxcpu < 0);
1413 while (!kthread_should_stop()) {
1414 cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
1415 if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
1416 if (verbose)
1417 printk(KERN_ALERT "%s" TORTURE_FLAG
1418 "rcu_torture_onoff task: offlining %d\n",
1419 torture_type, cpu);
1420 n_offline_attempts++;
1421 if (cpu_down(cpu) == 0) {
1422 if (verbose)
1423 printk(KERN_ALERT "%s" TORTURE_FLAG
1424 "rcu_torture_onoff task: "
1425 "offlined %d\n",
1426 torture_type, cpu);
1427 n_offline_successes++;
1428 }
1429 } else if (cpu_is_hotpluggable(cpu)) {
1430 if (verbose)
1431 printk(KERN_ALERT "%s" TORTURE_FLAG
1432 "rcu_torture_onoff task: onlining %d\n",
1433 torture_type, cpu);
1434 n_online_attempts++;
1435 if (cpu_up(cpu) == 0) {
1436 if (verbose)
1437 printk(KERN_ALERT "%s" TORTURE_FLAG
1438 "rcu_torture_onoff task: "
1439 "onlined %d\n",
1440 torture_type, cpu);
1441 n_online_successes++;
1442 }
1443 }
1444 schedule_timeout_interruptible(onoff_interval * HZ);
1445 }
1446 VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping");
1447 return 0;
1448}
1449
1450static int
1451rcu_torture_onoff_init(void)
1452{
1453 if (onoff_interval <= 0)
1454 return 0;
1455 onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
1456 if (IS_ERR(onoff_task)) {
1457 onoff_task = NULL;
1458 return PTR_ERR(onoff_task);
1459 }
1460 return 0;
1461}
1462
1463static void rcu_torture_onoff_cleanup(void)
1464{
1465 if (onoff_task == NULL)
1466 return;
1467 VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task");
1468 kthread_stop(onoff_task);
1469}
1470
1471#else /* #ifdef CONFIG_HOTPLUG_CPU */
1472
1473static int
1474rcu_torture_onoff_init(void)
1475{
1476	return 0;	/* nothing to do without CPU hotplug */
1477}
1477
1478static void rcu_torture_onoff_cleanup(void)
1479{
1480}
1481
1482#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
1483
1290static int rcutorture_cpu_notify(struct notifier_block *self, 1484static int rcutorture_cpu_notify(struct notifier_block *self,
1291 unsigned long action, void *hcpu) 1485 unsigned long action, void *hcpu)
1292{ 1486{
@@ -1391,6 +1585,11 @@ rcu_torture_cleanup(void)
1391 for_each_possible_cpu(i) 1585 for_each_possible_cpu(i)
1392 rcutorture_booster_cleanup(i); 1586 rcutorture_booster_cleanup(i);
1393 } 1587 }
1588 if (shutdown_task != NULL) {
1589 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task");
1590 kthread_stop(shutdown_task);
1591 }
1592 rcu_torture_onoff_cleanup();
1394 1593
1395 /* Wait for all RCU callbacks to fire. */ 1594 /* Wait for all RCU callbacks to fire. */
1396 1595
@@ -1416,7 +1615,7 @@ rcu_torture_init(void)
1416 static struct rcu_torture_ops *torture_ops[] = 1615 static struct rcu_torture_ops *torture_ops[] =
1417 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1616 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1418 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, 1617 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1419 &srcu_ops, &srcu_expedited_ops, 1618 &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops,
1420 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1619 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1421 1620
1422 mutex_lock(&fullstop_mutex); 1621 mutex_lock(&fullstop_mutex);
@@ -1607,6 +1806,18 @@ rcu_torture_init(void)
1607 } 1806 }
1608 } 1807 }
1609 } 1808 }
1809 if (shutdown_secs > 0) {
1810 shutdown_time = jiffies + shutdown_secs * HZ;
1811 shutdown_task = kthread_run(rcu_torture_shutdown, NULL,
1812 "rcu_torture_shutdown");
1813 if (IS_ERR(shutdown_task)) {
1814 firsterr = PTR_ERR(shutdown_task);
1815 VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown");
1816 shutdown_task = NULL;
1817 goto unwind;
1818 }
1819 }
1820 rcu_torture_onoff_init();
1610 register_reboot_notifier(&rcutorture_shutdown_nb); 1821 register_reboot_notifier(&rcutorture_shutdown_nb);
1611 rcutorture_record_test_transition(); 1822 rcutorture_record_test_transition();
1612 mutex_unlock(&fullstop_mutex); 1823 mutex_unlock(&fullstop_mutex);
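
Both new kthreads (shutdown and onoff) use the standard kthread lifecycle: kthread_run() to create and wake, a loop gated on kthread_should_stop(), and kthread_stop() from cleanup. The skeleton, stripped of rcutorture specifics (a sketch, not the patch's code):

	#include <linux/kthread.h>

	static struct task_struct *tortask;

	static int torture_fn(void *unused)
	{
		while (!kthread_should_stop())
			schedule_timeout_interruptible(HZ); /* work, then nap */
		return 0;	/* collected by kthread_stop() */
	}

	static int torture_start(void)
	{
		tortask = kthread_run(torture_fn, NULL, "torture_fn");
		if (IS_ERR(tortask)) {
			int err = PTR_ERR(tortask);

			tortask = NULL;	/* never kthread_stop() an error value */
			return err;
		}
		return 0;
	}

Note how rcu_torture_shutdown() NULLs shutdown_task before powering off for the same reason: cleanup must never call kthread_stop() on a task that is busy taking the system down.
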
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6b76d812740c..6c4a6722abfd 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -69,7 +69,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
69 NUM_RCU_LVL_3, \ 69 NUM_RCU_LVL_3, \
70 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ 70 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
71 }, \ 71 }, \
72 .signaled = RCU_GP_IDLE, \ 72 .fqs_state = RCU_GP_IDLE, \
73 .gpnum = -300, \ 73 .gpnum = -300, \
74 .completed = -300, \ 74 .completed = -300, \
75 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ 75 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
195} 195}
196EXPORT_SYMBOL_GPL(rcu_note_context_switch); 196EXPORT_SYMBOL_GPL(rcu_note_context_switch);
197 197
198#ifdef CONFIG_NO_HZ
199DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 198DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
200 .dynticks_nesting = 1, 199 .dynticks_nesting = DYNTICK_TASK_NESTING,
201 .dynticks = ATOMIC_INIT(1), 200 .dynticks = ATOMIC_INIT(1),
202}; 201};
203#endif /* #ifdef CONFIG_NO_HZ */
204 202
205static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 203static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
206static int qhimark = 10000; /* If this many pending, ignore blimit. */ 204static int qhimark = 10000; /* If this many pending, ignore blimit. */
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
328 return 1; 326 return 1;
329 } 327 }
330 328
331 /* If preemptible RCU, no point in sending reschedule IPI. */ 329 /*
332 if (rdp->preemptible) 330 * The CPU is online, so send it a reschedule IPI. This forces
333 return 0; 331 * it through the scheduler, and (inefficiently) also handles cases
334 332 * where idle loops fail to inform RCU about the CPU being idle.
335 /* The CPU is online, so send it a reschedule IPI. */ 333 */
336 if (rdp->cpu != smp_processor_id()) 334 if (rdp->cpu != smp_processor_id())
337 smp_send_reschedule(rdp->cpu); 335 smp_send_reschedule(rdp->cpu);
338 else 336 else
@@ -343,59 +341,181 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
343 341
344#endif /* #ifdef CONFIG_SMP */ 342#endif /* #ifdef CONFIG_SMP */
345 343
346#ifdef CONFIG_NO_HZ 344/*
345 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
346 *
347 * If the new value of the ->dynticks_nesting counter now is zero,
348 * we really have entered idle, and must do the appropriate accounting.
349 * The caller must have disabled interrupts.
350 */
351static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
352{
353 trace_rcu_dyntick("Start", oldval, 0);
354 if (!is_idle_task(current)) {
355 struct task_struct *idle = idle_task(smp_processor_id());
356
357 trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
358 ftrace_dump(DUMP_ALL);
359 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
360 current->pid, current->comm,
361 idle->pid, idle->comm); /* must be idle task! */
362 }
363 rcu_prepare_for_idle(smp_processor_id());
364 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
365 smp_mb__before_atomic_inc(); /* See above. */
366 atomic_inc(&rdtp->dynticks);
367 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
368 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
369}
347 370
348/** 371/**
349 * rcu_enter_nohz - inform RCU that current CPU is entering nohz 372 * rcu_idle_enter - inform RCU that current CPU is entering idle
350 * 373 *
351 * Enter nohz mode, in other words, -leave- the mode in which RCU 374 * Enter idle mode, in other words, -leave- the mode in which RCU
352 * read-side critical sections can occur. (Though RCU read-side 375 * read-side critical sections can occur. (Though RCU read-side
353 * critical sections can occur in irq handlers in nohz mode, a possibility 376 * critical sections can occur in irq handlers in idle, a possibility
354 * handled by rcu_irq_enter() and rcu_irq_exit()). 377 * handled by irq_enter() and irq_exit().)
378 *
379 * We crowbar the ->dynticks_nesting field to zero to allow for
380 * the possibility of usermode upcalls having messed up our count
381 * of interrupt nesting level during the prior busy period.
355 */ 382 */
356void rcu_enter_nohz(void) 383void rcu_idle_enter(void)
357{ 384{
358 unsigned long flags; 385 unsigned long flags;
386 long long oldval;
359 struct rcu_dynticks *rdtp; 387 struct rcu_dynticks *rdtp;
360 388
361 local_irq_save(flags); 389 local_irq_save(flags);
362 rdtp = &__get_cpu_var(rcu_dynticks); 390 rdtp = &__get_cpu_var(rcu_dynticks);
363 if (--rdtp->dynticks_nesting) { 391 oldval = rdtp->dynticks_nesting;
364 local_irq_restore(flags); 392 rdtp->dynticks_nesting = 0;
365 return; 393 rcu_idle_enter_common(rdtp, oldval);
366 }
367 trace_rcu_dyntick("Start");
368 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
369 smp_mb__before_atomic_inc(); /* See above. */
370 atomic_inc(&rdtp->dynticks);
371 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
372 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
373 local_irq_restore(flags); 394 local_irq_restore(flags);
374} 395}
375 396
376/* 397/**
377 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz 398 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
399 *
400 * Exit from an interrupt handler, which might possibly result in entering
401 * idle mode, in other words, leaving the mode in which read-side critical
402 * sections can occur.
378 * 403 *
379 * Exit nohz mode, in other words, -enter- the mode in which RCU 404 * This code assumes that the idle loop never does anything that might
380 * read-side critical sections normally occur. 405 * result in unbalanced calls to irq_enter() and irq_exit(). If your
406 * architecture violates this assumption, RCU will give you what you
407 * deserve, good and hard. But very infrequently and irreproducibly.
408 *
409 * Use things like work queues to work around this limitation.
410 *
411 * You have been warned.
381 */ 412 */
382void rcu_exit_nohz(void) 413void rcu_irq_exit(void)
383{ 414{
384 unsigned long flags; 415 unsigned long flags;
416 long long oldval;
385 struct rcu_dynticks *rdtp; 417 struct rcu_dynticks *rdtp;
386 418
387 local_irq_save(flags); 419 local_irq_save(flags);
388 rdtp = &__get_cpu_var(rcu_dynticks); 420 rdtp = &__get_cpu_var(rcu_dynticks);
389 if (rdtp->dynticks_nesting++) { 421 oldval = rdtp->dynticks_nesting;
390 local_irq_restore(flags); 422 rdtp->dynticks_nesting--;
391 return; 423 WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
392 } 424 if (rdtp->dynticks_nesting)
425 trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
426 else
427 rcu_idle_enter_common(rdtp, oldval);
428 local_irq_restore(flags);
429}
430
431/*
432 * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
433 *
434 * If the new value of the ->dynticks_nesting counter was previously zero,
435 * we really have exited idle, and must do the appropriate accounting.
436 * The caller must have disabled interrupts.
437 */
438static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
439{
393 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 440 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
394 atomic_inc(&rdtp->dynticks); 441 atomic_inc(&rdtp->dynticks);
395 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 442 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
396 smp_mb__after_atomic_inc(); /* See above. */ 443 smp_mb__after_atomic_inc(); /* See above. */
397 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 444 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
398 trace_rcu_dyntick("End"); 445 rcu_cleanup_after_idle(smp_processor_id());
446 trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
447 if (!is_idle_task(current)) {
448 struct task_struct *idle = idle_task(smp_processor_id());
449
450 trace_rcu_dyntick("Error on exit: not idle task",
451 oldval, rdtp->dynticks_nesting);
452 ftrace_dump(DUMP_ALL);
453 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
454 current->pid, current->comm,
455 idle->pid, idle->comm); /* must be idle task! */
456 }
457}
458
459/**
460 * rcu_idle_exit - inform RCU that current CPU is leaving idle
461 *
462 * Exit idle mode, in other words, -enter- the mode in which RCU
463 * read-side critical sections can occur.
464 *
465 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
466 * allow for the possibility of usermode upcalls messing up our count
467 * of interrupt nesting level during the busy period that is just
468 * now starting.
469 */
470void rcu_idle_exit(void)
471{
472 unsigned long flags;
473 struct rcu_dynticks *rdtp;
474 long long oldval;
475
476 local_irq_save(flags);
477 rdtp = &__get_cpu_var(rcu_dynticks);
478 oldval = rdtp->dynticks_nesting;
479 WARN_ON_ONCE(oldval != 0);
480 rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
481 rcu_idle_exit_common(rdtp, oldval);
482 local_irq_restore(flags);
483}
484
485/**
486 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
487 *
488 * Enter an interrupt handler, which might possibly result in exiting
489 * idle mode, in other words, entering the mode in which read-side critical
490 * sections can occur.
491 *
492 * Note that the Linux kernel is fully capable of entering an interrupt
493 * handler that it never exits, for example when doing upcalls to
494 * user mode! This code assumes that the idle loop never does upcalls to
495 * user mode. If your architecture does do upcalls from the idle loop (or
496 * does anything else that results in unbalanced calls to the irq_enter()
497 * and irq_exit() functions), RCU will give you what you deserve, good
498 * and hard. But very infrequently and irreproducibly.
499 *
500 * Use things like work queues to work around this limitation.
501 *
502 * You have been warned.
503 */
504void rcu_irq_enter(void)
505{
506 unsigned long flags;
507 struct rcu_dynticks *rdtp;
508 long long oldval;
509
510 local_irq_save(flags);
511 rdtp = &__get_cpu_var(rcu_dynticks);
512 oldval = rdtp->dynticks_nesting;
513 rdtp->dynticks_nesting++;
514 WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
515 if (oldval)
516 trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
517 else
518 rcu_idle_exit_common(rdtp, oldval);
399 local_irq_restore(flags); 519 local_irq_restore(flags);
400} 520}
401 521
@@ -442,27 +562,37 @@ void rcu_nmi_exit(void)
442 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 562 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
443} 563}
444 564
565#ifdef CONFIG_PROVE_RCU
566
445/** 567/**
446 * rcu_irq_enter - inform RCU of entry to hard irq context 568 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
447 * 569 *
448 * If the CPU was idle with dynamic ticks active, this updates the 570 * If the current CPU is in its idle loop and is neither in an interrupt
 449 * rdtp->dynticks to let the RCU handling know that the CPU is active.	 571 * nor an NMI handler, return true.
450 */ 572 */
451void rcu_irq_enter(void) 573int rcu_is_cpu_idle(void)
452{ 574{
453 rcu_exit_nohz(); 575 int ret;
576
577 preempt_disable();
578 ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
579 preempt_enable();
580 return ret;
454} 581}
582EXPORT_SYMBOL(rcu_is_cpu_idle);
583
584#endif /* #ifdef CONFIG_PROVE_RCU */
455 585
456/** 586/**
457 * rcu_irq_exit - inform RCU of exit from hard irq context 587 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
458 * 588 *
459 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks 589 * If the current CPU is idle or running at a first-level (not nested)
460 * to put let the RCU handling be aware that the CPU is going back to idle 590 * interrupt from idle, return true. The caller must have at least
461 * with no ticks. 591 * disabled preemption.
462 */ 592 */
463void rcu_irq_exit(void) 593int rcu_is_cpu_rrupt_from_idle(void)
464{ 594{
465 rcu_enter_nohz(); 595 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
466} 596}
467 597
468#ifdef CONFIG_SMP 598#ifdef CONFIG_SMP
@@ -475,7 +605,7 @@ void rcu_irq_exit(void)
475static int dyntick_save_progress_counter(struct rcu_data *rdp) 605static int dyntick_save_progress_counter(struct rcu_data *rdp)
476{ 606{
477 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 607 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
478 return 0; 608 return (rdp->dynticks_snap & 0x1) == 0;
479} 609}
480 610
481/* 611/*
@@ -512,26 +642,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
512 642
513#endif /* #ifdef CONFIG_SMP */ 643#endif /* #ifdef CONFIG_SMP */
514 644
515#else /* #ifdef CONFIG_NO_HZ */
516
517#ifdef CONFIG_SMP
518
519static int dyntick_save_progress_counter(struct rcu_data *rdp)
520{
521 return 0;
522}
523
524static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
525{
526 return rcu_implicit_offline_qs(rdp);
527}
528
529#endif /* #ifdef CONFIG_SMP */
530
531#endif /* #else #ifdef CONFIG_NO_HZ */
532
533int rcu_cpu_stall_suppress __read_mostly;
534
535static void record_gp_stall_check_time(struct rcu_state *rsp) 645static void record_gp_stall_check_time(struct rcu_state *rsp)
536{ 646{
537 rsp->gp_start = jiffies; 647 rsp->gp_start = jiffies;
@@ -866,8 +976,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
866 /* Advance to a new grace period and initialize state. */ 976 /* Advance to a new grace period and initialize state. */
867 rsp->gpnum++; 977 rsp->gpnum++;
868 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); 978 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
869 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 979 WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
870 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 980 rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
871 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 981 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
872 record_gp_stall_check_time(rsp); 982 record_gp_stall_check_time(rsp);
873 983
@@ -877,7 +987,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
877 rnp->qsmask = rnp->qsmaskinit; 987 rnp->qsmask = rnp->qsmaskinit;
878 rnp->gpnum = rsp->gpnum; 988 rnp->gpnum = rsp->gpnum;
879 rnp->completed = rsp->completed; 989 rnp->completed = rsp->completed;
880 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 990 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
881 rcu_start_gp_per_cpu(rsp, rnp, rdp); 991 rcu_start_gp_per_cpu(rsp, rnp, rdp);
882 rcu_preempt_boost_start_gp(rnp); 992 rcu_preempt_boost_start_gp(rnp);
883 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 993 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
@@ -927,7 +1037,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
927 1037
928 rnp = rcu_get_root(rsp); 1038 rnp = rcu_get_root(rsp);
929 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1039 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
930 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ 1040 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
931 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1041 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
932 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 1042 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
933} 1043}
@@ -991,7 +1101,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
991 1101
992 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ 1102 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
993 trace_rcu_grace_period(rsp->name, rsp->completed, "end"); 1103 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
994 rsp->signaled = RCU_GP_IDLE; 1104 rsp->fqs_state = RCU_GP_IDLE;
995 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 1105 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
996} 1106}
997 1107
@@ -1221,7 +1331,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1221 else 1331 else
1222 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1332 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1223 if (need_report & RCU_OFL_TASKS_EXP_GP) 1333 if (need_report & RCU_OFL_TASKS_EXP_GP)
1224 rcu_report_exp_rnp(rsp, rnp); 1334 rcu_report_exp_rnp(rsp, rnp, true);
1225 rcu_node_kthread_setaffinity(rnp, -1); 1335 rcu_node_kthread_setaffinity(rnp, -1);
1226} 1336}
1227 1337
@@ -1263,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 1263	/* If no callbacks are ready, just return. */	 1373	/* If no callbacks are ready, just return. */
1264 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1374 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1265 trace_rcu_batch_start(rsp->name, 0, 0); 1375 trace_rcu_batch_start(rsp->name, 0, 0);
1266 trace_rcu_batch_end(rsp->name, 0); 1376 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
1377 need_resched(), is_idle_task(current),
1378 rcu_is_callbacks_kthread());
1267 return; 1379 return;
1268 } 1380 }
1269 1381
@@ -1291,12 +1403,17 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1291 debug_rcu_head_unqueue(list); 1403 debug_rcu_head_unqueue(list);
1292 __rcu_reclaim(rsp->name, list); 1404 __rcu_reclaim(rsp->name, list);
1293 list = next; 1405 list = next;
1294 if (++count >= bl) 1406 /* Stop only if limit reached and CPU has something to do. */
1407 if (++count >= bl &&
1408 (need_resched() ||
1409 (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
1295 break; 1410 break;
1296 } 1411 }
1297 1412
1298 local_irq_save(flags); 1413 local_irq_save(flags);
1299 trace_rcu_batch_end(rsp->name, count); 1414 trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
1415 is_idle_task(current),
1416 rcu_is_callbacks_kthread());
1300 1417
1301 /* Update count, and requeue any remaining callbacks. */ 1418 /* Update count, and requeue any remaining callbacks. */
1302 rdp->qlen -= count; 1419 rdp->qlen -= count;
@@ -1334,16 +1451,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1334 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1451 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1335 * Also schedule RCU core processing. 1452 * Also schedule RCU core processing.
1336 * 1453 *
1337 * This function must be called with hardirqs disabled. It is normally 1454 * This function must be called from hardirq context. It is normally
1338 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1455 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1339 * false, there is no point in invoking rcu_check_callbacks(). 1456 * false, there is no point in invoking rcu_check_callbacks().
1340 */ 1457 */
1341void rcu_check_callbacks(int cpu, int user) 1458void rcu_check_callbacks(int cpu, int user)
1342{ 1459{
1343 trace_rcu_utilization("Start scheduler-tick"); 1460 trace_rcu_utilization("Start scheduler-tick");
1344 if (user || 1461 if (user || rcu_is_cpu_rrupt_from_idle()) {
1345 (idle_cpu(cpu) && rcu_scheduler_active &&
1346 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
1347 1462
1348 /* 1463 /*
1349 * Get here if this CPU took its interrupt from user 1464 * Get here if this CPU took its interrupt from user
@@ -1457,7 +1572,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1457 goto unlock_fqs_ret; /* no GP in progress, time updated. */ 1572 goto unlock_fqs_ret; /* no GP in progress, time updated. */
1458 } 1573 }
1459 rsp->fqs_active = 1; 1574 rsp->fqs_active = 1;
1460 switch (rsp->signaled) { 1575 switch (rsp->fqs_state) {
1461 case RCU_GP_IDLE: 1576 case RCU_GP_IDLE:
1462 case RCU_GP_INIT: 1577 case RCU_GP_INIT:
1463 1578
@@ -1473,7 +1588,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1473 force_qs_rnp(rsp, dyntick_save_progress_counter); 1588 force_qs_rnp(rsp, dyntick_save_progress_counter);
1474 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1589 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1475 if (rcu_gp_in_progress(rsp)) 1590 if (rcu_gp_in_progress(rsp))
1476 rsp->signaled = RCU_FORCE_QS; 1591 rsp->fqs_state = RCU_FORCE_QS;
1477 break; 1592 break;
1478 1593
1479 case RCU_FORCE_QS: 1594 case RCU_FORCE_QS:
@@ -1812,7 +1927,7 @@ static int rcu_pending(int cpu)
1812 * by the current CPU, even if none need be done immediately, returning 1927 * by the current CPU, even if none need be done immediately, returning
1813 * 1 if so. 1928 * 1 if so.
1814 */ 1929 */
1815static int rcu_needs_cpu_quick_check(int cpu) 1930static int rcu_cpu_has_callbacks(int cpu)
1816{ 1931{
1817 /* RCU callbacks either ready or pending? */ 1932 /* RCU callbacks either ready or pending? */
1818 return per_cpu(rcu_sched_data, cpu).nxtlist || 1933 return per_cpu(rcu_sched_data, cpu).nxtlist ||
@@ -1913,9 +2028,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1913 for (i = 0; i < RCU_NEXT_SIZE; i++) 2028 for (i = 0; i < RCU_NEXT_SIZE; i++)
1914 rdp->nxttail[i] = &rdp->nxtlist; 2029 rdp->nxttail[i] = &rdp->nxtlist;
1915 rdp->qlen = 0; 2030 rdp->qlen = 0;
1916#ifdef CONFIG_NO_HZ
1917 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2031 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1918#endif /* #ifdef CONFIG_NO_HZ */ 2032 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
2033 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
1919 rdp->cpu = cpu; 2034 rdp->cpu = cpu;
1920 rdp->rsp = rsp; 2035 rdp->rsp = rsp;
1921 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2036 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1942,6 +2057,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1942 rdp->qlen_last_fqs_check = 0; 2057 rdp->qlen_last_fqs_check = 0;
1943 rdp->n_force_qs_snap = rsp->n_force_qs; 2058 rdp->n_force_qs_snap = rsp->n_force_qs;
1944 rdp->blimit = blimit; 2059 rdp->blimit = blimit;
2060 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
2061 atomic_set(&rdp->dynticks->dynticks,
2062 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2063 rcu_prepare_for_idle_init(cpu);
1945 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2064 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1946 2065
1947 /* 2066 /*
@@ -2023,6 +2142,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2023 rcu_send_cbs_to_online(&rcu_bh_state); 2142 rcu_send_cbs_to_online(&rcu_bh_state);
2024 rcu_send_cbs_to_online(&rcu_sched_state); 2143 rcu_send_cbs_to_online(&rcu_sched_state);
2025 rcu_preempt_send_cbs_to_online(); 2144 rcu_preempt_send_cbs_to_online();
2145 rcu_cleanup_after_idle(cpu);
2026 break; 2146 break;
2027 case CPU_DEAD: 2147 case CPU_DEAD:
2028 case CPU_DEAD_FROZEN: 2148 case CPU_DEAD_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 849ce9ec51fe..fddff92d6676 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,9 +84,10 @@
84 * Dynticks per-CPU state. 84 * Dynticks per-CPU state.
85 */ 85 */
86struct rcu_dynticks { 86struct rcu_dynticks {
87 int dynticks_nesting; /* Track irq/process nesting level. */ 87 long long dynticks_nesting; /* Track irq/process nesting level. */
88 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 88 /* Process level is worth LLONG_MAX/2. */
89 atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ 89 int dynticks_nmi_nesting; /* Track NMI nesting level. */
90 atomic_t dynticks; /* Even value for idle, else odd. */
90}; 91};
91 92
92/* RCU's kthread states for tracing. */ 93/* RCU's kthread states for tracing. */
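The ->dynticks counter follows an even/odd protocol: even while the CPU is in
dyntick-idle, odd otherwise, with the widened long long ->dynticks_nesting
tracking how many process- and irq-level reasons keep the CPU non-idle. A
hedged sketch of how idle transitions are meant to drive the counter
(illustrative only, not this patch verbatim):

	static void sketch_idle_enter(struct rcu_dynticks *rdtp)
	{
		smp_mb__before_atomic_inc(); /* Order prior RCU reads before idle. */
		atomic_inc(&rdtp->dynticks); /* Odd (non-idle) -> even (idle). */
		WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
	}

	static void sketch_idle_exit(struct rcu_dynticks *rdtp)
	{
		atomic_inc(&rdtp->dynticks); /* Even (idle) -> odd (non-idle). */
		smp_mb__after_atomic_inc();  /* Order idle exit before RCU reads. */
		WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
	}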
@@ -274,16 +275,12 @@ struct rcu_data {
274 /* did other CPU force QS recently? */ 275 /* did other CPU force QS recently? */
275 long blimit; /* Upper limit on a processed batch */ 276 long blimit; /* Upper limit on a processed batch */
276 277
277#ifdef CONFIG_NO_HZ
278 /* 3) dynticks interface. */ 278 /* 3) dynticks interface. */
279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ 279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
280 int dynticks_snap; /* Per-GP tracking for dynticks. */ 280 int dynticks_snap; /* Per-GP tracking for dynticks. */
281#endif /* #ifdef CONFIG_NO_HZ */
282 281
283 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 282 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
284#ifdef CONFIG_NO_HZ
285 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 283 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
286#endif /* #ifdef CONFIG_NO_HZ */
287 unsigned long offline_fqs; /* Kicked due to being offline. */ 284 unsigned long offline_fqs; /* Kicked due to being offline. */
288 unsigned long resched_ipi; /* Sent a resched IPI. */ 285 unsigned long resched_ipi; /* Sent a resched IPI. */
289 286
@@ -302,16 +299,12 @@ struct rcu_data {
302 struct rcu_state *rsp; 299 struct rcu_state *rsp;
303}; 300};
304 301
305/* Values for signaled field in struct rcu_state. */ 302/* Values for fqs_state field in struct rcu_state. */
306#define RCU_GP_IDLE 0 /* No grace period in progress. */ 303#define RCU_GP_IDLE 0 /* No grace period in progress. */
307#define RCU_GP_INIT 1 /* Grace period being initialized. */ 304#define RCU_GP_INIT 1 /* Grace period being initialized. */
308#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ 305#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
309#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 306#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
310#ifdef CONFIG_NO_HZ
311#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 307#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
312#else /* #ifdef CONFIG_NO_HZ */
313#define RCU_SIGNAL_INIT RCU_FORCE_QS
314#endif /* #else #ifdef CONFIG_NO_HZ */
315 308
316#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 309#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
317 310
@@ -361,7 +354,7 @@ struct rcu_state {
361 354
362 /* The following fields are guarded by the root rcu_node's lock. */ 355 /* The following fields are guarded by the root rcu_node's lock. */
363 356
364 u8 signaled ____cacheline_internodealigned_in_smp; 357 u8 fqs_state ____cacheline_internodealigned_in_smp;
365 /* Force QS state. */ 358 /* Force QS state. */
366 u8 fqs_active; /* force_quiescent_state() */ 359 u8 fqs_active; /* force_quiescent_state() */
367 /* is running. */ 360 /* is running. */
@@ -451,7 +444,8 @@ static void rcu_preempt_check_callbacks(int cpu);
451static void rcu_preempt_process_callbacks(void); 444static void rcu_preempt_process_callbacks(void);
452void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 445void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
453#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 446#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
454static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); 447static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
448 bool wake);
455#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 449#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
456static int rcu_preempt_pending(int cpu); 450static int rcu_preempt_pending(int cpu);
457static int rcu_preempt_needs_cpu(int cpu); 451static int rcu_preempt_needs_cpu(int cpu);
@@ -461,6 +455,7 @@ static void __init __rcu_init_preempt(void);
461static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 455static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
462static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 456static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
463static void invoke_rcu_callbacks_kthread(void); 457static void invoke_rcu_callbacks_kthread(void);
458static bool rcu_is_callbacks_kthread(void);
464#ifdef CONFIG_RCU_BOOST 459#ifdef CONFIG_RCU_BOOST
465static void rcu_preempt_do_callbacks(void); 460static void rcu_preempt_do_callbacks(void);
466static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 461static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
@@ -473,5 +468,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
473#endif /* #ifdef CONFIG_RCU_BOOST */ 468#endif /* #ifdef CONFIG_RCU_BOOST */
474static void rcu_cpu_kthread_setrt(int cpu, int to_rt); 469static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
475static void __cpuinit rcu_prepare_kthreads(int cpu); 470static void __cpuinit rcu_prepare_kthreads(int cpu);
471static void rcu_prepare_for_idle_init(int cpu);
472static void rcu_cleanup_after_idle(int cpu);
473static void rcu_prepare_for_idle(int cpu);
476 474
477#endif /* #ifndef RCU_TREE_NONCORE */ 475#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 4b9b9f8a4184..8bb35d73e1f9 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -312,6 +312,7 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
312{ 312{
313 int empty; 313 int empty;
314 int empty_exp; 314 int empty_exp;
315 int empty_exp_now;
315 unsigned long flags; 316 unsigned long flags;
316 struct list_head *np; 317 struct list_head *np;
317#ifdef CONFIG_RCU_BOOST 318#ifdef CONFIG_RCU_BOOST
@@ -382,8 +383,10 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
382 /* 383 /*
383 * If this was the last task on the current list, and if 384 * If this was the last task on the current list, and if
384 * we aren't waiting on any CPUs, report the quiescent state. 385 * we aren't waiting on any CPUs, report the quiescent state.
385 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. 386 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
387 * so we must take a snapshot of the expedited state.
386 */ 388 */
389 empty_exp_now = !rcu_preempted_readers_exp(rnp);
387 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 390 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
388 trace_rcu_quiescent_state_report("preempt_rcu", 391 trace_rcu_quiescent_state_report("preempt_rcu",
389 rnp->gpnum, 392 rnp->gpnum,
@@ -406,8 +409,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
406 * If this was the last task on the expedited lists, 409 * If this was the last task on the expedited lists,
407 * then we need to report up the rcu_node hierarchy. 410 * then we need to report up the rcu_node hierarchy.
408 */ 411 */
409 if (!empty_exp && !rcu_preempted_readers_exp(rnp)) 412 if (!empty_exp && empty_exp_now)
410 rcu_report_exp_rnp(&rcu_preempt_state, rnp); 413 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
411 } else { 414 } else {
412 local_irq_restore(flags); 415 local_irq_restore(flags);
413 } 416 }
@@ -729,9 +732,13 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
729 * recursively up the tree. (Calm down, calm down, we do the recursion 732 * recursively up the tree. (Calm down, calm down, we do the recursion
730 * iteratively!) 733 * iteratively!)
731 * 734 *
735 * Most callers will set the "wake" flag, but the task initiating the
736 * expedited grace period need not wake itself.
737 *
732 * Caller must hold sync_rcu_preempt_exp_mutex. 738 * Caller must hold sync_rcu_preempt_exp_mutex.
733 */ 739 */
734static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) 740static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
741 bool wake)
735{ 742{
736 unsigned long flags; 743 unsigned long flags;
737 unsigned long mask; 744 unsigned long mask;
@@ -744,7 +751,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
744 } 751 }
745 if (rnp->parent == NULL) { 752 if (rnp->parent == NULL) {
746 raw_spin_unlock_irqrestore(&rnp->lock, flags); 753 raw_spin_unlock_irqrestore(&rnp->lock, flags);
747 wake_up(&sync_rcu_preempt_exp_wq); 754 if (wake)
755 wake_up(&sync_rcu_preempt_exp_wq);
748 break; 756 break;
749 } 757 }
750 mask = rnp->grpmask; 758 mask = rnp->grpmask;
@@ -777,7 +785,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
777 must_wait = 1; 785 must_wait = 1;
778 } 786 }
779 if (!must_wait) 787 if (!must_wait)
780 rcu_report_exp_rnp(rsp, rnp); 788 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
781} 789}
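The two call sites in these hunks illustrate the convention for the new "wake"
argument: a reader exiting the last blocking critical section passes true so
the waiting task is awakened, while sync_rcu_preempt_exp_init() passes false
because the initiating task is about to wait anyway and need not wake itself.
In sketch form, using the names from these hunks:

	/* From a reader ending the last blocking critical section: */
	rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);  /* wake waiter */

	/* From the task that is initiating the expedited grace period: */
	rcu_report_exp_rnp(rsp, rnp, false);                /* don't wake self */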
782 790
783/* 791/*
@@ -1069,9 +1077,9 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1069 * report on tasks preempted in RCU read-side critical sections during 1077 * report on tasks preempted in RCU read-side critical sections during
1070 * expedited RCU grace periods. 1078 * expedited RCU grace periods.
1071 */ 1079 */
1072static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) 1080static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1081 bool wake)
1073{ 1082{
1074 return;
1075} 1083}
1076 1084
1077#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1085#endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -1157,8 +1165,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1157 1165
1158#endif /* #else #ifdef CONFIG_RCU_TRACE */ 1166#endif /* #else #ifdef CONFIG_RCU_TRACE */
1159 1167
1160static struct lock_class_key rcu_boost_class;
1161
1162/* 1168/*
1163 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1169 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1164 * or ->boost_tasks, advancing the pointer to the next task in the 1170 * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1221,15 +1227,13 @@ static int rcu_boost(struct rcu_node *rnp)
1221 */ 1227 */
1222 t = container_of(tb, struct task_struct, rcu_node_entry); 1228 t = container_of(tb, struct task_struct, rcu_node_entry);
1223 rt_mutex_init_proxy_locked(&mtx, t); 1229 rt_mutex_init_proxy_locked(&mtx, t);
1224 /* Avoid lockdep false positives. This rt_mutex is its own thing. */
1225 lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class,
1226 "rcu_boost_mutex");
1227 t->rcu_boost_mutex = &mtx; 1230 t->rcu_boost_mutex = &mtx;
1228 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1231 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1229 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1232 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1230 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1233 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1231 1234
1232 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; 1235 return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
1236 ACCESS_ONCE(rnp->boost_tasks) != NULL;
1233} 1237}
1234 1238
1235/* 1239/*
@@ -1329,6 +1333,15 @@ static void invoke_rcu_callbacks_kthread(void)
1329} 1333}
1330 1334
1331/* 1335/*
1336 * Is the current CPU running the RCU-callbacks kthread?
1337 * Caller must have preemption disabled.
1338 */
1339static bool rcu_is_callbacks_kthread(void)
1340{
1341 return __get_cpu_var(rcu_cpu_kthread_task) == current;
1342}
1343
1344/*
1332 * Set the affinity of the boost kthread. The CPU-hotplug locks are 1345 * Set the affinity of the boost kthread. The CPU-hotplug locks are
1333 * held, so no one should be messing with the existence of the boost 1346 * held, so no one should be messing with the existence of the boost
1334 * kthread. 1347 * kthread.
@@ -1772,6 +1785,11 @@ static void invoke_rcu_callbacks_kthread(void)
1772 WARN_ON_ONCE(1); 1785 WARN_ON_ONCE(1);
1773} 1786}
1774 1787
1788static bool rcu_is_callbacks_kthread(void)
1789{
1790 return false;
1791}
1792
1775static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1793static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1776{ 1794{
1777} 1795}
@@ -1907,7 +1925,7 @@ void synchronize_sched_expedited(void)
1907 * grace period works for us. 1925 * grace period works for us.
1908 */ 1926 */
1909 get_online_cpus(); 1927 get_online_cpus();
1910 snap = atomic_read(&sync_sched_expedited_started) - 1; 1928 snap = atomic_read(&sync_sched_expedited_started);
1911 smp_mb(); /* ensure read is before try_stop_cpus(). */ 1929 smp_mb(); /* ensure read is before try_stop_cpus(). */
1912 } 1930 }
1913 1931
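Dropping the "- 1" keeps the re-taken snapshot in step with the started
counter: a later check compares the done counter against this snapshot to
decide whether some other CPU's just-completed expedited grace period already
covers this request. A hedged sketch of the ticket idiom (started/done are
stand-ins for the sync_sched_expedited_* atomics, and the exact wrap-safe
comparison is elided):

	int snap;

	snap = atomic_read(&started);	/* Take a ticket before retrying. */
	smp_mb();			/* Order ticket read with the attempt. */
	/* ... try_stop_cpus() fails because someone else is running ... */
	if (atomic_read(&done) - snap >= 0)
		return;			/* Their grace period covers ours. */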
@@ -1939,88 +1957,243 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1939 * 1 if so. This function is part of the RCU implementation; it is -not- 1957 * 1 if so. This function is part of the RCU implementation; it is -not-
1940 * an exported member of the RCU API. 1958 * an exported member of the RCU API.
1941 * 1959 *
1942 * Because we have preemptible RCU, just check whether this CPU needs 1960 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1943 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption 1961 * any flavor of RCU.
1944 * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
1945 */ 1962 */
1946int rcu_needs_cpu(int cpu) 1963int rcu_needs_cpu(int cpu)
1947{ 1964{
1948 return rcu_needs_cpu_quick_check(cpu); 1965 return rcu_cpu_has_callbacks(cpu);
1966}
1967
1968/*
1969 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
1970 */
1971static void rcu_prepare_for_idle_init(int cpu)
1972{
1973}
1974
1975/*
1976 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1977 * after it.
1978 */
1979static void rcu_cleanup_after_idle(int cpu)
1980{
1981}
1982
1983/*
1984 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
1985 * is nothing.
1986 */
1987static void rcu_prepare_for_idle(int cpu)
1988{
1949} 1989}
1950 1990
1951#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1991#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1952 1992
1953#define RCU_NEEDS_CPU_FLUSHES 5 1993/*
1994 * This code is invoked when a CPU goes idle, at which point we want
1995 * to have the CPU do everything required for RCU so that it can enter
1996 * the energy-efficient dyntick-idle mode. This is handled by a
1997 * state machine implemented by rcu_prepare_for_idle() below.
1998 *
1999 * The following three preprocessor symbols control this state machine:
2000 *
2001 * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
2002 * to satisfy RCU. Beyond this point, it is better to incur a periodic
2003 * scheduling-clock interrupt than to loop through the state machine
2004 * at full power.
2005 * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
2006 * optional if RCU does not need anything immediately from this
2007 * CPU, even if this CPU still has RCU callbacks queued. The first
2008 * few passes through the state machine are mandatory: we need to give
2009 * the state machine a chance to communicate a quiescent state
2010 * to the RCU core.
2011 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
2012 * to sleep in dyntick-idle mode with RCU callbacks pending. This
2013 * is sized to be roughly one RCU grace period. Those energy-efficiency
2014 * benchmarkers who might otherwise be tempted to set this to a large
2015 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
2016 * system. And if you are -that- concerned about energy efficiency,
2017 * just power the system down and be done with it!
2018 *
2019 * The values below work well in practice. If future workloads require
2020 * adjustment, they can be converted into kernel config parameters, though
2021 * making the state machine smarter might be a better option.
2022 */
2023#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
2024#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
2025#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
2026
1954static DEFINE_PER_CPU(int, rcu_dyntick_drain); 2027static DEFINE_PER_CPU(int, rcu_dyntick_drain);
1955static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); 2028static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
2029static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
2030static ktime_t rcu_idle_gp_wait;
1956 2031
1957/* 2032/*
1958 * Check to see if any future RCU-related work will need to be done 2033 * Allow the CPU to enter dyntick-idle mode if any of the following holds: (1) There are no
1959 * by the current CPU, even if none need be done immediately, returning 2034 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
1960 * 1 if so. This function is part of the RCU implementation; it is -not- 2035 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
1961 * an exported member of the RCU API. 2036 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
2037 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
2038 * it is better to incur scheduling-clock interrupts than to spin
2039 * continuously for the same time duration!
2040 */
2041int rcu_needs_cpu(int cpu)
2042{
2043 /* If no callbacks, RCU doesn't need the CPU. */
2044 if (!rcu_cpu_has_callbacks(cpu))
2045 return 0;
2046 /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
2047 return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
2048}
2049
2050/*
2051 * Timer handler used to force CPU to start pushing its remaining RCU
2052 * callbacks in the case where it entered dyntick-idle mode with callbacks
2053 * pending. The handler doesn't really need to do anything because the
2054 * real work is done upon re-entry to idle, or by the next scheduling-clock
2055 * interrupt should idle not be re-entered.
2056 */
2057static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp)
2058{
2059 trace_rcu_prep_idle("Timer");
2060 return HRTIMER_NORESTART;
2061}
2062
2063/*
2064 * Initialize the timer used to pull CPUs out of dyntick-idle mode.
2065 */
2066static void rcu_prepare_for_idle_init(int cpu)
2067{
2068 static int firsttime = 1;
2069 struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
2070
2071 hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2072 hrtp->function = rcu_idle_gp_timer_func;
2073 if (firsttime) {
2074 unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
2075
2076 rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
2077 firsttime = 0;
2078 }
2079}
2080
2081/*
2082 * Clean up for exit from idle. Because we are exiting from idle, there
2083 * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
2084 * do nothing if this timer is not active, so just cancel it unconditionally.
2085 */
2086static void rcu_cleanup_after_idle(int cpu)
2087{
2088 hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu));
2089}
2090
2091/*
2092 * Check to see if any RCU-related work can be done by the current CPU,
2093 * and if so, schedule a softirq to get it done. This function is part
2094 * of the RCU implementation; it is -not- an exported member of the RCU API.
1962 * 2095 *
1963 * Because we are not supporting preemptible RCU, attempt to accelerate 2096 * The idea is for the current CPU to clear out all work required by the
1964 * any current grace periods so that RCU no longer needs this CPU, but 2097 * RCU core for the current grace period, so that this CPU can be permitted
1965 * only if all other CPUs are already in dynticks-idle mode. This will 2098 * to enter dyntick-idle mode. In some cases, it will need to be awakened
1966 * allow the CPU cores to be powered down immediately, as opposed to after 2099 * at the end of the grace period by whatever CPU ends the grace period.
1967 * waiting many milliseconds for grace periods to elapse. 2100 * This allows CPUs to go dyntick-idle more quickly, and to reduce the
2101 * number of wakeups by a modest integer factor.
1968 * 2102 *
1969 * Because it is not legal to invoke rcu_process_callbacks() with irqs 2103 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1970 * disabled, we do one pass of force_quiescent_state(), then do an 2104 * disabled, we do one pass of force_quiescent_state(), then do an
1971 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked 2105 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
1972 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. 2106 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
2107 *
2108 * The caller must have disabled interrupts.
1973 */ 2109 */
1974int rcu_needs_cpu(int cpu) 2110static void rcu_prepare_for_idle(int cpu)
1975{ 2111{
1976 int c = 0; 2112 unsigned long flags;
1977 int snap; 2113
1978 int thatcpu; 2114 local_irq_save(flags);
1979 2115
1980 /* Check for being in the holdoff period. */ 2116 /*
1981 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) 2117 * If there are no callbacks on this CPU, enter dyntick-idle mode.
1982 return rcu_needs_cpu_quick_check(cpu); 2118 * Also reset state to avoid prejudicing later attempts.
1983 2119 */
1984 /* Don't bother unless we are the last non-dyntick-idle CPU. */ 2120 if (!rcu_cpu_has_callbacks(cpu)) {
1985 for_each_online_cpu(thatcpu) { 2121 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
1986 if (thatcpu == cpu) 2122 per_cpu(rcu_dyntick_drain, cpu) = 0;
1987 continue; 2123 local_irq_restore(flags);
1988 snap = atomic_add_return(0, &per_cpu(rcu_dynticks, 2124 trace_rcu_prep_idle("No callbacks");
1989 thatcpu).dynticks); 2125 return;
1990 smp_mb(); /* Order sampling of snap with end of grace period. */ 2126 }
1991 if ((snap & 0x1) != 0) { 2127
1992 per_cpu(rcu_dyntick_drain, cpu) = 0; 2128 /*
1993 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 2129 * If in holdoff mode, just return. We will presumably have
1994 return rcu_needs_cpu_quick_check(cpu); 2130 * refrained from disabling the scheduling-clock tick.
1995 } 2131 */
2132 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
2133 local_irq_restore(flags);
2134 trace_rcu_prep_idle("In holdoff");
2135 return;
1996 } 2136 }
1997 2137
1998 /* Check and update the rcu_dyntick_drain sequencing. */ 2138 /* Check and update the rcu_dyntick_drain sequencing. */
1999 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2139 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
2000 /* First time through, initialize the counter. */ 2140 /* First time through, initialize the counter. */
2001 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; 2141 per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
2142 } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
2143 !rcu_pending(cpu)) {
2144 /* Can we go dyntick-idle despite still having callbacks? */
2145 trace_rcu_prep_idle("Dyntick with callbacks");
2146 per_cpu(rcu_dyntick_drain, cpu) = 0;
2147 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2148 hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
2149 rcu_idle_gp_wait, HRTIMER_MODE_REL);
2150 return; /* Nothing more to do immediately. */
2002 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2151 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
2003 /* We have hit the limit, so time to give up. */ 2152 /* We have hit the limit, so time to give up. */
2004 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 2153 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
2005 return rcu_needs_cpu_quick_check(cpu); 2154 local_irq_restore(flags);
2155 trace_rcu_prep_idle("Begin holdoff");
2156 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
2157 return;
2006 } 2158 }
2007 2159
2008 /* Do one step pushing remaining RCU callbacks through. */ 2160 /*
2161 * Do one step of pushing the remaining RCU callbacks through
2162 * the RCU core state machine.
2163 */
2164#ifdef CONFIG_TREE_PREEMPT_RCU
2165 if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
2166 local_irq_restore(flags);
2167 rcu_preempt_qs(cpu);
2168 force_quiescent_state(&rcu_preempt_state, 0);
2169 local_irq_save(flags);
2170 }
2171#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2009 if (per_cpu(rcu_sched_data, cpu).nxtlist) { 2172 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
2173 local_irq_restore(flags);
2010 rcu_sched_qs(cpu); 2174 rcu_sched_qs(cpu);
2011 force_quiescent_state(&rcu_sched_state, 0); 2175 force_quiescent_state(&rcu_sched_state, 0);
2012 c = c || per_cpu(rcu_sched_data, cpu).nxtlist; 2176 local_irq_save(flags);
2013 } 2177 }
2014 if (per_cpu(rcu_bh_data, cpu).nxtlist) { 2178 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
2179 local_irq_restore(flags);
2015 rcu_bh_qs(cpu); 2180 rcu_bh_qs(cpu);
2016 force_quiescent_state(&rcu_bh_state, 0); 2181 force_quiescent_state(&rcu_bh_state, 0);
2017 c = c || per_cpu(rcu_bh_data, cpu).nxtlist; 2182 local_irq_save(flags);
2018 } 2183 }
2019 2184
2020 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 2185 /*
2021 if (c) 2186 * If RCU callbacks are still pending, RCU still needs this CPU.
2187 * So try forcing the callbacks through the grace period.
2188 */
2189 if (rcu_cpu_has_callbacks(cpu)) {
2190 local_irq_restore(flags);
2191 trace_rcu_prep_idle("More callbacks");
2022 invoke_rcu_core(); 2192 invoke_rcu_core();
2023 return c; 2193 } else {
2194 local_irq_restore(flags);
2195 trace_rcu_prep_idle("Callbacks drained");
2196 }
2024} 2197}
2025 2198
2026#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2199#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 9feffa4c0695..654cfe67f0d1 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
67 rdp->completed, rdp->gpnum, 67 rdp->completed, rdp->gpnum,
68 rdp->passed_quiesce, rdp->passed_quiesce_gpnum, 68 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
69 rdp->qs_pending); 69 rdp->qs_pending);
70#ifdef CONFIG_NO_HZ 70 seq_printf(m, " dt=%d/%llx/%d df=%lu",
71 seq_printf(m, " dt=%d/%d/%d df=%lu",
72 atomic_read(&rdp->dynticks->dynticks), 71 atomic_read(&rdp->dynticks->dynticks),
73 rdp->dynticks->dynticks_nesting, 72 rdp->dynticks->dynticks_nesting,
74 rdp->dynticks->dynticks_nmi_nesting, 73 rdp->dynticks->dynticks_nmi_nesting,
75 rdp->dynticks_fqs); 74 rdp->dynticks_fqs);
76#endif /* #ifdef CONFIG_NO_HZ */
77 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); 75 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
78 seq_printf(m, " ql=%ld qs=%c%c%c%c", 76 seq_printf(m, " ql=%ld qs=%c%c%c%c",
79 rdp->qlen, 77 rdp->qlen,
@@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
141 rdp->completed, rdp->gpnum, 139 rdp->completed, rdp->gpnum,
142 rdp->passed_quiesce, rdp->passed_quiesce_gpnum, 140 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
143 rdp->qs_pending); 141 rdp->qs_pending);
144#ifdef CONFIG_NO_HZ 142 seq_printf(m, ",%d,%llx,%d,%lu",
145 seq_printf(m, ",%d,%d,%d,%lu",
146 atomic_read(&rdp->dynticks->dynticks), 143 atomic_read(&rdp->dynticks->dynticks),
147 rdp->dynticks->dynticks_nesting, 144 rdp->dynticks->dynticks_nesting,
148 rdp->dynticks->dynticks_nmi_nesting, 145 rdp->dynticks->dynticks_nmi_nesting,
149 rdp->dynticks_fqs); 146 rdp->dynticks_fqs);
150#endif /* #ifdef CONFIG_NO_HZ */
151 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); 147 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
152 seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, 148 seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
153 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != 149 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
171static int show_rcudata_csv(struct seq_file *m, void *unused) 167static int show_rcudata_csv(struct seq_file *m, void *unused)
172{ 168{
173 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); 169 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
174#ifdef CONFIG_NO_HZ
175 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
176#endif /* #ifdef CONFIG_NO_HZ */
177 seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); 171 seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
178#ifdef CONFIG_RCU_BOOST 172#ifdef CONFIG_RCU_BOOST
179 seq_puts(m, "\"kt\",\"ktl\""); 173 seq_puts(m, "\"kt\",\"ktl\"");
@@ -278,7 +272,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
278 gpnum = rsp->gpnum; 272 gpnum = rsp->gpnum;
279 seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " 273 seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
280 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", 274 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
281 rsp->completed, gpnum, rsp->signaled, 275 rsp->completed, gpnum, rsp->fqs_state,
282 (long)(rsp->jiffies_force_qs - jiffies), 276 (long)(rsp->jiffies_force_qs - jiffies),
283 (int)(jiffies & 0xffff), 277 (int)(jiffies & 0xffff),
284 rsp->n_force_qs, rsp->n_force_qs_ngp, 278 rsp->n_force_qs, rsp->n_force_qs_ngp,
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 8eafd1bd273e..16502d3a71c8 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -101,6 +101,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
101 101
102 printk("\n============================================\n"); 102 printk("\n============================================\n");
103 printk( "[ BUG: circular locking deadlock detected! ]\n"); 103 printk( "[ BUG: circular locking deadlock detected! ]\n");
104 printk("%s\n", print_tainted());
104 printk( "--------------------------------------------\n"); 105 printk( "--------------------------------------------\n");
105 printk("%s/%d is deadlocking current task %s/%d\n\n", 106 printk("%s/%d is deadlocking current task %s/%d\n\n",
106 task->comm, task_pid_nr(task), 107 task->comm, task_pid_nr(task),
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index f9d8482dd487..a242e691c993 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -579,7 +579,6 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
579 struct rt_mutex_waiter *waiter) 579 struct rt_mutex_waiter *waiter)
580{ 580{
581 int ret = 0; 581 int ret = 0;
582 int was_disabled;
583 582
584 for (;;) { 583 for (;;) {
585 /* Try to acquire the lock: */ 584 /* Try to acquire the lock: */
@@ -602,17 +601,10 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
602 601
603 raw_spin_unlock(&lock->wait_lock); 602 raw_spin_unlock(&lock->wait_lock);
604 603
605 was_disabled = irqs_disabled();
606 if (was_disabled)
607 local_irq_enable();
608
609 debug_rt_mutex_print_deadlock(waiter); 604 debug_rt_mutex_print_deadlock(waiter);
610 605
611 schedule_rt_mutex(lock); 606 schedule_rt_mutex(lock);
612 607
613 if (was_disabled)
614 local_irq_disable();
615
616 raw_spin_lock(&lock->wait_lock); 608 raw_spin_lock(&lock->wait_lock);
617 set_current_state(state); 609 set_current_state(state);
618 } 610 }
diff --git a/kernel/signal.c b/kernel/signal.c
index 739ef2bf105c..56ce3a618b28 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1992,8 +1992,6 @@ static bool do_signal_stop(int signr)
1992 */ 1992 */
1993 if (!(sig->flags & SIGNAL_STOP_STOPPED)) 1993 if (!(sig->flags & SIGNAL_STOP_STOPPED))
1994 sig->group_exit_code = signr; 1994 sig->group_exit_code = signr;
1995 else
1996 WARN_ON_ONCE(!current->ptrace);
1997 1995
1998 sig->group_stop_count = 0; 1996 sig->group_stop_count = 0;
1999 1997
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2c71d91efff0..4eb3a0fa351e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -347,12 +347,12 @@ void irq_exit(void)
347 if (!in_interrupt() && local_softirq_pending()) 347 if (!in_interrupt() && local_softirq_pending())
348 invoke_softirq(); 348 invoke_softirq();
349 349
350 rcu_irq_exit();
351#ifdef CONFIG_NO_HZ 350#ifdef CONFIG_NO_HZ
352 /* Make sure that timer wheel updates are propagated */ 351 /* Make sure that timer wheel updates are propagated */
353 if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) 352 if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
354 tick_nohz_stop_sched_tick(0); 353 tick_nohz_irq_exit();
355#endif 354#endif
355 rcu_irq_exit();
356 preempt_enable_no_resched(); 356 preempt_enable_no_resched();
357} 357}
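Note the reordering: rcu_irq_exit() now runs after tick_nohz_irq_exit(),
presumably because the nohz path may still legitimately use RCU while it
recalculates and reprograms the next tick event. The resulting tail of
irq_exit(), condensed from the hunk above:

	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();
#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated. */
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_irq_exit();	/* May queue timers and use RCU. */
#endif
	rcu_irq_exit();			/* Last: tell RCU the irq is done. */
	preempt_enable_no_resched();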
358 358
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 6318b511afa1..a650694883a1 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1354,7 +1354,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1354 1354
1355 fput(file); 1355 fput(file);
1356out_putname: 1356out_putname:
1357 putname(pathname); 1357 __putname(pathname);
1358out: 1358out:
1359 return result; 1359 return result;
1360} 1360}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index c4eb71c8b2ea..1ecd6ba36d6c 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -387,7 +387,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
387 * released list and do a notify add later. 387 * released list and do a notify add later.
388 */ 388 */
389 if (old) { 389 if (old) {
390 old->event_handler = clockevents_handle_noop;
391 clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); 390 clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
392 list_del(&old->list); 391 list_del(&old->list);
393 list_add(&old->list, &clockevents_released); 392 list_add(&old->list, &clockevents_released);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index da2f760e780c..d3ad022136e5 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -647,7 +647,7 @@ static void clocksource_enqueue(struct clocksource *cs)
647 647
648/** 648/**
649 * __clocksource_updatefreq_scale - Used to update clocksource with new freq 649 * __clocksource_updatefreq_scale - Used to update clocksource with new freq
650 * @t: clocksource to be registered 650 * @cs: clocksource to be registered
651 * @scale: Scale factor multiplied against freq to get clocksource hz 651 * @scale: Scale factor multiplied against freq to get clocksource hz
652 * @freq: clocksource frequency (cycles per second) divided by scale 652 * @freq: clocksource frequency (cycles per second) divided by scale
653 * 653 *
@@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
699 699
700/** 700/**
701 * __clocksource_register_scale - Used to install new clocksources 701 * __clocksource_register_scale - Used to install new clocksources
702 * @t: clocksource to be registered 702 * @cs: clocksource to be registered
703 * @scale: Scale factor multiplied against freq to get clocksource hz 703 * @scale: Scale factor multiplied against freq to get clocksource hz
704 * @freq: clocksource frequency (cycles per second) divided by scale 704 * @freq: clocksource frequency (cycles per second) divided by scale
705 * 705 *
@@ -727,7 +727,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
727 727
728/** 728/**
729 * clocksource_register - Used to install new clocksources 729 * clocksource_register - Used to install new clocksources
730 * @t: clocksource to be registered 730 * @cs: clocksource to be registered
731 * 731 *
732 * Returns -EBUSY if registration fails, zero otherwise. 732 * Returns -EBUSY if registration fails, zero otherwise.
733 */ 733 */
@@ -761,6 +761,8 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
761 761
762/** 762/**
763 * clocksource_change_rating - Change the rating of a registered clocksource 763 * clocksource_change_rating - Change the rating of a registered clocksource
764 * @cs: clocksource to be changed
765 * @rating: new rating
764 */ 766 */
765void clocksource_change_rating(struct clocksource *cs, int rating) 767void clocksource_change_rating(struct clocksource *cs, int rating)
766{ 768{
@@ -772,6 +774,7 @@ EXPORT_SYMBOL(clocksource_change_rating);
772 774
773/** 775/**
774 * clocksource_unregister - remove a registered clocksource 776 * clocksource_unregister - remove a registered clocksource
777 * @cs: clocksource to be unregistered
775 */ 778 */
776void clocksource_unregister(struct clocksource *cs) 779void clocksource_unregister(struct clocksource *cs)
777{ 780{
@@ -787,6 +790,7 @@ EXPORT_SYMBOL(clocksource_unregister);
787/** 790/**
788 * sysfs_show_current_clocksources - sysfs interface for current clocksource 791 * sysfs_show_current_clocksources - sysfs interface for current clocksource
789 * @dev: unused 792 * @dev: unused
793 * @attr: unused
790 * @buf: char buffer to be filled with clocksource list 794 * @buf: char buffer to be filled with clocksource list
791 * 795 *
792 * Provides sysfs interface for listing current clocksource. 796 * Provides sysfs interface for listing current clocksource.
@@ -807,6 +811,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,
807/** 811/**
808 * sysfs_override_clocksource - interface for manually overriding clocksource 812 * sysfs_override_clocksource - interface for manually overriding clocksource
809 * @dev: unused 813 * @dev: unused
814 * @attr: unused
810 * @buf: name of override clocksource 815 * @buf: name of override clocksource
811 * @count: length of buffer 816 * @count: length of buffer
812 * 817 *
@@ -842,6 +847,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
842/** 847/**
843 * sysfs_show_available_clocksources - sysfs interface for listing clocksource 848 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
844 * @dev: unused 849 * @dev: unused
850 * @attr: unused
845 * @buf: char buffer to be filled with clocksource list 851 * @buf: char buffer to be filled with clocksource list
846 * 852 *
847 * Provides sysfs interface for listing registered clocksources 853 * Provides sysfs interface for listing registered clocksources
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 31cc06163ed5..7656642e4b8e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -275,51 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
275} 275}
276EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); 276EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
277 277
278/** 278static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
279 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
280 *
281 * When the next event is more than a tick into the future, stop the idle tick
282 * Called either from the idle loop or from irq_exit() when an idle period was
283 * just interrupted by an interrupt which did not cause a reschedule.
284 */
285void tick_nohz_stop_sched_tick(int inidle)
286{ 279{
287 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; 280 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
288 struct tick_sched *ts;
289 ktime_t last_update, expires, now; 281 ktime_t last_update, expires, now;
290 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 282 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
291 u64 time_delta; 283 u64 time_delta;
292 int cpu; 284 int cpu;
293 285
294 local_irq_save(flags);
295
296 cpu = smp_processor_id(); 286 cpu = smp_processor_id();
297 ts = &per_cpu(tick_cpu_sched, cpu); 287 ts = &per_cpu(tick_cpu_sched, cpu);
298 288
299 /*
300 * Update the idle state in the scheduler domain hierarchy
301 * when tick_nohz_stop_sched_tick() is called from the idle loop.
302 * State will be updated to busy during the first busy tick after
303 * exiting idle.
304 */
305 if (inidle)
306 set_cpu_sd_state_idle();
307
308 /*
309 * Call to tick_nohz_start_idle stops the last_update_time from being
310 * updated. Thus, it must not be called in the event we are called from
311 * irq_exit() with the prior state different than idle.
312 */
313 if (!inidle && !ts->inidle)
314 goto end;
315
316 /*
317 * Set ts->inidle unconditionally. Even if the system did not
318 * switch to NOHZ mode the cpu frequency governers rely on the
319 * update of the idle time accounting in tick_nohz_start_idle().
320 */
321 ts->inidle = 1;
322
323 now = tick_nohz_start_idle(cpu, ts); 289 now = tick_nohz_start_idle(cpu, ts);
324 290
325 /* 291 /*
@@ -335,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle)
335 } 301 }
336 302
337 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 303 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
338 goto end; 304 return;
339 305
340 if (need_resched()) 306 if (need_resched())
341 goto end; 307 return;
342 308
343 if (unlikely(local_softirq_pending() && cpu_online(cpu))) { 309 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
344 static int ratelimit; 310 static int ratelimit;
@@ -348,7 +314,7 @@ void tick_nohz_stop_sched_tick(int inidle)
348 (unsigned int) local_softirq_pending()); 314 (unsigned int) local_softirq_pending());
349 ratelimit++; 315 ratelimit++;
350 } 316 }
351 goto end; 317 return;
352 } 318 }
353 319
354 ts->idle_calls++; 320 ts->idle_calls++;
@@ -443,7 +409,6 @@ void tick_nohz_stop_sched_tick(int inidle)
443 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); 409 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
444 ts->tick_stopped = 1; 410 ts->tick_stopped = 1;
445 ts->idle_jiffies = last_jiffies; 411 ts->idle_jiffies = last_jiffies;
446 rcu_enter_nohz();
447 } 412 }
448 413
449 ts->idle_sleeps++; 414 ts->idle_sleeps++;
@@ -481,8 +446,64 @@ out:
481 ts->next_jiffies = next_jiffies; 446 ts->next_jiffies = next_jiffies;
482 ts->last_jiffies = last_jiffies; 447 ts->last_jiffies = last_jiffies;
483 ts->sleep_length = ktime_sub(dev->next_event, now); 448 ts->sleep_length = ktime_sub(dev->next_event, now);
484end: 449}
485 local_irq_restore(flags); 450
451/**
452 * tick_nohz_idle_enter - stop the idle tick from the idle task
453 *
454 * When the next event is more than a tick into the future, stop the idle tick
455 * Called when we start the idle loop.
456 *
457 * The arch is responsible for calling:
458 *
459 * - rcu_idle_enter() after its last use of RCU before the CPU is put
460 * to sleep.
461 * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
462 */
463void tick_nohz_idle_enter(void)
464{
465 struct tick_sched *ts;
466
467 WARN_ON_ONCE(irqs_disabled());
468
469 /*
470 * Update the idle state in the scheduler domain hierarchy
471 * when tick_nohz_stop_sched_tick() is called from the idle loop.
472 * State will be updated to busy during the first busy tick after
473 * exiting idle.
474 */
475 set_cpu_sd_state_idle();
476
477 local_irq_disable();
478
479 ts = &__get_cpu_var(tick_cpu_sched);
480 /*
481 * Set ts->inidle unconditionally. Even if the system did not
482 * switch to NOHZ mode the cpu frequency governors rely on the
483 * update of the idle time accounting in tick_nohz_start_idle().
484 */
485 ts->inidle = 1;
486 tick_nohz_stop_sched_tick(ts);
487
488 local_irq_enable();
489}
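Because tick_nohz_idle_enter() no longer enters the RCU extended quiescent
state itself, each architecture's idle loop must bracket its sleep with
rcu_idle_enter()/rcu_idle_exit(), per the kerneldoc above. A hedged sketch of
the intended call sequence (arch_cpu_sleep() is a hypothetical stand-in for
the architecture's wfi/hlt/sleep primitive):

	static void sketch_cpu_idle_loop(void)
	{
		while (1) {
			tick_nohz_idle_enter();		/* Maybe stop the tick. */
			while (!need_resched()) {
				rcu_idle_enter();	/* No RCU use past here. */
				arch_cpu_sleep();	/* Hypothetical wfi/hlt. */
				rcu_idle_exit();	/* RCU is usable again. */
			}
			tick_nohz_idle_exit();		/* Restart the tick. */
			schedule();
		}
	}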
490
491/**
492 * tick_nohz_irq_exit - update next tick event from interrupt exit
493 *
494 * When an interrupt fires while we are idle and it doesn't cause
495 * a reschedule, it may still add, modify or delete a timer, enqueue
496 * an RCU callback, etc...
497 * So we need to re-calculate and reprogram the next tick event.
498 */
499void tick_nohz_irq_exit(void)
500{
501 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
502
503 if (!ts->inidle)
504 return;
505
506 tick_nohz_stop_sched_tick(ts);
486} 507}
487 508
488/** 509/**
@@ -524,11 +545,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
524} 545}
525 546
526/** 547/**
527 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task 548 * tick_nohz_idle_exit - restart the idle tick from the idle task
528 * 549 *
529 * Restart the idle tick when the CPU is woken up from idle 550 * Restart the idle tick when the CPU is woken up from idle
551 * This also exits the RCU extended quiescent state. The CPU
552 * can use RCU again after this function is called.
530 */ 553 */
531void tick_nohz_restart_sched_tick(void) 554void tick_nohz_idle_exit(void)
532{ 555{
533 int cpu = smp_processor_id(); 556 int cpu = smp_processor_id();
534 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 557 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
@@ -538,6 +561,7 @@ void tick_nohz_restart_sched_tick(void)
538 ktime_t now; 561 ktime_t now;
539 562
540 local_irq_disable(); 563 local_irq_disable();
564
541 if (ts->idle_active || (ts->inidle && ts->tick_stopped)) 565 if (ts->idle_active || (ts->inidle && ts->tick_stopped))
542 now = ktime_get(); 566 now = ktime_get();
543 567
@@ -552,8 +576,6 @@ void tick_nohz_restart_sched_tick(void)
552 576
553 ts->inidle = 0; 577 ts->inidle = 0;
554 578
555 rcu_exit_nohz();
556
557 /* Update jiffies first */ 579 /* Update jiffies first */
558 select_nohz_load_balancer(0); 580 select_nohz_load_balancer(0);
559 tick_do_update_jiffies64(now); 581 tick_do_update_jiffies64(now);
diff --git a/kernel/timer.c b/kernel/timer.c
index 9c3c62b0c4bc..a297ffcf888e 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -427,6 +427,12 @@ static int timer_fixup_init(void *addr, enum debug_obj_state state)
427 } 427 }
428} 428}
429 429
430/* Stub timer callback for improperly used timers. */
431static void stub_timer(unsigned long data)
432{
433 WARN_ON(1);
434}
435
430/* 436/*
431 * fixup_activate is called when: 437 * fixup_activate is called when:
432 * - an active object is activated 438 * - an active object is activated
@@ -450,7 +456,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
450 debug_object_activate(timer, &timer_debug_descr); 456 debug_object_activate(timer, &timer_debug_descr);
451 return 0; 457 return 0;
452 } else { 458 } else {
453 WARN_ON_ONCE(1); 459 setup_timer(timer, stub_timer, 0);
460 return 1;
454 } 461 }
455 return 0; 462 return 0;
456 463
@@ -480,12 +487,40 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
480 } 487 }
481} 488}
482 489
490/*
491 * fixup_assert_init is called when:
492 * - an untracked/uninit-ed object is found
493 */
494static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
495{
496 struct timer_list *timer = addr;
497
498 switch (state) {
499 case ODEBUG_STATE_NOTAVAILABLE:
500 if (timer->entry.prev == TIMER_ENTRY_STATIC) {
501 /*
502 * This is not really a fixup. The timer was
503 * statically initialized. We just make sure that it
504 * is tracked in the object tracker.
505 */
506 debug_object_init(timer, &timer_debug_descr);
507 return 0;
508 } else {
509 setup_timer(timer, stub_timer, 0);
510 return 1;
511 }
512 default:
513 return 0;
514 }
515}
516
483static struct debug_obj_descr timer_debug_descr = { 517static struct debug_obj_descr timer_debug_descr = {
484 .name = "timer_list", 518 .name = "timer_list",
485 .debug_hint = timer_debug_hint, 519 .debug_hint = timer_debug_hint,
486 .fixup_init = timer_fixup_init, 520 .fixup_init = timer_fixup_init,
487 .fixup_activate = timer_fixup_activate, 521 .fixup_activate = timer_fixup_activate,
488 .fixup_free = timer_fixup_free, 522 .fixup_free = timer_fixup_free,
523 .fixup_assert_init = timer_fixup_assert_init,
489}; 524};
490 525
491static inline void debug_timer_init(struct timer_list *timer) 526static inline void debug_timer_init(struct timer_list *timer)
@@ -508,6 +543,11 @@ static inline void debug_timer_free(struct timer_list *timer)
508 debug_object_free(timer, &timer_debug_descr); 543 debug_object_free(timer, &timer_debug_descr);
509} 544}
510 545
546static inline void debug_timer_assert_init(struct timer_list *timer)
547{
548 debug_object_assert_init(timer, &timer_debug_descr);
549}
550
511static void __init_timer(struct timer_list *timer, 551static void __init_timer(struct timer_list *timer,
512 const char *name, 552 const char *name,
513 struct lock_class_key *key); 553 struct lock_class_key *key);
@@ -531,6 +571,7 @@ EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
531static inline void debug_timer_init(struct timer_list *timer) { } 571static inline void debug_timer_init(struct timer_list *timer) { }
532static inline void debug_timer_activate(struct timer_list *timer) { } 572static inline void debug_timer_activate(struct timer_list *timer) { }
533static inline void debug_timer_deactivate(struct timer_list *timer) { } 573static inline void debug_timer_deactivate(struct timer_list *timer) { }
574static inline void debug_timer_assert_init(struct timer_list *timer) { }
534#endif 575#endif
535 576
536static inline void debug_init(struct timer_list *timer) 577static inline void debug_init(struct timer_list *timer)
@@ -552,6 +593,11 @@ static inline void debug_deactivate(struct timer_list *timer)
552 trace_timer_cancel(timer); 593 trace_timer_cancel(timer);
553} 594}
554 595
596static inline void debug_assert_init(struct timer_list *timer)
597{
598 debug_timer_assert_init(timer);
599}
600
555static void __init_timer(struct timer_list *timer, 601static void __init_timer(struct timer_list *timer,
556 const char *name, 602 const char *name,
557 struct lock_class_key *key) 603 struct lock_class_key *key)
@@ -902,6 +948,8 @@ int del_timer(struct timer_list *timer)
902 unsigned long flags; 948 unsigned long flags;
903 int ret = 0; 949 int ret = 0;
904 950
951 debug_assert_init(timer);
952
905 timer_stats_timer_clear_start_info(timer); 953 timer_stats_timer_clear_start_info(timer);
906 if (timer_pending(timer)) { 954 if (timer_pending(timer)) {
907 base = lock_timer_base(timer, &flags); 955 base = lock_timer_base(timer, &flags);
@@ -932,6 +980,8 @@ int try_to_del_timer_sync(struct timer_list *timer)
932 unsigned long flags; 980 unsigned long flags;
933 int ret = -1; 981 int ret = -1;
934 982
983 debug_assert_init(timer);
984
935 base = lock_timer_base(timer, &flags); 985 base = lock_timer_base(timer, &flags);
936 986
937 if (base->running_timer == timer) 987 if (base->running_timer == timer)
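With debug_assert_init() wired into del_timer() and try_to_del_timer_sync(),
debugobjects now catches operations on a timer that was never initialized,
and timer_fixup_assert_init() installs stub_timer() so the bogus timer WARNs
if it ever fires instead of calling through a garbage function pointer. A
sketch of the misuse this targets (assumes CONFIG_DEBUG_OBJECTS_TIMERS=y):

	struct timer_list t;	/* Never passed to init_timer()/setup_timer(). */

	del_timer(&t);		/* debug_assert_init() flags the untracked
				 * object; the fixup installs stub_timer(). */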
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f2bd275bb60f..91dc4bc8bf72 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -338,7 +338,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
338/* trace_flags holds trace_options default values */ 338/* trace_flags holds trace_options default values */
339unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 339unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
341 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE; 341 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
342 TRACE_ITER_IRQ_INFO;
342 343
343static int trace_stop_count; 344static int trace_stop_count;
344static DEFINE_RAW_SPINLOCK(tracing_start_lock); 345static DEFINE_RAW_SPINLOCK(tracing_start_lock);
@@ -426,6 +427,7 @@ static const char *trace_options[] = {
426 "record-cmd", 427 "record-cmd",
427 "overwrite", 428 "overwrite",
428 "disable_on_free", 429 "disable_on_free",
430 "irq-info",
429 NULL 431 NULL
430}; 432};
431 433
@@ -1843,6 +1845,33 @@ static void s_stop(struct seq_file *m, void *p)
1843 trace_event_read_unlock(); 1845 trace_event_read_unlock();
1844} 1846}
1845 1847
1848static void
1849get_total_entries(struct trace_array *tr, unsigned long *total, unsigned long *entries)
1850{
1851 unsigned long count;
1852 int cpu;
1853
1854 *total = 0;
1855 *entries = 0;
1856
1857 for_each_tracing_cpu(cpu) {
1858 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1859 /*
1860 * If this buffer has skipped entries, then we hold all
1861 * entries for the trace and we need to ignore the
1862 * ones before the time stamp.
1863 */
1864 if (tr->data[cpu]->skipped_entries) {
1865 count -= tr->data[cpu]->skipped_entries;
1866 /* total is the same as the entries */
1867 *total += count;
1868 } else
1869 *total += count +
1870 ring_buffer_overrun_cpu(tr->buffer, cpu);
1871 *entries += count;
1872 }
1873}
1874
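get_total_entries() factors out the buffer-accounting loop that
print_trace_header() previously open-coded (see the removal further below),
so the default header and the latency header can share it. Usage, as in
print_event_info() later in this hunk:

	unsigned long total;
	unsigned long entries;

	get_total_entries(tr, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
		   entries, total, num_online_cpus());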
1846static void print_lat_help_header(struct seq_file *m) 1875static void print_lat_help_header(struct seq_file *m)
1847{ 1876{
1848 seq_puts(m, "# _------=> CPU# \n"); 1877 seq_puts(m, "# _------=> CPU# \n");
@@ -1855,12 +1884,35 @@ static void print_lat_help_header(struct seq_file *m)
1855 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1884 seq_puts(m, "# \\ / ||||| \\ | / \n");
1856} 1885}
1857 1886
1858static void print_func_help_header(struct seq_file *m) 1887static void print_event_info(struct trace_array *tr, struct seq_file *m)
1888{
1889 unsigned long total;
1890 unsigned long entries;
1891
1892 get_total_entries(tr, &total, &entries);
1893 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
1894 entries, total, num_online_cpus());
1895 seq_puts(m, "#\n");
1896}
1897
1898static void print_func_help_header(struct trace_array *tr, struct seq_file *m)
1859{ 1899{
1860 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1900 print_event_info(tr, m);
1901 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1861 seq_puts(m, "# | | | | |\n"); 1902 seq_puts(m, "# | | | | |\n");
1862} 1903}
1863 1904
1905static void print_func_help_header_irq(struct trace_array *tr, struct seq_file *m)
1906{
1907 print_event_info(tr, m);
1908 seq_puts(m, "# _-----=> irqs-off\n");
1909 seq_puts(m, "# / _----=> need-resched\n");
1910 seq_puts(m, "# | / _---=> hardirq/softirq\n");
1911 seq_puts(m, "# || / _--=> preempt-depth\n");
1912 seq_puts(m, "# ||| / delay\n");
1913 seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n");
1914 seq_puts(m, "# | | | |||| | |\n");
1915}
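For reference, with the irq-info option set each trace line carries the four
flag columns described by this header; a made-up sample line (values purely
illustrative) would read:

	#              TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
	          <idle>-0       [001]  d.h1   123.456789: some_function

where 'd' marks irqs off, '.' means no reschedule is needed, 'h' marks
hardirq context, and the trailing digit is the preempt depth.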
1864 1916
1865void 1917void
1866print_trace_header(struct seq_file *m, struct trace_iterator *iter) 1918print_trace_header(struct seq_file *m, struct trace_iterator *iter)
@@ -1869,32 +1921,14 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1869 struct trace_array *tr = iter->tr; 1921 struct trace_array *tr = iter->tr;
1870 struct trace_array_cpu *data = tr->data[tr->cpu]; 1922 struct trace_array_cpu *data = tr->data[tr->cpu];
1871 struct tracer *type = current_trace; 1923 struct tracer *type = current_trace;
-1872	unsigned long entries = 0;
-1873	unsigned long total = 0;
+1924	unsigned long entries;
+1925	unsigned long total;
1874 unsigned long count;
1875 const char *name = "preemption"; 1926 const char *name = "preemption";
1876 int cpu;
1877 1927
1878 if (type) 1928 if (type)
1879 name = type->name; 1929 name = type->name;
1880 1930
-1881
+1931	get_total_entries(tr, &total, &entries);
1882 for_each_tracing_cpu(cpu) {
1883 count = ring_buffer_entries_cpu(tr->buffer, cpu);
1884 /*
1885 * If this buffer has skipped entries, then we hold all
1886 * entries for the trace and we need to ignore the
1887 * ones before the time stamp.
1888 */
1889 if (tr->data[cpu]->skipped_entries) {
1890 count -= tr->data[cpu]->skipped_entries;
1891 /* total is the same as the entries */
1892 total += count;
1893 } else
1894 total += count +
1895 ring_buffer_overrun_cpu(tr->buffer, cpu);
1896 entries += count;
1897 }
1898 1932
1899 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 1933 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
1900 name, UTS_RELEASE); 1934 name, UTS_RELEASE);
@@ -2140,6 +2174,21 @@ enum print_line_t print_trace_line(struct trace_iterator *iter)
2140 return print_trace_fmt(iter); 2174 return print_trace_fmt(iter);
2141} 2175}
2142 2176
2177void trace_latency_header(struct seq_file *m)
2178{
2179 struct trace_iterator *iter = m->private;
2180
2181 /* print nothing if the buffers are empty */
2182 if (trace_empty(iter))
2183 return;
2184
2185 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2186 print_trace_header(m, iter);
2187
2188 if (!(trace_flags & TRACE_ITER_VERBOSE))
2189 print_lat_help_header(m);
2190}
2191
2143void trace_default_header(struct seq_file *m) 2192void trace_default_header(struct seq_file *m)
2144{ 2193{
2145 struct trace_iterator *iter = m->private; 2194 struct trace_iterator *iter = m->private;
@@ -2155,8 +2204,12 @@ void trace_default_header(struct seq_file *m)
2155 if (!(trace_flags & TRACE_ITER_VERBOSE)) 2204 if (!(trace_flags & TRACE_ITER_VERBOSE))
2156 print_lat_help_header(m); 2205 print_lat_help_header(m);
2157 } else { 2206 } else {
-2158		if (!(trace_flags & TRACE_ITER_VERBOSE))
-2159			print_func_help_header(m);
+2207		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
+2208			if (trace_flags & TRACE_ITER_IRQ_INFO)
2209 print_func_help_header_irq(iter->tr, m);
2210 else
2211 print_func_help_header(iter->tr, m);
2212 }
2160 } 2213 }
2161} 2214}
2162 2215
@@ -4775,6 +4828,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4775{ 4828{
4776 __ftrace_dump(true, oops_dump_mode); 4829 __ftrace_dump(true, oops_dump_mode);
4777} 4830}
4831EXPORT_SYMBOL_GPL(ftrace_dump);
4778 4832
4779__init static int tracer_alloc_buffers(void) 4833__init static int tracer_alloc_buffers(void)
4780{ 4834{
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 092e1f8d18dc..2c2657462ac3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -370,6 +370,7 @@ void trace_graph_function(struct trace_array *tr,
370 unsigned long ip, 370 unsigned long ip,
371 unsigned long parent_ip, 371 unsigned long parent_ip,
372 unsigned long flags, int pc); 372 unsigned long flags, int pc);
373void trace_latency_header(struct seq_file *m);
373void trace_default_header(struct seq_file *m); 374void trace_default_header(struct seq_file *m);
374void print_trace_header(struct seq_file *m, struct trace_iterator *iter); 375void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
375int trace_empty(struct trace_iterator *iter); 376int trace_empty(struct trace_iterator *iter);
@@ -654,6 +655,7 @@ enum trace_iterator_flags {
654 TRACE_ITER_RECORD_CMD = 0x100000, 655 TRACE_ITER_RECORD_CMD = 0x100000,
655 TRACE_ITER_OVERWRITE = 0x200000, 656 TRACE_ITER_OVERWRITE = 0x200000,
656 TRACE_ITER_STOP_ON_FREE = 0x400000, 657 TRACE_ITER_STOP_ON_FREE = 0x400000,
658 TRACE_ITER_IRQ_INFO = 0x800000,
657}; 659};
658 660
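The "irq-info" option string and the TRACE_ITER_IRQ_INFO bit are added in matching positions; ftrace is assumed (in code outside this hunk) to pair trace_options[i] with bit (1 << i). A toy model of that pairing, with an illustrative table whose positions are not the kernel's:

#include <stdio.h>
#include <string.h>

static const char *options[] = { "overwrite", "disable_on_free", "irq-info", NULL };

static unsigned int flag_for(const char *name)
{
	for (int i = 0; options[i]; i++)
		if (!strcmp(options[i], name))
			return 1u << i;
	return 0;
}

int main(void)
{
	printf("%#x\n", flag_for("irq-info"));	/* 0x4 in this toy table */
	return 0;
}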
659/* 661/*
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 95dc31efd6dd..f04cc3136bd3 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -27,6 +27,12 @@
27#include "trace.h" 27#include "trace.h"
28#include "trace_output.h" 28#include "trace_output.h"
29 29
30#define DEFAULT_SYS_FILTER_MESSAGE \
31 "### global filter ###\n" \
32 "# Use this to set filters for multiple events.\n" \
33 "# Only events with the given fields will be affected.\n" \
34 "# If no events are modified, an error message will be displayed here"
35
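The macro works because adjacent C string literals concatenate at compile time, so the backslash-continued lines form a single literal that can later be glued to "\n" at the use site. A minimal standalone check:

#include <stdio.h>

#define BANNER \
	"### global filter ###\n" \
	"# Use this to set filters for multiple events."

int main(void)
{
	puts(BANNER "\n");	/* same concatenation the use site relies on */
	return 0;
}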
30enum filter_op_ids 36enum filter_op_ids
31{ 37{
32 OP_OR, 38 OP_OR,
@@ -646,7 +652,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
646 if (filter && filter->filter_string) 652 if (filter && filter->filter_string)
647 trace_seq_printf(s, "%s\n", filter->filter_string); 653 trace_seq_printf(s, "%s\n", filter->filter_string);
648 else 654 else
-649		trace_seq_printf(s, "none\n");
+655		trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
650 mutex_unlock(&event_mutex); 656 mutex_unlock(&event_mutex);
651} 657}
652 658
@@ -1838,7 +1844,10 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1838 if (!filter) 1844 if (!filter)
1839 goto out; 1845 goto out;
1840 1846
-1841	replace_filter_string(filter, filter_string);
+1847	/* System filters just show a default message */
1848 kfree(filter->filter_string);
1849 filter->filter_string = NULL;
1850
1842 /* 1851 /*
1843 * No event actually uses the system filter 1852 * No event actually uses the system filter
1844 * we can free it without synchronize_sched(). 1853 * we can free it without synchronize_sched().
@@ -1848,14 +1857,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1848 1857
1849 parse_init(ps, filter_ops, filter_string); 1858 parse_init(ps, filter_ops, filter_string);
1850 err = filter_parse(ps); 1859 err = filter_parse(ps);
-1851	if (err) {
-1852		append_filter_err(ps, system->filter);
-1853		goto out;
-1854	}
+1860	if (err)
+1861		goto err_filter;
1855 1862
1856 err = replace_system_preds(system, ps, filter_string); 1863 err = replace_system_preds(system, ps, filter_string);
1857 if (err) 1864 if (err)
-1858		append_filter_err(ps, system->filter);
+1865		goto err_filter;
1859 1866
1860out: 1867out:
1861 filter_opstack_clear(ps); 1868 filter_opstack_clear(ps);
@@ -1865,6 +1872,11 @@ out_unlock:
1865 mutex_unlock(&event_mutex); 1872 mutex_unlock(&event_mutex);
1866 1873
1867 return err; 1874 return err;
1875
1876err_filter:
1877 replace_filter_string(filter, filter_string);
1878 append_filter_err(ps, system->filter);
1879 goto out;
1868} 1880}
1869 1881
1870#ifdef CONFIG_PERF_EVENTS 1882#ifdef CONFIG_PERF_EVENTS
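The patch converts two early-exit branches into the common kernel error-path shape: the failure-only work (re-recording the filter string and appending the parse error) sits past the function body under err_filter and jumps back into the shared cleanup. A standalone model of that control flow (names are illustrative):

#include <stdio.h>
#include <stdlib.h>

static int apply(int fail)
{
	char *scratch = malloc(16);
	int err = 0;

	if (!scratch)
		return -1;
	if (fail) {
		err = -1;
		goto err_path;
	}
out:
	free(scratch);		/* shared cleanup */
	return err;
err_path:
	fprintf(stderr, "recording error details\n");
	goto out;
}

int main(void)
{
	apply(1);		/* takes the error path */
	return apply(0);	/* clean path returns 0 */
}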
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 20dad0d7a163..99d20e920368 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -280,9 +280,20 @@ static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
280} 280}
281 281
282static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { } 282static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
283static void irqsoff_print_header(struct seq_file *s) { }
284static void irqsoff_trace_open(struct trace_iterator *iter) { } 283static void irqsoff_trace_open(struct trace_iterator *iter) { }
285static void irqsoff_trace_close(struct trace_iterator *iter) { } 284static void irqsoff_trace_close(struct trace_iterator *iter) { }
285
286#ifdef CONFIG_FUNCTION_TRACER
287static void irqsoff_print_header(struct seq_file *s)
288{
289 trace_default_header(s);
290}
291#else
292static void irqsoff_print_header(struct seq_file *s)
293{
294 trace_latency_header(s);
295}
296#endif /* CONFIG_FUNCTION_TRACER */
286#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 297#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
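The empty stub is replaced by a compile-time choice: with CONFIG_FUNCTION_TRACER the default header (which knows about function tracing) is used, otherwise the latency header. The same shape in a self-contained form, with a made-up config symbol:

#include <stdio.h>

#ifdef CONFIG_DEMO_FUNCTION_TRACER	/* stand-in for the real option */
static void print_header(void) { puts("default (function trace) header"); }
#else
static void print_header(void) { puts("latency header"); }
#endif

int main(void)
{
	print_header();
	return 0;
}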
287 298
288/* 299/*
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 51999309a6cf..0d6ff3555942 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -627,11 +627,23 @@ int trace_print_context(struct trace_iterator *iter)
627 unsigned long usec_rem = do_div(t, USEC_PER_SEC); 627 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
628 unsigned long secs = (unsigned long)t; 628 unsigned long secs = (unsigned long)t;
629 char comm[TASK_COMM_LEN]; 629 char comm[TASK_COMM_LEN];
630 int ret;
630 631
631 trace_find_cmdline(entry->pid, comm); 632 trace_find_cmdline(entry->pid, comm);
632 633
-633	return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
-634			comm, entry->pid, iter->cpu, secs, usec_rem);
+634	ret = trace_seq_printf(s, "%16s-%-5d [%03d] ",
+635			comm, entry->pid, iter->cpu);
636 if (!ret)
637 return 0;
638
639 if (trace_flags & TRACE_ITER_IRQ_INFO) {
640 ret = trace_print_lat_fmt(s, entry);
641 if (!ret)
642 return 0;
643 }
644
645 return trace_seq_printf(s, " %5lu.%06lu: ",
646 secs, usec_rem);
635} 647}
636 648
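For reference, the secs/usec_rem values printed at the end come from splitting a nanosecond timestamp; the kernel does this with do_div() just above the hunk. A userspace equivalent:

#include <stdio.h>

int main(void)
{
	unsigned long long t = 1234567890123ULL / 1000;	/* ns -> us */
	unsigned long secs = t / 1000000;
	unsigned long usec_rem = t % 1000000;

	printf("%5lu.%06lu\n", secs, usec_rem);	/* " 1234.567890" */
	return 0;
}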
637int trace_print_lat_context(struct trace_iterator *iter) 649int trace_print_lat_context(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e4a70c0c71b6..ff791ea48b57 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -280,9 +280,20 @@ static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
280} 280}
281 281
282static void wakeup_graph_return(struct ftrace_graph_ret *trace) { } 282static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
283static void wakeup_print_header(struct seq_file *s) { }
284static void wakeup_trace_open(struct trace_iterator *iter) { } 283static void wakeup_trace_open(struct trace_iterator *iter) { }
285static void wakeup_trace_close(struct trace_iterator *iter) { } 284static void wakeup_trace_close(struct trace_iterator *iter) { }
285
286#ifdef CONFIG_FUNCTION_TRACER
287static void wakeup_print_header(struct seq_file *s)
288{
289 trace_default_header(s);
290}
291#else
292static void wakeup_print_header(struct seq_file *s)
293{
294 trace_latency_header(s);
295}
296#endif /* CONFIG_FUNCTION_TRACER */
286#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 297#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
287 298
288/* 299/*
diff --git a/kernel/wait.c b/kernel/wait.c
index 26fa7797f90f..7fdd9eaca2c3 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,10 +10,10 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
-13void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
+13void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
14{ 14{
15 spin_lock_init(&q->lock); 15 spin_lock_init(&q->lock);
-16	lockdep_set_class(&q->lock, key);
+16	lockdep_set_class_and_name(&q->lock, key, name);
17 INIT_LIST_HEAD(&q->task_list); 17 INIT_LIST_HEAD(&q->task_list);
18} 18}
19 19
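The extra name parameter lets lockdep report the waitqueue by its variable name. The caller-side macro is not part of this hunk; a sketch of how it is assumed to supply the name by stringifying its argument:

/* Assumed companion macro (not in this patch): #q stringifies the
 * waitqueue variable so lockdep reports it by name. */
#define init_waitqueue_head(q)					\
	do {							\
		static struct lock_class_key __key;		\
								\
		__init_waitqueue_head((q), #q, &__key);		\
	} while (0)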
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a78b7c6e042c..77cb245f8e7b 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -268,12 +268,16 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
268 * Try to repair the damage, so we have a better chance to get useful 268 * Try to repair the damage, so we have a better chance to get useful
269 * debug output. 269 * debug output.
270 */ 270 */
-271static void
+271static int
272debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state), 272debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
273 void * addr, enum debug_obj_state state) 273 void * addr, enum debug_obj_state state)
274{ 274{
275 int fixed = 0;
276
275 if (fixup) 277 if (fixup)
-276		debug_objects_fixups += fixup(addr, state);
+278		fixed = fixup(addr, state);
279 debug_objects_fixups += fixed;
280 return fixed;
277} 281}
278 282
279static void debug_object_is_on_stack(void *addr, int onstack) 283static void debug_object_is_on_stack(void *addr, int onstack)
@@ -386,6 +390,9 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
386 struct debug_bucket *db; 390 struct debug_bucket *db;
387 struct debug_obj *obj; 391 struct debug_obj *obj;
388 unsigned long flags; 392 unsigned long flags;
393 struct debug_obj o = { .object = addr,
394 .state = ODEBUG_STATE_NOTAVAILABLE,
395 .descr = descr };
389 396
390 if (!debug_objects_enabled) 397 if (!debug_objects_enabled)
391 return; 398 return;
@@ -425,8 +432,9 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
425 * let the type specific code decide whether this is 432 * let the type specific code decide whether this is
426 * true or not. 433 * true or not.
427 */ 434 */
-428		debug_object_fixup(descr->fixup_activate, addr,
-429				   ODEBUG_STATE_NOTAVAILABLE);
+435		if (debug_object_fixup(descr->fixup_activate, addr,
+436				       ODEBUG_STATE_NOTAVAILABLE))
437 debug_print_object(&o, "activate");
430} 438}
431 439
432/** 440/**
@@ -563,6 +571,44 @@ out_unlock:
563} 571}
564 572
565/** 573/**
574 * debug_object_assert_init - debug checks when object should be init-ed
575 * @addr: address of the object
576 * @descr: pointer to an object specific debug description structure
577 */
578void debug_object_assert_init(void *addr, struct debug_obj_descr *descr)
579{
580 struct debug_bucket *db;
581 struct debug_obj *obj;
582 unsigned long flags;
583
584 if (!debug_objects_enabled)
585 return;
586
587 db = get_bucket((unsigned long) addr);
588
589 raw_spin_lock_irqsave(&db->lock, flags);
590
591 obj = lookup_object(addr, db);
592 if (!obj) {
593 struct debug_obj o = { .object = addr,
594 .state = ODEBUG_STATE_NOTAVAILABLE,
595 .descr = descr };
596
597 raw_spin_unlock_irqrestore(&db->lock, flags);
598 /*
599 * Maybe the object is static. Let the type specific
600 * code decide what to do.
601 */
602 if (debug_object_fixup(descr->fixup_assert_init, addr,
603 ODEBUG_STATE_NOTAVAILABLE))
604 debug_print_object(&o, "assert_init");
605 return;
606 }
607
608 raw_spin_unlock_irqrestore(&db->lock, flags);
609}
610
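debug_object_assert_init() lets a caller demand that an object was initialized before use; an unknown address is handed to the new fixup_assert_init hook, which can either fix things up (e.g. for static objects) or report the failure. A toy model of the lookup-then-fixup flow, with the hash-bucket lookup reduced to a single pointer:

#include <stdio.h>

struct descr { int (*fixup_assert_init)(void *addr); };

static void *tracked;	/* stand-in for the hash-bucket lookup */

static void assert_init(void *addr, struct descr *d)
{
	if (addr == tracked)
		return;		/* known (initialized) object */
	if (d->fixup_assert_init && d->fixup_assert_init(addr))
		fprintf(stderr, "ODEBUG: assert_init failed for %p\n", addr);
}

static int fixup(void *addr) { (void)addr; return 1; }

int main(void)
{
	struct descr d = { fixup };
	int obj;

	assert_init(&obj, &d);	/* untracked: warns */
	tracked = &obj;
	assert_init(&obj, &d);	/* tracked: silent */
	return 0;
}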
611/**
566 * debug_object_active_state - debug checks object usage state machine 612 * debug_object_active_state - debug checks object usage state machine
567 * @addr: address of the object 613 * @addr: address of the object
568 * @descr: pointer to an object specific debug description structure 614 * @descr: pointer to an object specific debug description structure
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c8..e338407f1225 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP
131config HAVE_MEMBLOCK 131config HAVE_MEMBLOCK
132 boolean 132 boolean
133 133
134config HAVE_MEMBLOCK_NODE_MAP
135 boolean
136
137config ARCH_DISCARD_MEMBLOCK
138 boolean
139
134config NO_BOOTMEM 140config NO_BOOTMEM
135 boolean 141 boolean
136 142
diff --git a/mm/filemap.c b/mm/filemap.c
index c106d3b3cc64..5f0a3c91fdac 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1828,7 +1828,7 @@ repeat:
1828 page = __page_cache_alloc(gfp | __GFP_COLD); 1828 page = __page_cache_alloc(gfp | __GFP_COLD);
1829 if (!page) 1829 if (!page)
1830 return ERR_PTR(-ENOMEM); 1830 return ERR_PTR(-ENOMEM);
-1831	err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+1831	err = add_to_page_cache_lru(page, mapping, index, gfp);
1832 if (unlikely(err)) { 1832 if (unlikely(err)) {
1833 page_cache_release(page); 1833 page_cache_release(page);
1834 if (err == -EEXIST) 1834 if (err == -EEXIST)
@@ -1925,10 +1925,7 @@ static struct page *wait_on_page_read(struct page *page)
1925 * @gfp: the page allocator flags to use if allocating 1925 * @gfp: the page allocator flags to use if allocating
1926 * 1926 *
1927 * This is the same as "read_mapping_page(mapping, index, NULL)", but with 1927 * This is the same as "read_mapping_page(mapping, index, NULL)", but with
-1928 * any new page allocations done using the specified allocation flags. Note
-1929 * that the Radix tree operations will still use GFP_KERNEL, so you can't
-1930 * expect to do this atomically or anything like that - but you can pass in
-1931 * other page requirements.
+1928 * any new page allocations done using the specified allocation flags.
1932 * 1929 *
1933 * If the page does not get brought uptodate, return -EIO. 1930 * If the page does not get brought uptodate, return -EIO.
1934 */ 1931 */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 73f17c0293c0..2316840b337a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -901,7 +901,6 @@ retry:
901 h->resv_huge_pages += delta; 901 h->resv_huge_pages += delta;
902 ret = 0; 902 ret = 0;
903 903
904 spin_unlock(&hugetlb_lock);
905 /* Free the needed pages to the hugetlb pool */ 904 /* Free the needed pages to the hugetlb pool */
906 list_for_each_entry_safe(page, tmp, &surplus_list, lru) { 905 list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
907 if ((--needed) < 0) 906 if ((--needed) < 0)
@@ -915,6 +914,7 @@ retry:
915 VM_BUG_ON(page_count(page)); 914 VM_BUG_ON(page_count(page));
916 enqueue_huge_page(h, page); 915 enqueue_huge_page(h, page);
917 } 916 }
917 spin_unlock(&hugetlb_lock);
918 918
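The fix widens the locked section so the surplus list is walked and the pages enqueued under hugetlb_lock, with the unlock moved after the loop. A minimal pthread model of the corrected lock scope (names are stand-ins):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int pool;	/* stand-in for the hugetlb free pool */

static void refill(int pages)
{
	pthread_mutex_lock(&lock);
	for (int i = 0; i < pages; i++)
		pool++;			/* enqueue_huge_page() stand-in */
	pthread_mutex_unlock(&lock);	/* previously dropped before the loop */
}

int main(void)
{
	refill(3);
	printf("%d\n", pool);
	return 0;
}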
919 /* Free unnecessary surplus pages to the buddy allocator */ 919 /* Free unnecessary surplus pages to the buddy allocator */
920free: 920free:
diff --git a/mm/memblock.c b/mm/memblock.c
index 84bec4969ed5..2f55f19b7c86 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
20#include <linux/seq_file.h> 20#include <linux/seq_file.h>
21#include <linux/memblock.h> 21#include <linux/memblock.h>
22 22
-23struct memblock memblock __initdata_memblock;
+23static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
24static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
25
26struct memblock memblock __initdata_memblock = {
27 .memory.regions = memblock_memory_init_regions,
28 .memory.cnt = 1, /* empty dummy entry */
29 .memory.max = INIT_MEMBLOCK_REGIONS,
30
31 .reserved.regions = memblock_reserved_init_regions,
32 .reserved.cnt = 1, /* empty dummy entry */
33 .reserved.max = INIT_MEMBLOCK_REGIONS,
34
35 .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
36};
24 37
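Initializing the memblock structure statically removes the need for a runtime memblock_init() call; this is ordinary C99 designated initialization of nested members. A self-contained illustration with simplified types:

#include <stdio.h>

struct type { int cnt, max; };
struct config {
	struct type memory;
	struct type reserved;
	unsigned long current_limit;
};

static struct config cfg = {
	.memory.cnt = 1,	/* empty dummy entry */
	.memory.max = 128,
	.reserved.cnt = 1,
	.reserved.max = 128,
	.current_limit = ~0UL,
};

int main(void)
{
	printf("%d %#lx\n", cfg.memory.cnt, cfg.current_limit);
	return 0;
}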
25int memblock_debug __initdata_memblock; 38int memblock_debug __initdata_memblock;
-26int memblock_can_resize __initdata_memblock;
+39static int memblock_can_resize __initdata_memblock;
27static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
28static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
29 40
30/* inline so we don't get a warning when pr_debug is compiled out */ 41/* inline so we don't get a warning when pr_debug is compiled out */
31static inline const char *memblock_type_name(struct memblock_type *type) 42static inline const char *memblock_type_name(struct memblock_type *type)
@@ -38,20 +49,15 @@ static inline const char *memblock_type_name(struct memblock_type *type)
38 return "unknown"; 49 return "unknown";
39} 50}
40 51
-41/*
-42 * Address comparison utilities
+52/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
+53static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
43 */
44
45static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size)
46{
47 return addr & ~(size - 1);
48}
49
50static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size)
-51{
-52	return (addr + (size - 1)) & ~(size - 1);
-53}
+54{
+55	return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
+56}
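memblock_cap_size() clamps a region size so that base + size cannot wrap around the top of the physical address space. The same logic standalone:

#include <stdio.h>
#include <limits.h>

typedef unsigned long long phys_addr_t;

/* Clamp *size so that base + *size does not overflow. */
static phys_addr_t cap_size(phys_addr_t base, phys_addr_t *size)
{
	phys_addr_t max = ULLONG_MAX - base;

	if (*size > max)
		*size = max;
	return *size;
}

int main(void)
{
	phys_addr_t size = 0x100;

	printf("%#llx\n", cap_size(ULLONG_MAX - 0x10, &size));	/* 0x10 */
	return 0;
}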
54 57
58/*
59 * Address comparison utilities
60 */
55static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, 61static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
56 phys_addr_t base2, phys_addr_t size2) 62 phys_addr_t base2, phys_addr_t size2)
57{ 63{
@@ -73,83 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
73 return (i < type->cnt) ? i : -1; 79 return (i < type->cnt) ? i : -1;
74} 80}
75 81
76/* 82/**
77 * Find, allocate, deallocate or reserve unreserved regions. All allocations 83 * memblock_find_in_range_node - find free area in given range and node
78 * are top-down. 84 * @start: start of candidate range
85 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
86 * @size: size of free area to find
87 * @align: alignment of free area to find
88 * @nid: nid of the free area to find, %MAX_NUMNODES for any node
89 *
90 * Find @size free area aligned to @align in the specified range and node.
91 *
92 * RETURNS:
93 * Found address on success, %0 on failure.
79 */ 94 */
80 95phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
81static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end, 96 phys_addr_t end, phys_addr_t size,
82 phys_addr_t size, phys_addr_t align) 97 phys_addr_t align, int nid)
83{ 98{
84 phys_addr_t base, res_base; 99 phys_addr_t this_start, this_end, cand;
85 long j; 100 u64 i;
86
87 /* In case, huge size is requested */
88 if (end < size)
89 return MEMBLOCK_ERROR;
90
91 base = memblock_align_down((end - size), align);
92 101
-93	/* Prevent allocations returning 0 as it's also used to
-94	 * indicate an allocation failure
-95	 */
+102	/* align @size to avoid excessive fragmentation on reserved array */
+103	size = round_up(size, align);
96 if (start == 0)
97 start = PAGE_SIZE;
98
99 while (start <= base) {
100 j = memblock_overlaps_region(&memblock.reserved, base, size);
101 if (j < 0)
102 return base;
103 res_base = memblock.reserved.regions[j].base;
104 if (res_base < size)
105 break;
106 base = memblock_align_down(res_base - size, align);
107 }
108 104
-109	return MEMBLOCK_ERROR;
+105	/* pump up @end */
110}
111
112static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
113 phys_addr_t align, phys_addr_t start, phys_addr_t end)
114{
115 long i;
116
117 BUG_ON(0 == size);
118
119 /* Pump up max_addr */
120 if (end == MEMBLOCK_ALLOC_ACCESSIBLE) 106 if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
121 end = memblock.current_limit; 107 end = memblock.current_limit;
122 108
-123	/* We do a top-down search, this tends to limit memory
-124	 * fragmentation by keeping early boot allocs near the
-125	 * top of memory
-126	 */
+109	/* adjust @start to avoid underflow and allocating the first page */
+110	start = max3(start, size, (phys_addr_t)PAGE_SIZE);
+111	end = max(start, end);
127 for (i = memblock.memory.cnt - 1; i >= 0; i--) {
128 phys_addr_t memblockbase = memblock.memory.regions[i].base;
129 phys_addr_t memblocksize = memblock.memory.regions[i].size;
130 phys_addr_t bottom, top, found;
131 112
-132		if (memblocksize < size)
-133			continue;
-134		if ((memblockbase + memblocksize) <= start)
-135			break;
-136		bottom = max(memblockbase, start);
-137		top = min(memblockbase + memblocksize, end);
-138		if (bottom >= top)
-139			continue;
-140		found = memblock_find_region(bottom, top, size, align);
-141		if (found != MEMBLOCK_ERROR)
-142			return found;
-143	}
-144	return MEMBLOCK_ERROR;
-145}
+113	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
+114		this_start = clamp(this_start, start, end);
+115		this_end = clamp(this_end, start, end);
+116
+117		cand = round_down(this_end - size, align);
+118		if (cand >= this_start)
+119			return cand;
+120	}
+121	return 0;
+122}
146 123
147/* 124/**
148 * Find a free area with specified alignment in a specific range. 125 * memblock_find_in_range - find free area in given range
126 * @start: start of candidate range
127 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
128 * @size: size of free area to find
129 * @align: alignment of free area to find
130 *
131 * Find @size free area aligned to @align in the specified range.
132 *
133 * RETURNS:
134 * Found address on success, %0 on failure.
149 */ 135 */
-150u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align)
+136phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+137					phys_addr_t end, phys_addr_t size,
+138					phys_addr_t align)
 151{ 139{
-152	return memblock_find_base(size, align, start, end);
+140	return memblock_find_in_range_node(start, end, size, align,
+141					   MAX_NUMNODES);
 153} 142}
154 143
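The rewritten finder walks free ranges top-down, clamps each to the requested window, and returns the highest aligned address that fits; 0 doubles as the failure value now that MEMBLOCK_ERROR is gone. A toy version over a plain array of free ranges (all names illustrative):

#include <stdio.h>

typedef unsigned long long u64;

#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))
#define round_down(x, a) ((x) & ~((u64)(a) - 1))

static u64 find_top_down(u64 (*free_rng)[2], int n,
			 u64 start, u64 end, u64 size, u64 align)
{
	for (int i = n - 1; i >= 0; i--) {
		u64 lo = clamp(free_rng[i][0], start, end);
		u64 hi = clamp(free_rng[i][1], start, end);
		u64 cand;

		if (hi < size)
			continue;
		cand = round_down(hi - size, align);
		if (cand >= lo)
			return cand;
	}
	return 0;	/* "not found", as in the patch */
}

int main(void)
{
	u64 free_rng[][2] = { { 0x1000, 0x4000 }, { 0x8000, 0x9000 } };

	/* highest aligned fit comes from the top range: 0x8800 */
	printf("%#llx\n", find_top_down(free_rng, 2, 0, 0x10000, 0x800, 0x800));
	return 0;
}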
155/* 144/*
@@ -178,25 +167,21 @@ int __init_memblock memblock_reserve_reserved_regions(void)
178 167
179static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) 168static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
180{ 169{
181 unsigned long i; 170 type->total_size -= type->regions[r].size;
182 171 memmove(&type->regions[r], &type->regions[r + 1],
183 for (i = r; i < type->cnt - 1; i++) { 172 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
184 type->regions[i].base = type->regions[i + 1].base;
185 type->regions[i].size = type->regions[i + 1].size;
186 }
187 type->cnt--; 173 type->cnt--;
188 174
189 /* Special case for empty arrays */ 175 /* Special case for empty arrays */
190 if (type->cnt == 0) { 176 if (type->cnt == 0) {
177 WARN_ON(type->total_size != 0);
191 type->cnt = 1; 178 type->cnt = 1;
192 type->regions[0].base = 0; 179 type->regions[0].base = 0;
193 type->regions[0].size = 0; 180 type->regions[0].size = 0;
181 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
194 } 182 }
195} 183}
196 184
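Replacing the hand-rolled copy loop with memmove() is both shorter and safe for the overlapping source and destination here. The same removal pattern standalone:

#include <stdio.h>
#include <string.h>

/* Remove element r from a packed array of cnt items. */
static void remove_at(int *a, int *cnt, int r)
{
	memmove(&a[r], &a[r + 1], (*cnt - (r + 1)) * sizeof(a[0]));
	(*cnt)--;
}

int main(void)
{
	int a[] = { 1, 2, 3, 4 }, cnt = 4;

	remove_at(a, &cnt, 1);
	for (int i = 0; i < cnt; i++)
		printf("%d ", a[i]);	/* 1 3 4 */
	printf("\n");
	return 0;
}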
197/* Defined below but needed now */
198static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);
199
200static int __init_memblock memblock_double_array(struct memblock_type *type) 185static int __init_memblock memblock_double_array(struct memblock_type *type)
201{ 186{
202 struct memblock_region *new_array, *old_array; 187 struct memblock_region *new_array, *old_array;
@@ -226,10 +211,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
226 */ 211 */
227 if (use_slab) { 212 if (use_slab) {
228 new_array = kmalloc(new_size, GFP_KERNEL); 213 new_array = kmalloc(new_size, GFP_KERNEL);
-229		addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array);
+214		addr = new_array ? __pa(new_array) : 0;
 230	} else 215	} else
-231		addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE);
+216		addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
-232	if (addr == MEMBLOCK_ERROR) {
+217	if (!addr) {
233 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", 218 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
234 memblock_type_name(type), type->max, type->max * 2); 219 memblock_type_name(type), type->max, type->max * 2);
235 return -1; 220 return -1;
@@ -254,7 +239,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
254 return 0; 239 return 0;
255 240
256 /* Add the new reserved region now. Should not fail ! */ 241 /* Add the new reserved region now. Should not fail ! */
-257	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size));
+242	BUG_ON(memblock_reserve(addr, new_size));
258 243
259 /* If the array wasn't our static init one, then free it. We only do 244 /* If the array wasn't our static init one, then free it. We only do
260 * that before SLAB is available as later on, we don't know whether 245 * that before SLAB is available as later on, we don't know whether
@@ -268,343 +253,514 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
268 return 0; 253 return 0;
269} 254}
270 255
271int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, 256/**
272 phys_addr_t addr2, phys_addr_t size2) 257 * memblock_merge_regions - merge neighboring compatible regions
273{ 258 * @type: memblock type to scan
274 return 1; 259 *
275} 260 * Scan @type and merge neighboring compatible regions.
276 261 */
277static long __init_memblock memblock_add_region(struct memblock_type *type, 262static void __init_memblock memblock_merge_regions(struct memblock_type *type)
278 phys_addr_t base, phys_addr_t size)
279{ 263{
280 phys_addr_t end = base + size; 264 int i = 0;
281 int i, slot = -1;
282
283 /* First try and coalesce this MEMBLOCK with others */
284 for (i = 0; i < type->cnt; i++) {
285 struct memblock_region *rgn = &type->regions[i];
286 phys_addr_t rend = rgn->base + rgn->size;
287 265
-288		/* Exit if there's no possible hits */
-289		if (rgn->base > end || rgn->size == 0)
-290			break;
+266	/* cnt never goes below 1 */
+267	while (i < type->cnt - 1) {
+268		struct memblock_region *this = &type->regions[i];
269 struct memblock_region *next = &type->regions[i + 1];
291 270
292 /* Check if we are fully enclosed within an existing 271 if (this->base + this->size != next->base ||
293 * block 272 memblock_get_region_node(this) !=
294 */ 273 memblock_get_region_node(next)) {
295 if (rgn->base <= base && rend >= end) 274 BUG_ON(this->base + this->size > next->base);
296 return 0; 275 i++;
276 continue;
277 }
297 278
298 /* Check if we overlap or are adjacent with the bottom 279 this->size += next->size;
299 * of a block. 280 memmove(next, next + 1, (type->cnt - (i + 1)) * sizeof(*next));
300 */ 281 type->cnt--;
301 if (base < rgn->base && end >= rgn->base) { 282 }
302 /* If we can't coalesce, create a new block */ 283}
303 if (!memblock_memory_can_coalesce(base, size,
304 rgn->base,
305 rgn->size)) {
306 /* Overlap & can't coalesce are mutually
307 * exclusive, if you do that, be prepared
308 * for trouble
309 */
310 WARN_ON(end != rgn->base);
311 goto new_block;
312 }
313 /* We extend the bottom of the block down to our
314 * base
315 */
316 rgn->base = base;
317 rgn->size = rend - base;
318 284
319 /* Return if we have nothing else to allocate 285/**
320 * (fully coalesced) 286 * memblock_insert_region - insert new memblock region
321 */ 287 * @type: memblock type to insert into
322 if (rend >= end) 288 * @idx: index for the insertion point
323 return 0; 289 * @base: base address of the new region
290 * @size: size of the new region
291 *
292 * Insert new memblock region [@base,@base+@size) into @type at @idx.
293 * @type must already have extra room to accommodate the new region.
294 */
295static void __init_memblock memblock_insert_region(struct memblock_type *type,
296 int idx, phys_addr_t base,
297 phys_addr_t size, int nid)
298{
299 struct memblock_region *rgn = &type->regions[idx];
324 300
325 /* We continue processing from the end of the 301 BUG_ON(type->cnt >= type->max);
326 * coalesced block. 302 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
327 */ 303 rgn->base = base;
328 base = rend; 304 rgn->size = size;
329 size = end - base; 305 memblock_set_region_node(rgn, nid);
330 } 306 type->cnt++;
307 type->total_size += size;
308}
331 309
332 /* Now check if we overlap or are adjacent with the 310/**
333 * top of a block 311 * memblock_add_region - add new memblock region
334 */ 312 * @type: memblock type to add new region into
335 if (base <= rend && end >= rend) { 313 * @base: base address of the new region
336 /* If we can't coalesce, create a new block */ 314 * @size: size of the new region
337 if (!memblock_memory_can_coalesce(rgn->base, 315 * @nid: nid of the new region
338 rgn->size, 316 *
339 base, size)) { 317 * Add new memblock region [@base,@base+@size) into @type. The new region
340 /* Overlap & can't coalesce are mutually 318 * is allowed to overlap with existing ones - overlaps don't affect already
341 * exclusive, if you do that, be prepared 319 * existing regions. @type is guaranteed to be minimal (all neighbouring
342 * for trouble 320 * compatible regions are merged) after the addition.
343 */ 321 *
344 WARN_ON(rend != base); 322 * RETURNS:
345 goto new_block; 323 * 0 on success, -errno on failure.
346 } 324 */
347 /* We adjust our base down to enclose the 325static int __init_memblock memblock_add_region(struct memblock_type *type,
348 * original block and destroy it. It will be 326 phys_addr_t base, phys_addr_t size, int nid)
349 * part of our new allocation. Since we've 327{
350 * freed an entry, we know we won't fail 328 bool insert = false;
351 * to allocate one later, so we won't risk 329 phys_addr_t obase = base;
352 * losing the original block allocation. 330 phys_addr_t end = base + memblock_cap_size(base, &size);
353 */ 331 int i, nr_new;
354 size += (base - rgn->base);
355 base = rgn->base;
356 memblock_remove_region(type, i--);
357 }
358 }
359 332
360 /* If the array is empty, special case, replace the fake 333 /* special case for empty array */
361 * filler region and return 334 if (type->regions[0].size == 0) {
362 */ 335 WARN_ON(type->cnt != 1 || type->total_size);
363 if ((type->cnt == 1) && (type->regions[0].size == 0)) {
364 type->regions[0].base = base; 336 type->regions[0].base = base;
365 type->regions[0].size = size; 337 type->regions[0].size = size;
338 memblock_set_region_node(&type->regions[0], nid);
339 type->total_size = size;
366 return 0; 340 return 0;
367 } 341 }
368 342repeat:
369 new_block: 343 /*
370 /* If we are out of space, we fail. It's too late to resize the array 344 * The following is executed twice. Once with %false @insert and
371 * but then this shouldn't have happened in the first place. 345 * then with %true. The first counts the number of regions needed
346 * to accommodate the new area. The second actually inserts them.
372 */ 347 */
373 if (WARN_ON(type->cnt >= type->max)) 348 base = obase;
374 return -1; 349 nr_new = 0;
375 350
376 /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ 351 for (i = 0; i < type->cnt; i++) {
377 for (i = type->cnt - 1; i >= 0; i--) { 352 struct memblock_region *rgn = &type->regions[i];
378 if (base < type->regions[i].base) { 353 phys_addr_t rbase = rgn->base;
379 type->regions[i+1].base = type->regions[i].base; 354 phys_addr_t rend = rbase + rgn->size;
380 type->regions[i+1].size = type->regions[i].size; 355
381 } else { 356 if (rbase >= end)
382 type->regions[i+1].base = base;
383 type->regions[i+1].size = size;
384 slot = i + 1;
385 break; 357 break;
358 if (rend <= base)
359 continue;
360 /*
361 * @rgn overlaps. If it separates the lower part of new
362 * area, insert that portion.
363 */
364 if (rbase > base) {
365 nr_new++;
366 if (insert)
367 memblock_insert_region(type, i++, base,
368 rbase - base, nid);
386 } 369 }
370 /* area below @rend is dealt with, forget about it */
371 base = min(rend, end);
387 } 372 }
388 if (base < type->regions[0].base) { 373
389 type->regions[0].base = base; 374 /* insert the remaining portion */
390 type->regions[0].size = size; 375 if (base < end) {
391 slot = 0; 376 nr_new++;
377 if (insert)
378 memblock_insert_region(type, i, base, end - base, nid);
392 } 379 }
393 type->cnt++;
394 380
395 /* The array is full ? Try to resize it. If that fails, we undo 381 /*
396 * our allocation and return an error 382 * If this was the first round, resize array and repeat for actual
383 * insertions; otherwise, merge and return.
397 */ 384 */
398 if (type->cnt == type->max && memblock_double_array(type)) { 385 if (!insert) {
399 BUG_ON(slot < 0); 386 while (type->cnt + nr_new > type->max)
400 memblock_remove_region(type, slot); 387 if (memblock_double_array(type) < 0)
401 return -1; 388 return -ENOMEM;
389 insert = true;
390 goto repeat;
391 } else {
392 memblock_merge_regions(type);
393 return 0;
402 } 394 }
403
404 return 0;
405} 395}
406 396
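The repeat label implements a two-pass scheme: the first pass only counts how many regions the insertion will need so the array can be grown up front, and the second pass performs the actual inserts. A standalone model of the same count-resize-insert flow:

#include <stdio.h>
#include <stdlib.h>

static int add_values(int **arr, int *cnt, int *max, const int *vals, int n)
{
	int insert = 0, nr_new;

repeat:
	nr_new = 0;
	for (int i = 0; i < n; i++) {
		nr_new++;
		if (insert)
			(*arr)[(*cnt)++] = vals[i];
	}
	if (!insert) {
		while (*cnt + nr_new > *max) {	/* grow before inserting */
			int *tmp = realloc(*arr, 2 * *max * sizeof(**arr));

			if (!tmp)
				return -1;
			*arr = tmp;
			*max *= 2;
		}
		insert = 1;
		goto repeat;
	}
	return 0;
}

int main(void)
{
	int max = 2, cnt = 0;
	int *arr = malloc(max * sizeof(*arr));
	int vals[] = { 1, 2, 3, 4, 5 };

	if (!arr || add_values(&arr, &cnt, &max, vals, 5))
		return 1;
	printf("cnt=%d max=%d\n", cnt, max);	/* cnt=5 max=8 */
	free(arr);
	return 0;
}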
407long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) 397int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
398 int nid)
408{ 399{
409 return memblock_add_region(&memblock.memory, base, size); 400 return memblock_add_region(&memblock.memory, base, size, nid);
401}
410 402
403int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
404{
405 return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES);
411} 406}
412 407
413static long __init_memblock __memblock_remove(struct memblock_type *type, 408/**
414 phys_addr_t base, phys_addr_t size) 409 * memblock_isolate_range - isolate given range into disjoint memblocks
410 * @type: memblock type to isolate range for
411 * @base: base of range to isolate
412 * @size: size of range to isolate
413 * @start_rgn: out parameter for the start of isolated region
414 * @end_rgn: out parameter for the end of isolated region
415 *
416 * Walk @type and ensure that regions don't cross the boundaries defined by
417 * [@base,@base+@size). Crossing regions are split at the boundaries,
418 * which may create at most two more regions. The index of the first
419 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
420 *
421 * RETURNS:
422 * 0 on success, -errno on failure.
423 */
424static int __init_memblock memblock_isolate_range(struct memblock_type *type,
425 phys_addr_t base, phys_addr_t size,
426 int *start_rgn, int *end_rgn)
415{ 427{
416 phys_addr_t end = base + size; 428 phys_addr_t end = base + memblock_cap_size(base, &size);
417 int i; 429 int i;
418 430
419 /* Walk through the array for collisions */ 431 *start_rgn = *end_rgn = 0;
432
433 /* we'll create at most two more regions */
434 while (type->cnt + 2 > type->max)
435 if (memblock_double_array(type) < 0)
436 return -ENOMEM;
437
420 for (i = 0; i < type->cnt; i++) { 438 for (i = 0; i < type->cnt; i++) {
421 struct memblock_region *rgn = &type->regions[i]; 439 struct memblock_region *rgn = &type->regions[i];
422 phys_addr_t rend = rgn->base + rgn->size; 440 phys_addr_t rbase = rgn->base;
441 phys_addr_t rend = rbase + rgn->size;
423 442
424 /* Nothing more to do, exit */ 443 if (rbase >= end)
425 if (rgn->base > end || rgn->size == 0)
426 break; 444 break;
427 445 if (rend <= base)
428 /* If we fully enclose the block, drop it */
429 if (base <= rgn->base && end >= rend) {
430 memblock_remove_region(type, i--);
431 continue; 446 continue;
432 }
433 447
434 /* If we are fully enclosed within a block 448 if (rbase < base) {
435 * then we need to split it and we are done 449 /*
436 */ 450 * @rgn intersects from below. Split and continue
437 if (base > rgn->base && end < rend) { 451 * to process the next region - the new top half.
438 rgn->size = base - rgn->base; 452 */
439 if (!memblock_add_region(type, end, rend - end)) 453 rgn->base = base;
440 return 0; 454 rgn->size -= base - rbase;
441 /* Failure to split is bad, we at least 455 type->total_size -= base - rbase;
442 * restore the block before erroring 456 memblock_insert_region(type, i, rbase, base - rbase,
457 memblock_get_region_node(rgn));
458 } else if (rend > end) {
459 /*
460 * @rgn intersects from above. Split and redo the
461 * current region - the new bottom half.
443 */ 462 */
444 rgn->size = rend - rgn->base;
445 WARN_ON(1);
446 return -1;
447 }
448
449 /* Check if we need to trim the bottom of a block */
450 if (rgn->base < end && rend > end) {
451 rgn->size -= end - rgn->base;
452 rgn->base = end; 463 rgn->base = end;
453 break; 464 rgn->size -= end - rbase;
465 type->total_size -= end - rbase;
466 memblock_insert_region(type, i--, rbase, end - rbase,
467 memblock_get_region_node(rgn));
468 } else {
469 /* @rgn is fully contained, record it */
470 if (!*end_rgn)
471 *start_rgn = i;
472 *end_rgn = i + 1;
454 } 473 }
474 }
455 475
456 /* And check if we need to trim the top of a block */ 476 return 0;
457 if (base < rend) 477}
458 rgn->size -= rend - base;
459 478
460 } 479static int __init_memblock __memblock_remove(struct memblock_type *type,
480 phys_addr_t base, phys_addr_t size)
481{
482 int start_rgn, end_rgn;
483 int i, ret;
484
485 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
486 if (ret)
487 return ret;
488
489 for (i = end_rgn - 1; i >= start_rgn; i--)
490 memblock_remove_region(type, i);
461 return 0; 491 return 0;
462} 492}
463 493
464long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) 494int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
465{ 495{
466 return __memblock_remove(&memblock.memory, base, size); 496 return __memblock_remove(&memblock.memory, base, size);
467} 497}
468 498
469long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) 499int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
470{ 500{
501 memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n",
502 (unsigned long long)base,
503 (unsigned long long)base + size,
504 (void *)_RET_IP_);
505
471 return __memblock_remove(&memblock.reserved, base, size); 506 return __memblock_remove(&memblock.reserved, base, size);
472} 507}
473 508
474long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) 509int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
475{ 510{
476 struct memblock_type *_rgn = &memblock.reserved; 511 struct memblock_type *_rgn = &memblock.reserved;
477 512
513 memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n",
514 (unsigned long long)base,
515 (unsigned long long)base + size,
516 (void *)_RET_IP_);
478 BUG_ON(0 == size); 517 BUG_ON(0 == size);
479 518
480 return memblock_add_region(_rgn, base, size); 519 return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
481} 520}
482 521
483phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 522/**
523 * __next_free_mem_range - next function for for_each_free_mem_range()
524 * @idx: pointer to u64 loop variable
525 * @nid: node selector, %MAX_NUMNODES for all nodes
526 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
527 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
528 * @out_nid: ptr to int for nid of the range, can be %NULL
529 *
530 * Find the first free area from *@idx which matches @nid, fill the out
531 * parameters, and update *@idx for the next iteration. The lower 32bit of
532 * *@idx contains index into memory region and the upper 32bit indexes the
533 * areas before each reserved region. For example, if reserved regions
534 * look like the following,
535 *
536 * 0:[0-16), 1:[32-48), 2:[128-130)
537 *
538 * The upper 32bit indexes the following regions.
539 *
540 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
541 *
542 * As both region arrays are sorted, the function advances the two indices
543 * in lockstep and returns each intersection.
544 */
545void __init_memblock __next_free_mem_range(u64 *idx, int nid,
546 phys_addr_t *out_start,
547 phys_addr_t *out_end, int *out_nid)
484{ 548{
485 phys_addr_t found; 549 struct memblock_type *mem = &memblock.memory;
550 struct memblock_type *rsv = &memblock.reserved;
551 int mi = *idx & 0xffffffff;
552 int ri = *idx >> 32;
486 553
487 /* We align the size to limit fragmentation. Without this, a lot of 554 for ( ; mi < mem->cnt; mi++) {
488 * small allocs quickly eat up the whole reserve array on sparc 555 struct memblock_region *m = &mem->regions[mi];
489 */ 556 phys_addr_t m_start = m->base;
490 size = memblock_align_up(size, align); 557 phys_addr_t m_end = m->base + m->size;
491 558
492 found = memblock_find_base(size, align, 0, max_addr); 559 /* only memory regions are associated with nodes, check it */
493 if (found != MEMBLOCK_ERROR && 560 if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
494 !memblock_add_region(&memblock.reserved, found, size)) 561 continue;
495 return found;
496 562
497 return 0; 563 /* scan areas before each reservation for intersection */
564 for ( ; ri < rsv->cnt + 1; ri++) {
565 struct memblock_region *r = &rsv->regions[ri];
566 phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
567 phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
568
569 /* if ri advanced past mi, break out to advance mi */
570 if (r_start >= m_end)
571 break;
572 /* if the two regions intersect, we're done */
573 if (m_start < r_end) {
574 if (out_start)
575 *out_start = max(m_start, r_start);
576 if (out_end)
577 *out_end = min(m_end, r_end);
578 if (out_nid)
579 *out_nid = memblock_get_region_node(m);
580 /*
581 * The region which ends first is advanced
582 * for the next iteration.
583 */
584 if (m_end <= r_end)
585 mi++;
586 else
587 ri++;
588 *idx = (u32)mi | (u64)ri << 32;
589 return;
590 }
591 }
592 }
593
594 /* signal end of iteration */
595 *idx = ULLONG_MAX;
498} 596}
499 597
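The iterator packs two cursors into one u64 (memory index in the low 32 bits, reserved index in the high 32) and advances them in lockstep, yielding each memory-minus-reserved intersection. A toy version over two sorted range lists:

#include <stdio.h>

static void print_free(long (*mem)[2], int nmem, long (*rsv)[2], int nrsv)
{
	int mi = 0, ri = 0;

	while (mi < nmem) {
		/* gap before reservation ri (unbounded after the last) */
		long r_start = ri ? rsv[ri - 1][1] : 0;
		long r_end = ri < nrsv ? rsv[ri][0] : (long)1 << 40;
		long lo = mem[mi][0] > r_start ? mem[mi][0] : r_start;
		long hi = mem[mi][1] < r_end ? mem[mi][1] : r_end;

		if (lo < hi)
			printf("[%ld-%ld)\n", lo, hi);
		if (mem[mi][1] <= r_end)
			mi++;	/* memory region exhausted first */
		else
			ri++;	/* move to the next gap */
	}
}

int main(void)
{
	long mem[][2] = { { 0, 100 }, { 200, 300 } };
	long rsv[][2] = { { 0, 16 }, { 32, 48 }, { 250, 260 } };

	print_free(mem, 2, rsv, 3);	/* [16-32) [48-100) [200-250) [260-300) */
	return 0;
}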
500phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) 598/**
599 * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
600 * @idx: pointer to u64 loop variable
601 * @nid: node selector, %MAX_NUMNODES for all nodes
602 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
603 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
604 * @out_nid: ptr to int for nid of the range, can be %NULL
605 *
606 * Reverse of __next_free_mem_range().
607 */
608void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
609 phys_addr_t *out_start,
610 phys_addr_t *out_end, int *out_nid)
501{ 611{
502 phys_addr_t alloc; 612 struct memblock_type *mem = &memblock.memory;
613 struct memblock_type *rsv = &memblock.reserved;
614 int mi = *idx & 0xffffffff;
615 int ri = *idx >> 32;
503 616
504 alloc = __memblock_alloc_base(size, align, max_addr); 617 if (*idx == (u64)ULLONG_MAX) {
618 mi = mem->cnt - 1;
619 ri = rsv->cnt;
620 }
505 621
506 if (alloc == 0) 622 for ( ; mi >= 0; mi--) {
507 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", 623 struct memblock_region *m = &mem->regions[mi];
508 (unsigned long long) size, (unsigned long long) max_addr); 624 phys_addr_t m_start = m->base;
625 phys_addr_t m_end = m->base + m->size;
509 626
510 return alloc; 627 /* only memory regions are associated with nodes, check it */
511} 628 if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
629 continue;
512 630
513phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) 631 /* scan areas before each reservation for intersection */
514{ 632 for ( ; ri >= 0; ri--) {
515 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); 633 struct memblock_region *r = &rsv->regions[ri];
516} 634 phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
635 phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
636
637 /* if ri advanced past mi, break out to advance mi */
638 if (r_end <= m_start)
639 break;
640 /* if the two regions intersect, we're done */
641 if (m_end > r_start) {
642 if (out_start)
643 *out_start = max(m_start, r_start);
644 if (out_end)
645 *out_end = min(m_end, r_end);
646 if (out_nid)
647 *out_nid = memblock_get_region_node(m);
648
649 if (m_start >= r_start)
650 mi--;
651 else
652 ri--;
653 *idx = (u32)mi | (u64)ri << 32;
654 return;
655 }
656 }
657 }
517 658
659 *idx = ULLONG_MAX;
660}
518 661
662#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-519/*
-520 * Additional node-local allocators. Search for node memory is bottom up
-521 * and walks memblock regions within that node bottom-up as well, but allocation
-522 * within an memblock region is top-down. XXX I plan to fix that at some stage
-523 *
-524 * WARNING: Only available after early_node_map[] has been populated,
-525 * on some architectures, that is after all the calls to add_active_range()
-526 * have been done to populate it.
-527 */
-528
+663/*
+664 * Common iterator interface used to define for_each_mem_range().
+665 */
+666void __init_memblock __next_mem_pfn_range(int *idx, int nid,
529phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) 667 unsigned long *out_start_pfn,
668 unsigned long *out_end_pfn, int *out_nid)
530{ 669{
531#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 670 struct memblock_type *type = &memblock.memory;
532 /* 671 struct memblock_region *r;
533 * This code originates from sparc which really wants use to walk by addresses
534 * and returns the nid. This is not very convenient for early_pfn_map[] users
535 * as the map isn't sorted yet, and it really wants to be walked by nid.
536 *
537 * For now, I implement the inefficient method below which walks the early
538 * map multiple times. Eventually we may want to use an ARCH config option
539 * to implement a completely different method for both case.
540 */
541 unsigned long start_pfn, end_pfn;
542 int i;
543 672
544 for (i = 0; i < MAX_NUMNODES; i++) { 673 while (++*idx < type->cnt) {
545 get_pfn_range_for_nid(i, &start_pfn, &end_pfn); 674 r = &type->regions[*idx];
546 if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) 675
676 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
547 continue; 677 continue;
548 *nid = i; 678 if (nid == MAX_NUMNODES || nid == r->nid)
549 return min(end, PFN_PHYS(end_pfn)); 679 break;
680 }
681 if (*idx >= type->cnt) {
682 *idx = -1;
683 return;
550 } 684 }
551#endif
552 *nid = 0;
553 685
554 return end; 686 if (out_start_pfn)
687 *out_start_pfn = PFN_UP(r->base);
688 if (out_end_pfn)
689 *out_end_pfn = PFN_DOWN(r->base + r->size);
690 if (out_nid)
691 *out_nid = r->nid;
555} 692}
556 693
557static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, 694/**
558 phys_addr_t size, 695 * memblock_set_node - set node ID on memblock regions
559 phys_addr_t align, int nid) 696 * @base: base of area to set node ID for
697 * @size: size of area to set node ID for
698 * @nid: node ID to set
699 *
700 * Set the nid of memblock memory regions in [@base,@base+@size) to @nid.
701 * Regions which cross the area boundaries are split as necessary.
702 *
703 * RETURNS:
704 * 0 on success, -errno on failure.
705 */
706int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
707 int nid)
560{ 708{
561 phys_addr_t start, end; 709 struct memblock_type *type = &memblock.memory;
710 int start_rgn, end_rgn;
711 int i, ret;
562 712
563 start = mp->base; 713 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
564 end = start + mp->size; 714 if (ret)
715 return ret;
565 716
566 start = memblock_align_up(start, align); 717 for (i = start_rgn; i < end_rgn; i++)
567 while (start < end) { 718 type->regions[i].nid = nid;
568 phys_addr_t this_end;
569 int this_nid;
570 719
571 this_end = memblock_nid_range(start, end, &this_nid); 720 memblock_merge_regions(type);
572 if (this_nid == nid) { 721 return 0;
573 phys_addr_t ret = memblock_find_region(start, this_end, size, align); 722}
574 if (ret != MEMBLOCK_ERROR && 723#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
575 !memblock_add_region(&memblock.reserved, ret, size)) 724
576 return ret; 725static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
577 } 726 phys_addr_t align, phys_addr_t max_addr,
578 start = this_end; 727 int nid)
579 } 728{
729 phys_addr_t found;
580 730
581 return MEMBLOCK_ERROR; 731 found = memblock_find_in_range_node(0, max_addr, size, align, nid);
732 if (found && !memblock_reserve(found, size))
733 return found;
734
735 return 0;
582} 736}
583 737
584phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) 738phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
585{ 739{
586 struct memblock_type *mem = &memblock.memory; 740 return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
587 int i; 741}
588 742
589 BUG_ON(0 == size); 743phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
744{
745 return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
746}
590 747
591 /* We align the size to limit fragmentation. Without this, a lot of 748phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
592 * small allocs quickly eat up the whole reserve array on sparc 749{
593 */ 750 phys_addr_t alloc;
594 size = memblock_align_up(size, align);
595 751
596 /* We do a bottom-up search for a region with the right 752 alloc = __memblock_alloc_base(size, align, max_addr);
597 * nid since that's easier considering how memblock_nid_range()
598 * works
599 */
600 for (i = 0; i < mem->cnt; i++) {
601 phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
602 size, align, nid);
603 if (ret != MEMBLOCK_ERROR)
604 return ret;
605 }
606 753
607 return 0; 754 if (alloc == 0)
755 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
756 (unsigned long long) size, (unsigned long long) max_addr);
757
758 return alloc;
759}
760
761phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
762{
763 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
608} 764}
609 765
610phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) 766phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
@@ -613,7 +769,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
613 769
614 if (res) 770 if (res)
615 return res; 771 return res;
-616	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
+772	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
617} 773}
618 774
619 775
@@ -621,10 +777,9 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
621 * Remaining API functions 777 * Remaining API functions
622 */ 778 */
623 779
624/* You must call memblock_analyze() before this. */
625phys_addr_t __init memblock_phys_mem_size(void) 780phys_addr_t __init memblock_phys_mem_size(void)
626{ 781{
627 return memblock.memory_size; 782 return memblock.memory.total_size;
628} 783}
629 784
630/* lowest address */ 785/* lowest address */
@@ -640,45 +795,28 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
640 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); 795 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
641} 796}
642 797
-643/* You must call memblock_analyze() after this. */
-644void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
+798void __init memblock_enforce_memory_limit(phys_addr_t limit)
645{ 799{
646 unsigned long i; 800 unsigned long i;
-647	phys_addr_t limit;
+801	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
648 struct memblock_region *p;
649 802
-650	if (!memory_limit)
+803	if (!limit)
651 return; 804 return;
652 805
-653	/* Truncate the memblock regions to satisfy the memory limit. */
-654	limit = memory_limit;
+806	/* find out max address */
655 for (i = 0; i < memblock.memory.cnt; i++) { 807 for (i = 0; i < memblock.memory.cnt; i++) {
-656		if (limit > memblock.memory.regions[i].size) {
+808		struct memblock_region *r = &memblock.memory.regions[i];
657 limit -= memblock.memory.regions[i].size;
658 continue;
659 }
660
661 memblock.memory.regions[i].size = limit;
662 memblock.memory.cnt = i + 1;
663 break;
664 }
665
666 memory_limit = memblock_end_of_DRAM();
667 809
-668	/* And truncate any reserves above the limit also. */
-669	for (i = 0; i < memblock.reserved.cnt; i++) {
-670		p = &memblock.reserved.regions[i];
+810		if (limit <= r->size) {
+811			max_addr = r->base + limit;
+812			break;
671
672 if (p->base > memory_limit)
673 p->size = 0;
674 else if ((p->base + p->size) > memory_limit)
675 p->size = memory_limit - p->base;
676
677 if (p->size == 0) {
678 memblock_remove_region(&memblock.reserved, i);
679 i--;
680 } 813 }
814 limit -= r->size;
681 } 815 }
816
817 /* truncate both memory and reserved regions */
818 __memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
819 __memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
682} 820}
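The rewritten limit handling first converts the cumulative size budget into an absolute max_addr, then truncates both region arrays with __memblock_remove(). The max_addr computation standalone:

#include <stdio.h>

int main(void)
{
	/* {base, size} pairs, as in memblock.memory.regions */
	unsigned long long regions[][2] = { { 0x0, 0x4000 }, { 0x8000, 0x2000 } };
	unsigned long long limit = 0x5000, max_addr = ~0ULL;

	for (int i = 0; i < 2; i++) {
		if (limit <= regions[i][1]) {
			max_addr = regions[i][0] + limit;
			break;
		}
		limit -= regions[i][1];
	}
	printf("%#llx\n", max_addr);	/* 0x9000 = 0x1000 into region 1 */
	return 0;
}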
683 821
684static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) 822static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
@@ -712,16 +850,18 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
712int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 850int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
713{ 851{
714 int idx = memblock_search(&memblock.memory, base); 852 int idx = memblock_search(&memblock.memory, base);
853 phys_addr_t end = base + memblock_cap_size(base, &size);
715 854
716 if (idx == -1) 855 if (idx == -1)
717 return 0; 856 return 0;
718 return memblock.memory.regions[idx].base <= base && 857 return memblock.memory.regions[idx].base <= base &&
719 (memblock.memory.regions[idx].base + 858 (memblock.memory.regions[idx].base +
720 memblock.memory.regions[idx].size) >= (base + size); 859 memblock.memory.regions[idx].size) >= end;
721} 860}
722 861
723int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 862int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
724{ 863{
864 memblock_cap_size(base, &size);
725 return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; 865 return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
726} 866}
727 867
@@ -731,86 +871,45 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit)
731 memblock.current_limit = limit; 871 memblock.current_limit = limit;
732} 872}
733 873
-734static void __init_memblock memblock_dump(struct memblock_type *region, char *name)
+874static void __init_memblock memblock_dump(struct memblock_type *type, char *name)
735{ 875{
736 unsigned long long base, size; 876 unsigned long long base, size;
737 int i; 877 int i;
738 878
739 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); 879 pr_info(" %s.cnt = 0x%lx\n", name, type->cnt);
740 880
741 for (i = 0; i < region->cnt; i++) { 881 for (i = 0; i < type->cnt; i++) {
742 base = region->regions[i].base; 882 struct memblock_region *rgn = &type->regions[i];
743 size = region->regions[i].size; 883 char nid_buf[32] = "";
744 884
745 pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", 885 base = rgn->base;
746 name, i, base, base + size - 1, size); 886 size = rgn->size;
887#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
888 if (memblock_get_region_node(rgn) != MAX_NUMNODES)
889 snprintf(nid_buf, sizeof(nid_buf), " on node %d",
890 memblock_get_region_node(rgn));
891#endif
892 pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n",
893 name, i, base, base + size - 1, size, nid_buf);
747 } 894 }
748} 895}
749 896
750void __init_memblock memblock_dump_all(void) 897void __init_memblock __memblock_dump_all(void)
751{ 898{
752 if (!memblock_debug)
753 return;
754
755 pr_info("MEMBLOCK configuration:\n"); 899 pr_info("MEMBLOCK configuration:\n");
756 pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); 900 pr_info(" memory size = %#llx reserved size = %#llx\n",
901 (unsigned long long)memblock.memory.total_size,
902 (unsigned long long)memblock.reserved.total_size);
757 903
758 memblock_dump(&memblock.memory, "memory"); 904 memblock_dump(&memblock.memory, "memory");
759 memblock_dump(&memblock.reserved, "reserved"); 905 memblock_dump(&memblock.reserved, "reserved");
760} 906}
761 907
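The rename to __memblock_dump_all() together with the dropped memblock_debug test suggests the gate moved out to a wrapper, presumably a header inline along these lines (a sketch, not the verbatim header):

/* include/linux/memblock.h (sketch) */
extern int memblock_debug;
void __memblock_dump_all(void);

static inline void memblock_dump_all(void)
{
	if (memblock_debug)
		__memblock_dump_all();
}

That keeps the common case (debugging off) cheap at every call site while the printing body stays in memblock.c.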
762void __init memblock_analyze(void) 908void __init memblock_allow_resize(void)
763{ 909{
764 int i;
765
766 /* Check marker in the unused last array entry */
767 WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
768 != MEMBLOCK_INACTIVE);
769 WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
770 != MEMBLOCK_INACTIVE);
771
772 memblock.memory_size = 0;
773
774 for (i = 0; i < memblock.memory.cnt; i++)
775 memblock.memory_size += memblock.memory.regions[i].size;
776
777 /* We allow resizing from there */
778 memblock_can_resize = 1; 910 memblock_can_resize = 1;
779} 911}
780 912
781void __init memblock_init(void)
782{
783 static int init_done __initdata = 0;
784
785 if (init_done)
786 return;
787 init_done = 1;
788
789 /* Hookup the initial arrays */
790 memblock.memory.regions = memblock_memory_init_regions;
791 memblock.memory.max = INIT_MEMBLOCK_REGIONS;
792 memblock.reserved.regions = memblock_reserved_init_regions;
793 memblock.reserved.max = INIT_MEMBLOCK_REGIONS;
794
795 /* Write a marker in the unused last array entry */
796 memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
797 memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE;
798
799 /* Create a dummy zero size MEMBLOCK which will get coalesced away later.
800 * This simplifies the memblock_add() code below...
801 */
802 memblock.memory.regions[0].base = 0;
803 memblock.memory.regions[0].size = 0;
804 memblock.memory.cnt = 1;
805
806 /* Ditto. */
807 memblock.reserved.regions[0].base = 0;
808 memblock.reserved.regions[0].size = 0;
809 memblock.reserved.cnt = 1;
810
811 memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
812}
813
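With memblock_init() deleted and no replacement visible in this hunk, the array hookup, the dummy zero-size first region, and the default current_limit presumably move into a static initializer of the global memblock object, roughly (a sketch under that assumption):

/* mm/memblock.c (sketch of the static initializer replacing
 * the deleted memblock_init(); field names as used in this file) */
struct memblock memblock __initdata_memblock = {
	.memory.regions		= memblock_memory_init_regions,
	.memory.cnt		= 1,	/* the empty dummy entry */
	.memory.max		= INIT_MEMBLOCK_REGIONS,

	.reserved.regions	= memblock_reserved_init_regions,
	.reserved.cnt		= 1,	/* the empty dummy entry */
	.reserved.max		= INIT_MEMBLOCK_REGIONS,

	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};

A compile-time initializer also removes the need for the init_done guard and the end-of-array markers the old code maintained.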
814static int __init early_memblock(char *p) 913static int __init early_memblock(char *p)
815{ 914{
816 if (p && strstr(p, "debug")) 915 if (p && strstr(p, "debug"))
@@ -819,7 +918,7 @@ static int __init early_memblock(char *p)
819} 918}
820early_param("memblock", early_memblock); 919early_param("memblock", early_memblock);
821 920
822#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) 921#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)
823 922
824static int memblock_debug_show(struct seq_file *m, void *private) 923static int memblock_debug_show(struct seq_file *m, void *private)
825{ 924{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aff93c98aca..b63f5f7dfa07 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4907,9 +4907,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4907 int cpu; 4907 int cpu;
4908 enable_swap_cgroup(); 4908 enable_swap_cgroup();
4909 parent = NULL; 4909 parent = NULL;
4910 root_mem_cgroup = memcg;
4911 if (mem_cgroup_soft_limit_tree_init()) 4910 if (mem_cgroup_soft_limit_tree_init())
4912 goto free_out; 4911 goto free_out;
4912 root_mem_cgroup = memcg;
4913 for_each_possible_cpu(cpu) { 4913 for_each_possible_cpu(cpu) {
4914 struct memcg_stock_pcp *stock = 4914 struct memcg_stock_pcp *stock =
4915 &per_cpu(memcg_stock, cpu); 4915 &per_cpu(memcg_stock, cpu);
@@ -4948,7 +4948,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4948 return &memcg->css; 4948 return &memcg->css;
4949free_out: 4949free_out:
4950 __mem_cgroup_free(memcg); 4950 __mem_cgroup_free(memcg);
4951 root_mem_cgroup = NULL;
4952 return ERR_PTR(error); 4951 return ERR_PTR(error);
4953} 4952}
4954 4953
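The reorder above means root_mem_cgroup is published only after the last step that can fail, so the error path no longer needs to reset it. The same publish-last shape in a self-contained sketch (struct foo and its init are made-up stand-ins):

#include <stdlib.h>

struct foo { int ready; };

static struct foo *global_foo;

static int init_foo(struct foo *f) { f->ready = 1; return 0; }

static struct foo *foo_create(void)
{
	struct foo *f = malloc(sizeof(*f));

	if (!f)
		return NULL;
	if (init_foo(f)) {		/* any fallible setup step */
		free(f);
		return NULL;		/* global_foo never pointed here */
	}
	global_foo = f;			/* publish only on full success */
	return f;
}

int main(void) { return foo_create() ? 0 : 1; }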
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index adc395481813..c3fdbcb17658 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -636,6 +636,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
636 struct vm_area_struct *prev; 636 struct vm_area_struct *prev;
637 struct vm_area_struct *vma; 637 struct vm_area_struct *vma;
638 int err = 0; 638 int err = 0;
639 pgoff_t pgoff;
639 unsigned long vmstart; 640 unsigned long vmstart;
640 unsigned long vmend; 641 unsigned long vmend;
641 642
@@ -643,13 +644,21 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
643 if (!vma || vma->vm_start > start) 644 if (!vma || vma->vm_start > start)
644 return -EFAULT; 645 return -EFAULT;
645 646
647 if (start > vma->vm_start)
648 prev = vma;
649
646 for (; vma && vma->vm_start < end; prev = vma, vma = next) { 650 for (; vma && vma->vm_start < end; prev = vma, vma = next) {
647 next = vma->vm_next; 651 next = vma->vm_next;
648 vmstart = max(start, vma->vm_start); 652 vmstart = max(start, vma->vm_start);
649 vmend = min(end, vma->vm_end); 653 vmend = min(end, vma->vm_end);
650 654
655 if (mpol_equal(vma_policy(vma), new_pol))
656 continue;
657
658 pgoff = vma->vm_pgoff +
659 ((vmstart - vma->vm_start) >> PAGE_SHIFT);
651 prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, 660 prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
652 vma->anon_vma, vma->vm_file, vma->vm_pgoff, 661 vma->anon_vma, vma->vm_file, pgoff,
653 new_pol); 662 new_pol);
654 if (prev) { 663 if (prev) {
655 vma = prev; 664 vma = prev;
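The new pgoff matters because vma_merge() is handed the sub-range [vmstart, vmend) rather than the whole vma: the file offset passed with it must correspond to vmstart, not to vma->vm_start. A worked example of the adjustment (PAGE_SHIFT = 12 assumed):

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long vm_start = 0x10000;	/* vma begins here... */
	unsigned long vm_pgoff = 8;		/* ...mapping file page 8 */
	unsigned long vmstart  = 0x13000;	/* merge range starts 3 pages in */

	/* pgoff of the sub-range: 8 + (0x13000 - 0x10000) / 4096 = 11 */
	unsigned long pgoff = vm_pgoff + ((vmstart - vm_start) >> PAGE_SHIFT);

	printf("pgoff = %lu\n", pgoff);		/* prints 11 */
	return 0;
}

Passing vma->vm_pgoff unadjusted, as the old code did, would let vma_merge() build a merged mapping whose file offset is wrong by the skipped pages.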
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7fa41b4a07bf..24f0fc1a56d6 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -41,14 +41,13 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
41 if (limit > memblock.current_limit) 41 if (limit > memblock.current_limit)
42 limit = memblock.current_limit; 42 limit = memblock.current_limit;
43 43
44	addr = find_memory_core_early(nid, size, align, goal, limit);
45
46	if (addr == MEMBLOCK_ERROR)
47		return NULL;
44	addr = memblock_find_in_range_node(goal, limit, size, align, nid);
45	if (!addr)
46		return NULL;
48 47
49 ptr = phys_to_virt(addr); 48 ptr = phys_to_virt(addr);
50 memset(ptr, 0, size); 49 memset(ptr, 0, size);
51 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM"); 50 memblock_reserve(addr, size);
52 /* 51 /*
53 * The min_count is set to 0 so that bootmem allocated blocks 52 * The min_count is set to 0 so that bootmem allocated blocks
54 * are never reported as leaks. 53 * are never reported as leaks.
@@ -107,23 +106,27 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
107 __free_pages_bootmem(pfn_to_page(i), 0); 106 __free_pages_bootmem(pfn_to_page(i), 0);
108} 107}
109 108
110unsigned long __init free_all_memory_core_early(int nodeid)
111{
112	int i;
113	u64 start, end;
114	unsigned long count = 0;
115	struct range *range = NULL;
116	int nr_range;
117
118	nr_range = get_free_all_memory_range(&range, nodeid);
119
120	for (i = 0; i < nr_range; i++) {
121		start = range[i].start;
122		end = range[i].end;
123		count += end - start;
124		__free_pages_memory(start, end);
125	}
126
127	return count;
128}
109unsigned long __init free_low_memory_core_early(int nodeid)
110{
111	unsigned long count = 0;
112	phys_addr_t start, end;
113	u64 i;
114
115	/* free reserved array temporarily so that it's treated as free area */
116	memblock_free_reserved_regions();
117
118	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
119		unsigned long start_pfn = PFN_UP(start);
120		unsigned long end_pfn = min_t(unsigned long,
121					      PFN_DOWN(end), max_low_pfn);
122		if (start_pfn < end_pfn) {
123			__free_pages_memory(start_pfn, end_pfn);
124			count += end_pfn - start_pfn;
125		}
126	}
127
128	/* put region array back? */
129	memblock_reserve_reserved_regions();
130	return count;
131}
129 132
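Because for_each_free_mem_range() yields byte-granular physical ranges, the conversion to page frames has to round inward: PFN_UP() for the start (a partial leading page cannot be freed) and PFN_DOWN() for the end, further capped at max_low_pfn. The rounding in isolation, with the kernel's PFN macros restated for a 4 KiB page (userspace sketch):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)

int main(void)
{
	unsigned long start = 0x1234, end = 0x5000;

	/* Only whole pages inside [start, end) may be freed:
	 * PFN_UP(0x1234) = 2 and PFN_DOWN(0x5000) = 5, so pfns 2..4. */
	printf("free pfns [%lu, %lu)\n", PFN_UP(start), PFN_DOWN(end));
	return 0;
}

Rounding the other way on either end would free a page that is only partially covered by the free range.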
@@ -137,7 +140,7 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
137{ 140{
138 register_page_bootmem_info_node(pgdat); 141 register_page_bootmem_info_node(pgdat);
139 142
140 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ 143 /* free_low_memory_core_early(MAX_NUMNODES) will be called later */
141 return 0; 144 return 0;
142} 145}
143 146
@@ -155,7 +158,7 @@ unsigned long __init free_all_bootmem(void)
155 * Use MAX_NUMNODES will make sure all ranges in early_node_map[] 158 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
156 * will be used instead of only Node0 related 159 * will be used instead of only Node0 related
157 */ 160 */
158 return free_all_memory_core_early(MAX_NUMNODES); 161 return free_low_memory_core_early(MAX_NUMNODES);
159} 162}
160 163
161/** 164/**
@@ -172,7 +175,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
172 unsigned long size) 175 unsigned long size)
173{ 176{
174 kmemleak_free_part(__va(physaddr), size); 177 kmemleak_free_part(__va(physaddr), size);
175 memblock_x86_free_range(physaddr, physaddr + size); 178 memblock_free(physaddr, size);
176} 179}
177 180
178/** 181/**
@@ -187,7 +190,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
187void __init free_bootmem(unsigned long addr, unsigned long size) 190void __init free_bootmem(unsigned long addr, unsigned long size)
188{ 191{
189 kmemleak_free_part(__va(addr), size); 192 kmemleak_free_part(__va(addr), size);
190 memblock_x86_free_range(addr, addr + size); 193 memblock_free(addr, size);
191} 194}
192 195
193static void * __init ___alloc_bootmem_nopanic(unsigned long size, 196static void * __init ___alloc_bootmem_nopanic(unsigned long size,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 76f2c5ae908e..069b64e521fc 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p,
176unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, 176unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
177 const nodemask_t *nodemask, unsigned long totalpages) 177 const nodemask_t *nodemask, unsigned long totalpages)
178{ 178{
179 int points; 179 long points;
180 180
181 if (oom_unkillable_task(p, mem, nodemask)) 181 if (oom_unkillable_task(p, mem, nodemask))
182 return 0; 182 return 0;
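Widening points to long matters because the badness score mixes page counts with an oom_score_adj bonus of up to 1000 scaled by totalpages; on large-memory machines the intermediate product no longer fits in an int. Rough numbers (an illustration of the magnitude, not the exact heuristic; assumes 64-bit long):

#include <stdio.h>
#include <limits.h>

int main(void)
{
	long totalpages = 4L * 1024 * 1024 * 1024;	/* ~16 TiB of 4 KiB pages */
	long adj = 1000;				/* maximum score adjustment */

	/* adj * totalpages / 1000: fine as long, but the product
	 * alone is already ~2^32, far past INT_MAX. */
	long bonus = adj * totalpages / 1000;

	printf("bonus = %ld (INT_MAX = %d)\n", bonus, INT_MAX);
	return 0;
}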
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2b8ba3aebf6e..bdc804c2d99c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -181,39 +181,17 @@ static unsigned long __meminitdata nr_kernel_pages;
181static unsigned long __meminitdata nr_all_pages; 181static unsigned long __meminitdata nr_all_pages;
182static unsigned long __meminitdata dma_reserve; 182static unsigned long __meminitdata dma_reserve;
183 183
184#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 184#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
185 /* 185static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
186 * MAX_ACTIVE_REGIONS determines the maximum number of distinct 186static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
187 * ranges of memory (RAM) that may be registered with add_active_range(). 187static unsigned long __initdata required_kernelcore;
188 * Ranges passed to add_active_range() will be merged if possible 188static unsigned long __initdata required_movablecore;
189 * so the number of times add_active_range() can be called is 189static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
190 * related to the number of nodes and the number of holes 190
191 */ 191/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
192 #ifdef CONFIG_MAX_ACTIVE_REGIONS 192int movable_zone;
193 /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ 193EXPORT_SYMBOL(movable_zone);
194 #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS 194#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
195 #else
196 #if MAX_NUMNODES >= 32
197 /* If there can be many nodes, allow up to 50 holes per node */
198 #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
199 #else
200 /* By default, allow up to 256 distinct regions */
201 #define MAX_ACTIVE_REGIONS 256
202 #endif
203 #endif
204
205 static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
206 static int __meminitdata nr_nodemap_entries;
207 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
208 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
209 static unsigned long __initdata required_kernelcore;
210 static unsigned long __initdata required_movablecore;
211 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
212
213 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
214 int movable_zone;
215 EXPORT_SYMBOL(movable_zone);
216#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
217 195
218#if MAX_NUMNODES > 1 196#if MAX_NUMNODES > 1
219int nr_node_ids __read_mostly = MAX_NUMNODES; 197int nr_node_ids __read_mostly = MAX_NUMNODES;
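The whole early_node_map apparatus above can go because, with CONFIG_HAVE_MEMBLOCK_NODE_MAP, memblock already records a node id per region; the rest of this diff repeatedly converts open-coded early_node_map walks into the for_each_mem_pfn_range() iterator. Each conversion has the same shape (use_range() stands in for whatever the caller does with the range):

/* Before: walk the private early_node_map[] table by hand. */
for (i = 0; i < nr_nodemap_entries; i++) {
	if (nid != MAX_NUMNODES && early_node_map[i].nid != nid)
		continue;
	use_range(early_node_map[i].start_pfn, early_node_map[i].end_pfn);
}

/* After: let memblock hand out (start_pfn, end_pfn, nid) tuples. */
unsigned long start_pfn, end_pfn;
int i, this_nid;

for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
	use_range(start_pfn, end_pfn);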
@@ -706,10 +684,10 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
706 int loop; 684 int loop;
707 685
708 prefetchw(page); 686 prefetchw(page);
709 for (loop = 0; loop < BITS_PER_LONG; loop++) { 687 for (loop = 0; loop < (1 << order); loop++) {
710 struct page *p = &page[loop]; 688 struct page *p = &page[loop];
711 689
712 if (loop + 1 < BITS_PER_LONG) 690 if (loop + 1 < (1 << order))
713 prefetchw(p + 1); 691 prefetchw(p + 1);
714 __ClearPageReserved(p); 692 __ClearPageReserved(p);
715 set_page_count(p, 0); 693 set_page_count(p, 0);
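The new bound makes __free_pages_bootmem() touch exactly 1 << order pages; the old BITS_PER_LONG bound was only correct when callers happened to pass an order satisfying 1 << order == BITS_PER_LONG. A toy check of the corrected bound:

#include <stdio.h>

int main(void)
{
	char freed[64] = { 0 };
	unsigned int order = 3;			/* an 8-page block */
	unsigned int loop;

	/* Release exactly 2^order pages; a fixed 64-iteration loop
	 * would also touch 56 pages that are not part of this block. */
	for (loop = 0; loop < (1u << order); loop++)
		freed[loop] = 1;

	for (loop = 0; freed[loop]; loop++)
		;
	printf("freed %u pages\n", loop);	/* prints 8 */
	return 0;
}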
@@ -3737,35 +3715,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
3737 return 0; 3715 return 0;
3738} 3716}
3739 3717
3740#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 3718#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
3741/*
3742 * Basic iterator support. Return the first range of PFNs for a node
3743 * Note: nid == MAX_NUMNODES returns first region regardless of node
3744 */
3745static int __meminit first_active_region_index_in_nid(int nid)
3746{
3747 int i;
3748
3749 for (i = 0; i < nr_nodemap_entries; i++)
3750 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3751 return i;
3752
3753 return -1;
3754}
3755
3756/*
3757 * Basic iterator support. Return the next active range of PFNs for a node
3758 * Note: nid == MAX_NUMNODES returns next region regardless of node
3759 */
3760static int __meminit next_active_region_index_in_nid(int index, int nid)
3761{
3762 for (index = index + 1; index < nr_nodemap_entries; index++)
3763 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3764 return index;
3765
3766 return -1;
3767}
3768
3769#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 3719#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
3770/* 3720/*
3771 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. 3721 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3775,15 +3725,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
3775 */ 3725 */
3776int __meminit __early_pfn_to_nid(unsigned long pfn) 3726int __meminit __early_pfn_to_nid(unsigned long pfn)
3777{ 3727{
3778 int i; 3728 unsigned long start_pfn, end_pfn;
3779 3729 int i, nid;
3780 for (i = 0; i < nr_nodemap_entries; i++) {
3781 unsigned long start_pfn = early_node_map[i].start_pfn;
3782 unsigned long end_pfn = early_node_map[i].end_pfn;
3783 3730
3731 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
3784 if (start_pfn <= pfn && pfn < end_pfn) 3732 if (start_pfn <= pfn && pfn < end_pfn)
3785 return early_node_map[i].nid; 3733 return nid;
3786 }
3787 /* This is a memory hole */ 3734 /* This is a memory hole */
3788 return -1; 3735 return -1;
3789} 3736}
@@ -3812,11 +3759,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3812} 3759}
3813#endif 3760#endif
3814 3761
3815/* Basic iterator support to walk early_node_map[] */
3816#define for_each_active_range_index_in_nid(i, nid) \
3817 for (i = first_active_region_index_in_nid(nid); i != -1; \
3818 i = next_active_region_index_in_nid(i, nid))
3819
3820/** 3762/**
3821 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range 3763 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
3822 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. 3764 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3826,122 +3768,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3826 * add_active_ranges() contain no holes and may be freed, this 3768 * add_active_ranges() contain no holes and may be freed, this
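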
3827 * function may be used instead of calling free_bootmem() manually. 3769 * function may be used instead of calling free_bootmem() manually.
3828 */ 3770 */
3829void __init free_bootmem_with_active_regions(int nid,
3830				unsigned long max_low_pfn)
3831{
3832	int i;
3833
3834	for_each_active_range_index_in_nid(i, nid) {
3835		unsigned long size_pages = 0;
3836		unsigned long end_pfn = early_node_map[i].end_pfn;
3837
3838		if (early_node_map[i].start_pfn >= max_low_pfn)
3839			continue;
3840
3841		if (end_pfn > max_low_pfn)
3842			end_pfn = max_low_pfn;
3843
3844		size_pages = end_pfn - early_node_map[i].start_pfn;
3845		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
3846				PFN_PHYS(early_node_map[i].start_pfn),
3847				size_pages << PAGE_SHIFT);
3848	}
3849}
3850
3851#ifdef CONFIG_HAVE_MEMBLOCK
3852/*
3853 * Basic iterator support. Return the last range of PFNs for a node
3854 * Note: nid == MAX_NUMNODES returns last region regardless of node
3855 */
3856static int __meminit last_active_region_index_in_nid(int nid)
3857{
3858	int i;
3859
3860	for (i = nr_nodemap_entries - 1; i >= 0; i--)
3861		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3862			return i;
3863
3864	return -1;
3865}
3866
3867/*
3868 * Basic iterator support. Return the previous active range of PFNs for a node
3869 * Note: nid == MAX_NUMNODES returns next region regardless of node
3870 */
3871static int __meminit previous_active_region_index_in_nid(int index, int nid)
3872{
3873	for (index = index - 1; index >= 0; index--)
3874		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3875			return index;
3876
3877	return -1;
3878}
3879
3880#define for_each_active_range_index_in_nid_reverse(i, nid) \
3881	for (i = last_active_region_index_in_nid(nid); i != -1; \
3882		i = previous_active_region_index_in_nid(i, nid))
3883
3884u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3885			u64 goal, u64 limit)
3886{
3887	int i;
3888
3889	/* Need to go over early_node_map to find out good range for node */
3890	for_each_active_range_index_in_nid_reverse(i, nid) {
3891		u64 addr;
3892		u64 ei_start, ei_last;
3893		u64 final_start, final_end;
3894
3895		ei_last = early_node_map[i].end_pfn;
3896		ei_last <<= PAGE_SHIFT;
3897		ei_start = early_node_map[i].start_pfn;
3898		ei_start <<= PAGE_SHIFT;
3899
3900		final_start = max(ei_start, goal);
3901		final_end = min(ei_last, limit);
3902
3903		if (final_start >= final_end)
3904			continue;
3905
3906		addr = memblock_find_in_range(final_start, final_end, size, align);
3907
3908		if (addr == MEMBLOCK_ERROR)
3909			continue;
3910
3911		return addr;
3912	}
3913
3914	return MEMBLOCK_ERROR;
3915}
3771void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
3772{
3773	unsigned long start_pfn, end_pfn;
3774	int i, this_nid;
3775
3776	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
3777		start_pfn = min(start_pfn, max_low_pfn);
3778		end_pfn = min(end_pfn, max_low_pfn);
3779
3780		if (start_pfn < end_pfn)
3781			free_bootmem_node(NODE_DATA(this_nid),
3782					PFN_PHYS(start_pfn),
3783					(end_pfn - start_pfn) << PAGE_SHIFT);
3784	}
3785}
3916#endif
3917 3786
3918int __init add_from_early_node_map(struct range *range, int az, 3787int __init add_from_early_node_map(struct range *range, int az,
3919 int nr_range, int nid) 3788 int nr_range, int nid)
3920{ 3789{
3790 unsigned long start_pfn, end_pfn;
3921 int i; 3791 int i;
3922 u64 start, end;
3923 3792
3924 /* need to go over early_node_map to find out good range for node */ 3793 /* need to go over early_node_map to find out good range for node */
3925 for_each_active_range_index_in_nid(i, nid) { 3794 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
3926 start = early_node_map[i].start_pfn; 3795 nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
3927 end = early_node_map[i].end_pfn;
3928 nr_range = add_range(range, az, nr_range, start, end);
3929 }
3930 return nr_range; 3796 return nr_range;
3931} 3797}
3932 3798
3933void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3934{
3935 int i;
3936 int ret;
3937
3938 for_each_active_range_index_in_nid(i, nid) {
3939 ret = work_fn(early_node_map[i].start_pfn,
3940 early_node_map[i].end_pfn, data);
3941 if (ret)
3942 break;
3943 }
3944}
3945/** 3799/**
3946 * sparse_memory_present_with_active_regions - Call memory_present for each active range 3800 * sparse_memory_present_with_active_regions - Call memory_present for each active range
3947 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. 3801 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -3952,12 +3806,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3952 */ 3806 */
3953void __init sparse_memory_present_with_active_regions(int nid) 3807void __init sparse_memory_present_with_active_regions(int nid)
3954{ 3808{
3955 int i; 3809 unsigned long start_pfn, end_pfn;
3810 int i, this_nid;
3956 3811
3957 for_each_active_range_index_in_nid(i, nid) 3812 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
3958 memory_present(early_node_map[i].nid, 3813 memory_present(this_nid, start_pfn, end_pfn);
3959 early_node_map[i].start_pfn,
3960 early_node_map[i].end_pfn);
3961} 3814}
3962 3815
3963/** 3816/**
@@ -3974,13 +3827,15 @@ void __init sparse_memory_present_with_active_regions(int nid)
3974void __meminit get_pfn_range_for_nid(unsigned int nid, 3827void __meminit get_pfn_range_for_nid(unsigned int nid,
3975 unsigned long *start_pfn, unsigned long *end_pfn) 3828 unsigned long *start_pfn, unsigned long *end_pfn)
3976{ 3829{
3830 unsigned long this_start_pfn, this_end_pfn;
3977 int i; 3831 int i;
3832
3978 *start_pfn = -1UL; 3833 *start_pfn = -1UL;
3979 *end_pfn = 0; 3834 *end_pfn = 0;
3980 3835
3981 for_each_active_range_index_in_nid(i, nid) { 3836 for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
3982 *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); 3837 *start_pfn = min(*start_pfn, this_start_pfn);
3983 *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); 3838 *end_pfn = max(*end_pfn, this_end_pfn);
3984 } 3839 }
3985 3840
3986 if (*start_pfn == -1UL) 3841 if (*start_pfn == -1UL)
@@ -4083,46 +3938,16 @@ unsigned long __meminit __absent_pages_in_range(int nid,
4083 unsigned long range_start_pfn, 3938 unsigned long range_start_pfn,
4084 unsigned long range_end_pfn) 3939 unsigned long range_end_pfn)
4085{
4086	int i = 0;
4087	unsigned long prev_end_pfn = 0, hole_pages = 0;
4088	unsigned long start_pfn;
4089
4090	/* Find the end_pfn of the first active range of pfns in the node */
4091	i = first_active_region_index_in_nid(nid);
4092	if (i == -1)
4093		return 0;
4094
4095	prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4096
4097	/* Account for ranges before physical memory on this node */
4098	if (early_node_map[i].start_pfn > range_start_pfn)
4099		hole_pages = prev_end_pfn - range_start_pfn;
4100
4101	/* Find all holes for the zone within the node */
4102	for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
4103
4104		/* No need to continue if prev_end_pfn is outside the zone */
4105		if (prev_end_pfn >= range_end_pfn)
4106			break;
4107
4108		/* Make sure the end of the zone is not within the hole */
4109		start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4110		prev_end_pfn = max(prev_end_pfn, range_start_pfn);
4111
4112		/* Update the hole size count and move on */
4113		if (start_pfn > range_start_pfn) {
4114			BUG_ON(prev_end_pfn > start_pfn);
4115			hole_pages += start_pfn - prev_end_pfn;
4116		}
4117		prev_end_pfn = early_node_map[i].end_pfn;
4118	}
4119
4120	/* Account for ranges past physical memory on this node */
4121	if (range_end_pfn > prev_end_pfn)
4122		hole_pages += range_end_pfn -
4123			max(range_start_pfn, prev_end_pfn);
4124
4125	return hole_pages;
4126}
3940{
3941	unsigned long nr_absent = range_end_pfn - range_start_pfn;
3942	unsigned long start_pfn, end_pfn;
3943	int i;
3944
3945	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
3946		start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
3947		end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
3948		nr_absent -= end_pfn - start_pfn;
3949	}
3950	return nr_absent;
3951}
4127 3952
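The rewrite inverts the hole accounting: instead of walking the gaps between consecutive ranges, start from the full span and subtract every memory range clamped into it. Worked example:

#include <stdio.h>

#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

struct pfn_range { unsigned long start, end; };

int main(void)
{
	/* Zone spans pfns [0, 100); memory exists at [10, 30) and [50, 60). */
	const struct pfn_range mem[] = { { 10, 30 }, { 50, 60 } };
	unsigned long range_start = 0, range_end = 100;
	unsigned long nr_absent = range_end - range_start;
	int i;

	for (i = 0; i < 2; i++) {
		unsigned long s = clamp(mem[i].start, range_start, range_end);
		unsigned long e = clamp(mem[i].end, range_start, range_end);
		nr_absent -= e - s;
	}

	printf("absent = %lu\n", nr_absent);	/* 100 - 20 - 10 = 70 */
	return 0;
}

The clamp makes ranges that straddle or lie outside the span contribute only their overlap, which is what the old prev_end_pfn bookkeeping computed the hard way.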
4128/** 3953/**
@@ -4143,14 +3968,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4143 unsigned long zone_type, 3968 unsigned long zone_type,
4144 unsigned long *ignored) 3969 unsigned long *ignored)
4145{ 3970{
3971 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
3972 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
4146 unsigned long node_start_pfn, node_end_pfn; 3973 unsigned long node_start_pfn, node_end_pfn;
4147 unsigned long zone_start_pfn, zone_end_pfn; 3974 unsigned long zone_start_pfn, zone_end_pfn;
4148 3975
4149 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); 3976 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4150 zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], 3977 zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
4151 node_start_pfn); 3978 zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
4152 zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
4153 node_end_pfn);
4154 3979
4155 adjust_zone_range_for_zone_movable(nid, zone_type, 3980 adjust_zone_range_for_zone_movable(nid, zone_type,
4156 node_start_pfn, node_end_pfn, 3981 node_start_pfn, node_end_pfn,
@@ -4158,7 +3983,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4158 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 3983 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
4159} 3984}
4160 3985
4161#else 3986#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4162static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, 3987static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
4163 unsigned long zone_type, 3988 unsigned long zone_type,
4164 unsigned long *zones_size) 3989 unsigned long *zones_size)
@@ -4176,7 +4001,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
4176 return zholes_size[zone_type]; 4001 return zholes_size[zone_type];
4177} 4002}
4178 4003
4179#endif 4004#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4180 4005
4181static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, 4006static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
4182 unsigned long *zones_size, unsigned long *zholes_size) 4007 unsigned long *zones_size, unsigned long *zholes_size)
@@ -4399,10 +4224,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
4399 */ 4224 */
4400 if (pgdat == NODE_DATA(0)) { 4225 if (pgdat == NODE_DATA(0)) {
4401 mem_map = NODE_DATA(0)->node_mem_map; 4226 mem_map = NODE_DATA(0)->node_mem_map;
4402#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4227#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4403 if (page_to_pfn(mem_map) != pgdat->node_start_pfn) 4228 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
4404 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); 4229 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
4405#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4230#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4406 } 4231 }
4407#endif 4232#endif
4408#endif /* CONFIG_FLAT_NODE_MEM_MAP */ 4233#endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4427,7 +4252,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
4427 free_area_init_core(pgdat, zones_size, zholes_size); 4252 free_area_init_core(pgdat, zones_size, zholes_size);
4428} 4253}
4429 4254
4430#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4255#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4431 4256
4432#if MAX_NUMNODES > 1 4257#if MAX_NUMNODES > 1
4433/* 4258/*
@@ -4449,170 +4274,6 @@ static inline void setup_nr_node_ids(void)
4449#endif 4274#endif
4450 4275
4451/** 4276/**
4452 * add_active_range - Register a range of PFNs backed by physical memory
4453 * @nid: The node ID the range resides on
4454 * @start_pfn: The start PFN of the available physical memory
4455 * @end_pfn: The end PFN of the available physical memory
4456 *
4457 * These ranges are stored in an early_node_map[] and later used by
4458 * free_area_init_nodes() to calculate zone sizes and holes. If the
4459 * range spans a memory hole, it is up to the architecture to ensure
4460 * the memory is not freed by the bootmem allocator. If possible
4461 * the range being registered will be merged with existing ranges.
4462 */
4463void __init add_active_range(unsigned int nid, unsigned long start_pfn,
4464 unsigned long end_pfn)
4465{
4466 int i;
4467
4468 mminit_dprintk(MMINIT_TRACE, "memory_register",
4469 "Entering add_active_range(%d, %#lx, %#lx) "
4470 "%d entries of %d used\n",
4471 nid, start_pfn, end_pfn,
4472 nr_nodemap_entries, MAX_ACTIVE_REGIONS);
4473
4474 mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
4475
4476 /* Merge with existing active regions if possible */
4477 for (i = 0; i < nr_nodemap_entries; i++) {
4478 if (early_node_map[i].nid != nid)
4479 continue;
4480
4481 /* Skip if an existing region covers this new one */
4482 if (start_pfn >= early_node_map[i].start_pfn &&
4483 end_pfn <= early_node_map[i].end_pfn)
4484 return;
4485
4486 /* Merge forward if suitable */
4487 if (start_pfn <= early_node_map[i].end_pfn &&
4488 end_pfn > early_node_map[i].end_pfn) {
4489 early_node_map[i].end_pfn = end_pfn;
4490 return;
4491 }
4492
4493 /* Merge backward if suitable */
4494 if (start_pfn < early_node_map[i].start_pfn &&
4495 end_pfn >= early_node_map[i].start_pfn) {
4496 early_node_map[i].start_pfn = start_pfn;
4497 return;
4498 }
4499 }
4500
4501 /* Check that early_node_map is large enough */
4502 if (i >= MAX_ACTIVE_REGIONS) {
4503 printk(KERN_CRIT "More than %d memory regions, truncating\n",
4504 MAX_ACTIVE_REGIONS);
4505 return;
4506 }
4507
4508 early_node_map[i].nid = nid;
4509 early_node_map[i].start_pfn = start_pfn;
4510 early_node_map[i].end_pfn = end_pfn;
4511 nr_nodemap_entries = i + 1;
4512}
4513
4514/**
4515 * remove_active_range - Shrink an existing registered range of PFNs
4516 * @nid: The node id the range is on that should be shrunk
4517 * @start_pfn: The new PFN of the range
4518 * @end_pfn: The new PFN of the range
4519 *
4520 * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
4521 * The map is kept near the end physical page range that has already been
4522 * registered. This function allows an arch to shrink an existing registered
4523 * range.
4524 */
4525void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
4526 unsigned long end_pfn)
4527{
4528 int i, j;
4529 int removed = 0;
4530
4531 printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
4532 nid, start_pfn, end_pfn);
4533
4534 /* Find the old active region end and shrink */
4535 for_each_active_range_index_in_nid(i, nid) {
4536 if (early_node_map[i].start_pfn >= start_pfn &&
4537 early_node_map[i].end_pfn <= end_pfn) {
4538 /* clear it */
4539 early_node_map[i].start_pfn = 0;
4540 early_node_map[i].end_pfn = 0;
4541 removed = 1;
4542 continue;
4543 }
4544 if (early_node_map[i].start_pfn < start_pfn &&
4545 early_node_map[i].end_pfn > start_pfn) {
4546 unsigned long temp_end_pfn = early_node_map[i].end_pfn;
4547 early_node_map[i].end_pfn = start_pfn;
4548 if (temp_end_pfn > end_pfn)
4549 add_active_range(nid, end_pfn, temp_end_pfn);
4550 continue;
4551 }
4552 if (early_node_map[i].start_pfn >= start_pfn &&
4553 early_node_map[i].end_pfn > end_pfn &&
4554 early_node_map[i].start_pfn < end_pfn) {
4555 early_node_map[i].start_pfn = end_pfn;
4556 continue;
4557 }
4558 }
4559
4560 if (!removed)
4561 return;
4562
4563 /* remove the blank ones */
4564 for (i = nr_nodemap_entries - 1; i > 0; i--) {
4565 if (early_node_map[i].nid != nid)
4566 continue;
4567 if (early_node_map[i].end_pfn)
4568 continue;
4569 /* we found it, get rid of it */
4570 for (j = i; j < nr_nodemap_entries - 1; j++)
4571 memcpy(&early_node_map[j], &early_node_map[j+1],
4572 sizeof(early_node_map[j]));
4573 j = nr_nodemap_entries - 1;
4574 memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
4575 nr_nodemap_entries--;
4576 }
4577}
4578
4579/**
4580 * remove_all_active_ranges - Remove all currently registered regions
4581 *
4582 * During discovery, it may be found that a table like SRAT is invalid
4583 * and an alternative discovery method must be used. This function removes
4584 * all currently registered regions.
4585 */
4586void __init remove_all_active_ranges(void)
4587{
4588 memset(early_node_map, 0, sizeof(early_node_map));
4589 nr_nodemap_entries = 0;
4590}
4591
4592/* Compare two active node_active_regions */
4593static int __init cmp_node_active_region(const void *a, const void *b)
4594{
4595 struct node_active_region *arange = (struct node_active_region *)a;
4596 struct node_active_region *brange = (struct node_active_region *)b;
4597
4598 /* Done this way to avoid overflows */
4599 if (arange->start_pfn > brange->start_pfn)
4600 return 1;
4601 if (arange->start_pfn < brange->start_pfn)
4602 return -1;
4603
4604 return 0;
4605}
4606
4607/* sort the node_map by start_pfn */
4608void __init sort_node_map(void)
4609{
4610 sort(early_node_map, (size_t)nr_nodemap_entries,
4611 sizeof(struct node_active_region),
4612 cmp_node_active_region, NULL);
4613}
4614
4615/**
4616 * node_map_pfn_alignment - determine the maximum internode alignment 4277 * node_map_pfn_alignment - determine the maximum internode alignment
4617 * 4278 *
4618 * This function should be called after node map is populated and sorted. 4279 * This function should be called after node map is populated and sorted.
@@ -4634,15 +4295,11 @@ void __init sort_node_map(void)
4634unsigned long __init node_map_pfn_alignment(void) 4295unsigned long __init node_map_pfn_alignment(void)
4635{ 4296{
4636 unsigned long accl_mask = 0, last_end = 0; 4297 unsigned long accl_mask = 0, last_end = 0;
4298 unsigned long start, end, mask;
4637 int last_nid = -1; 4299 int last_nid = -1;
4638 int i; 4300 int i, nid;
4639
4640 for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
4641 int nid = early_node_map[i].nid;
4642 unsigned long start = early_node_map[i].start_pfn;
4643 unsigned long end = early_node_map[i].end_pfn;
4644 unsigned long mask;
4645 4301
4302 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
4646 if (!start || last_nid < 0 || last_nid == nid) { 4303 if (!start || last_nid < 0 || last_nid == nid) {
4647 last_nid = nid; 4304 last_nid = nid;
4648 last_end = end; 4305 last_end = end;
@@ -4669,12 +4326,12 @@ unsigned long __init node_map_pfn_alignment(void)
4669/* Find the lowest pfn for a node */ 4326/* Find the lowest pfn for a node */
4670static unsigned long __init find_min_pfn_for_node(int nid) 4327static unsigned long __init find_min_pfn_for_node(int nid)
4671{ 4328{
4672 int i;
4673 unsigned long min_pfn = ULONG_MAX; 4329 unsigned long min_pfn = ULONG_MAX;
4330 unsigned long start_pfn;
4331 int i;
4674 4332
4675 /* Assuming a sorted map, the first range found has the starting pfn */ 4333 for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
4676 for_each_active_range_index_in_nid(i, nid) 4334 min_pfn = min(min_pfn, start_pfn);
4677 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
4678 4335
4679 if (min_pfn == ULONG_MAX) { 4336 if (min_pfn == ULONG_MAX) {
4680 printk(KERN_WARNING 4337 printk(KERN_WARNING
@@ -4703,15 +4360,16 @@ unsigned long __init find_min_pfn_with_active_regions(void)
4703 */ 4360 */
4704static unsigned long __init early_calculate_totalpages(void) 4361static unsigned long __init early_calculate_totalpages(void)
4705{ 4362{
4706 int i;
4707 unsigned long totalpages = 0; 4363 unsigned long totalpages = 0;
4364 unsigned long start_pfn, end_pfn;
4365 int i, nid;
4366
4367 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
4368 unsigned long pages = end_pfn - start_pfn;
4708 4369
4709 for (i = 0; i < nr_nodemap_entries; i++) {
4710 unsigned long pages = early_node_map[i].end_pfn -
4711 early_node_map[i].start_pfn;
4712 totalpages += pages; 4370 totalpages += pages;
4713 if (pages) 4371 if (pages)
4714 node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); 4372 node_set_state(nid, N_HIGH_MEMORY);
4715 } 4373 }
4716 return totalpages; 4374 return totalpages;
4717} 4375}
@@ -4766,6 +4424,8 @@ restart:
4766 /* Spread kernelcore memory as evenly as possible throughout nodes */ 4424 /* Spread kernelcore memory as evenly as possible throughout nodes */
4767 kernelcore_node = required_kernelcore / usable_nodes; 4425 kernelcore_node = required_kernelcore / usable_nodes;
4768 for_each_node_state(nid, N_HIGH_MEMORY) { 4426 for_each_node_state(nid, N_HIGH_MEMORY) {
4427 unsigned long start_pfn, end_pfn;
4428
4769 /* 4429 /*
4770 * Recalculate kernelcore_node if the division per node 4430 * Recalculate kernelcore_node if the division per node
4771 * now exceeds what is necessary to satisfy the requested 4431 * now exceeds what is necessary to satisfy the requested
@@ -4782,13 +4442,10 @@ restart:
4782 kernelcore_remaining = kernelcore_node; 4442 kernelcore_remaining = kernelcore_node;
4783 4443
4784 /* Go through each range of PFNs within this node */ 4444 /* Go through each range of PFNs within this node */
4785 for_each_active_range_index_in_nid(i, nid) { 4445 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4786 unsigned long start_pfn, end_pfn;
4787 unsigned long size_pages; 4446 unsigned long size_pages;
4788 4447
4789 start_pfn = max(early_node_map[i].start_pfn, 4448 start_pfn = max(start_pfn, zone_movable_pfn[nid]);
4790 zone_movable_pfn[nid]);
4791 end_pfn = early_node_map[i].end_pfn;
4792 if (start_pfn >= end_pfn) 4449 if (start_pfn >= end_pfn)
4793 continue; 4450 continue;
4794 4451
@@ -4890,11 +4547,8 @@ static void check_for_regular_memory(pg_data_t *pgdat)
4890 */ 4547 */
4891void __init free_area_init_nodes(unsigned long *max_zone_pfn) 4548void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4892{ 4549{
4893 unsigned long nid; 4550 unsigned long start_pfn, end_pfn;
4894 int i; 4551 int i, nid;
4895
4896 /* Sort early_node_map as initialisation assumes it is sorted */
4897 sort_node_map();
4898 4552
4899 /* Record where the zone boundaries are */ 4553 /* Record where the zone boundaries are */
4900 memset(arch_zone_lowest_possible_pfn, 0, 4554 memset(arch_zone_lowest_possible_pfn, 0,
@@ -4941,11 +4595,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4941 } 4595 }
4942 4596
4943 /* Print out the early_node_map[] */ 4597 /* Print out the early_node_map[] */
4944 printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); 4598 printk("Early memory PFN ranges\n");
4945 for (i = 0; i < nr_nodemap_entries; i++) 4599 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4946 printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, 4600 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
4947 early_node_map[i].start_pfn,
4948 early_node_map[i].end_pfn);
4949 4601
4950 /* Initialise every node */ 4602 /* Initialise every node */
4951 mminit_verify_pageflags_layout(); 4603 mminit_verify_pageflags_layout();
@@ -4998,7 +4650,7 @@ static int __init cmdline_parse_movablecore(char *p)
4998early_param("kernelcore", cmdline_parse_kernelcore); 4650early_param("kernelcore", cmdline_parse_kernelcore);
4999early_param("movablecore", cmdline_parse_movablecore); 4651early_param("movablecore", cmdline_parse_movablecore);
5000 4652
5001#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4653#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
5002 4654
5003/** 4655/**
5004 * set_dma_reserve - set the specified number of pages reserved in the first zone 4656 * set_dma_reserve - set the specified number of pages reserved in the first zone
diff --git a/mm/percpu.c b/mm/percpu.c
index 3bb810a72006..716eb4acf2fc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1023,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
1023 if (!is_vmalloc_addr(addr)) 1023 if (!is_vmalloc_addr(addr))
1024 return __pa(addr); 1024 return __pa(addr);
1025 else 1025 else
1026 return page_to_phys(vmalloc_to_page(addr)); 1026 return page_to_phys(vmalloc_to_page(addr)) +
1027 offset_in_page(addr);
1027 } else 1028 } else
1028 return page_to_phys(pcpu_addr_to_page(addr)); 1029 return page_to_phys(pcpu_addr_to_page(addr)) +
1030 offset_in_page(addr);
1029} 1031}
1030 1032
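page_to_phys() returns the physical address of the page's first byte, so before this fix the caller lost the position within the page for any non-page-aligned address. offset_in_page() is just the low bits below PAGE_SIZE (restated here for a 4 KiB page):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define offset_in_page(p) ((unsigned long)(p) & (PAGE_SIZE - 1))

int main(void)
{
	unsigned long addr = 0x12345678;

	/* page_to_phys(vmalloc_to_page(addr)) yields the page base;
	 * adding the in-page offset recovers the exact byte. */
	printf("offset = %#lx\n", offset_in_page(addr));	/* 0x678 */
	return 0;
}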
1031/** 1033/**
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1d8b32f07139..27be2f0d4cb7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1290,7 +1290,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
1290 unsigned long align, unsigned long flags, unsigned long start, 1290 unsigned long align, unsigned long flags, unsigned long start,
1291 unsigned long end, int node, gfp_t gfp_mask, void *caller) 1291 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1292{ 1292{
1293 static struct vmap_area *va; 1293 struct vmap_area *va;
1294 struct vm_struct *area; 1294 struct vm_struct *area;
1295 1295
1296 BUG_ON(in_interrupt()); 1296 BUG_ON(in_interrupt());
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index c7aafc7c5ed4..5f09a578d49d 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -245,9 +245,11 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
245 if (tt_global_entry) { 245 if (tt_global_entry) {
246 /* This node is probably going to update its tt table */ 246 /* This node is probably going to update its tt table */
247 tt_global_entry->orig_node->tt_poss_change = true; 247 tt_global_entry->orig_node->tt_poss_change = true;
248 /* The global entry has to be marked as PENDING and has to be 248 /* The global entry has to be marked as ROAMING and has to be
249 * kept for consistency purpose */ 249 * kept for consistency purpose */
250 tt_global_entry->flags |= TT_CLIENT_PENDING; 250 tt_global_entry->flags |= TT_CLIENT_ROAM;
251 tt_global_entry->roam_at = jiffies;
252
251 send_roam_adv(bat_priv, tt_global_entry->addr, 253 send_roam_adv(bat_priv, tt_global_entry->addr,
252 tt_global_entry->orig_node); 254 tt_global_entry->orig_node);
253 } 255 }
@@ -694,6 +696,7 @@ void tt_global_del(struct bat_priv *bat_priv,
694 const char *message, bool roaming) 696 const char *message, bool roaming)
695{ 697{
696 struct tt_global_entry *tt_global_entry = NULL; 698 struct tt_global_entry *tt_global_entry = NULL;
699 struct tt_local_entry *tt_local_entry = NULL;
697 700
698 tt_global_entry = tt_global_hash_find(bat_priv, addr); 701 tt_global_entry = tt_global_hash_find(bat_priv, addr);
699 if (!tt_global_entry) 702 if (!tt_global_entry)
@@ -701,15 +704,29 @@ void tt_global_del(struct bat_priv *bat_priv,
701 704
702 if (tt_global_entry->orig_node == orig_node) { 705 if (tt_global_entry->orig_node == orig_node) {
703 if (roaming) { 706 if (roaming) {
704 tt_global_entry->flags |= TT_CLIENT_ROAM; 707 /* if we are deleting a global entry due to a roam
705 tt_global_entry->roam_at = jiffies; 708 * event, there are two possibilities:
706 goto out; 709 * 1) the client roamed from node A to node B => we mark
710 * it with TT_CLIENT_ROAM, we start a timer and we
711 * wait for node B to claim it. In case of timeout
712 * the entry is purged.
713 * 2) the client roamed to us => we can directly delete
714 * the global entry, since it is useless now. */
715 tt_local_entry = tt_local_hash_find(bat_priv,
716 tt_global_entry->addr);
717 if (!tt_local_entry) {
718 tt_global_entry->flags |= TT_CLIENT_ROAM;
719 tt_global_entry->roam_at = jiffies;
720 goto out;
721 }
707 } 722 }
708 _tt_global_del(bat_priv, tt_global_entry, message); 723 _tt_global_del(bat_priv, tt_global_entry, message);
709 } 724 }
710out: 725out:
711 if (tt_global_entry) 726 if (tt_global_entry)
712 tt_global_entry_free_ref(tt_global_entry); 727 tt_global_entry_free_ref(tt_global_entry);
728 if (tt_local_entry)
729 tt_local_entry_free_ref(tt_local_entry);
713} 730}
714 731
715void tt_global_del_orig(struct bat_priv *bat_priv, 732void tt_global_del_orig(struct bat_priv *bat_priv,
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 91bcd3a961ec..1eea8208b2cc 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -79,17 +79,12 @@ static struct bnep_session *__bnep_get_session(u8 *dst)
79 79
80static void __bnep_link_session(struct bnep_session *s) 80static void __bnep_link_session(struct bnep_session *s)
81{ 81{
82 /* It's safe to call __module_get() here because sessions are added
83 by the socket layer which has to hold the reference to this module.
84 */
85 __module_get(THIS_MODULE);
86 list_add(&s->list, &bnep_session_list); 82 list_add(&s->list, &bnep_session_list);
87} 83}
88 84
89static void __bnep_unlink_session(struct bnep_session *s) 85static void __bnep_unlink_session(struct bnep_session *s)
90{ 86{
91 list_del(&s->list); 87 list_del(&s->list);
92 module_put(THIS_MODULE);
93} 88}
94 89
95static int bnep_send(struct bnep_session *s, void *data, size_t len) 90static int bnep_send(struct bnep_session *s, void *data, size_t len)
@@ -530,6 +525,7 @@ static int bnep_session(void *arg)
530 525
531 up_write(&bnep_session_sem); 526 up_write(&bnep_session_sem);
532 free_netdev(dev); 527 free_netdev(dev);
528 module_put_and_exit(0);
533 return 0; 529 return 0;
534} 530}
535 531
@@ -616,9 +612,11 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
616 612
617 __bnep_link_session(s); 613 __bnep_link_session(s);
618 614
615 __module_get(THIS_MODULE);
619 s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name); 616 s->task = kthread_run(bnep_session, s, "kbnepd %s", dev->name);
620 if (IS_ERR(s->task)) { 617 if (IS_ERR(s->task)) {
621 /* Session thread start failed, gotta cleanup. */ 618 /* Session thread start failed, gotta cleanup. */
619 module_put(THIS_MODULE);
622 unregister_netdev(dev); 620 unregister_netdev(dev);
623 __bnep_unlink_session(s); 621 __bnep_unlink_session(s);
624 err = PTR_ERR(s->task); 622 err = PTR_ERR(s->task);
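The bnep change (and the matching cmtp one below) takes the module reference before spawning the session kthread and drops it from inside the thread via module_put_and_exit(), closing the window where the module could be unloaded while kbnepd was still running. The ownership pattern, kernel-style (session_fn and s are stand-ins):

__module_get(THIS_MODULE);		/* pin the module for the thread */
task = kthread_run(session_fn, s, "ksessiond");
if (IS_ERR(task)) {
	module_put(THIS_MODULE);	/* thread never ran, undo the pin */
	/* ...normal error cleanup... */
}

/* ...and as the final statement of session_fn(): */
module_put_and_exit(0);			/* drop the pin and exit the thread */

module_put_and_exit() exists precisely because a plain module_put() as the thread's last action would let unload free the code the thread is still executing.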
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 7d00ddf9e9dc..5a6e634f7fca 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -67,14 +67,12 @@ static struct cmtp_session *__cmtp_get_session(bdaddr_t *bdaddr)
67 67
68static void __cmtp_link_session(struct cmtp_session *session) 68static void __cmtp_link_session(struct cmtp_session *session)
69{ 69{
70 __module_get(THIS_MODULE);
71 list_add(&session->list, &cmtp_session_list); 70 list_add(&session->list, &cmtp_session_list);
72} 71}
73 72
74static void __cmtp_unlink_session(struct cmtp_session *session) 73static void __cmtp_unlink_session(struct cmtp_session *session)
75{ 74{
76 list_del(&session->list); 75 list_del(&session->list);
77 module_put(THIS_MODULE);
78} 76}
79 77
80static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci) 78static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci)
@@ -327,6 +325,7 @@ static int cmtp_session(void *arg)
327 up_write(&cmtp_session_sem); 325 up_write(&cmtp_session_sem);
328 326
329 kfree(session); 327 kfree(session);
328 module_put_and_exit(0);
330 return 0; 329 return 0;
331} 330}
332 331
@@ -376,9 +375,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
376 375
377 __cmtp_link_session(session); 376 __cmtp_link_session(session);
378 377
378 __module_get(THIS_MODULE);
379 session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d", 379 session->task = kthread_run(cmtp_session, session, "kcmtpd_ctr_%d",
380 session->num); 380 session->num);
381 if (IS_ERR(session->task)) { 381 if (IS_ERR(session->task)) {
382 module_put(THIS_MODULE);
382 err = PTR_ERR(session->task); 383 err = PTR_ERR(session->task);
383 goto unlink; 384 goto unlink;
384 } 385 }
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index e0af7237cd92..c1c597e3e198 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -673,7 +673,7 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
673 goto encrypt; 673 goto encrypt;
674 674
675auth: 675auth:
676 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) 676 if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
677 return 0; 677 return 0;
678 678
679 if (!hci_conn_auth(conn, sec_level, auth_type)) 679 if (!hci_conn_auth(conn, sec_level, auth_type))
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index be84ae33ae36..b84458dcc226 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -613,7 +613,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
613 if (!test_bit(HCI_RAW, &hdev->flags)) { 613 if (!test_bit(HCI_RAW, &hdev->flags)) {
614 set_bit(HCI_INIT, &hdev->flags); 614 set_bit(HCI_INIT, &hdev->flags);
615 __hci_request(hdev, hci_reset_req, 0, 615 __hci_request(hdev, hci_reset_req, 0,
616 msecs_to_jiffies(HCI_INIT_TIMEOUT)); 616 msecs_to_jiffies(250));
617 clear_bit(HCI_INIT, &hdev->flags); 617 clear_bit(HCI_INIT, &hdev->flags);
618 } 618 }
619 619
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d7d96b6b1f0d..643a41b76e2e 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -545,7 +545,7 @@ static void hci_setup(struct hci_dev *hdev)
545{ 545{
546 hci_setup_event_mask(hdev); 546 hci_setup_event_mask(hdev);
547 547
548 if (hdev->lmp_ver > 1) 548 if (hdev->hci_ver > 1)
549 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); 549 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
550 550
551 if (hdev->features[6] & LMP_SIMPLE_PAIR) { 551 if (hdev->features[6] & LMP_SIMPLE_PAIR) {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 5ea94a1eecf2..17b5b1cd9657 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2152,7 +2152,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi
2152 void *ptr = req->data; 2152 void *ptr = req->data;
2153 int type, olen; 2153 int type, olen;
2154 unsigned long val; 2154 unsigned long val;
2155 struct l2cap_conf_rfc rfc; 2155 struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
2156 2156
2157 BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data); 2157 BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data);
2158 2158
@@ -2271,6 +2271,16 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len)
2271 } 2271 }
2272 } 2272 }
2273 2273
2274 /* Use sane default values in case a misbehaving remote device
2275 * did not send an RFC option.
2276 */
2277 rfc.mode = chan->mode;
2278 rfc.retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO);
2279 rfc.monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO);
2280 rfc.max_pdu_size = cpu_to_le16(chan->imtu);
2281
2282 BT_ERR("Expected RFC option was not found, using defaults");
2283
2274done: 2284done:
2275 switch (rfc.mode) { 2285 switch (rfc.mode) {
2276 case L2CAP_MODE_ERTM: 2286 case L2CAP_MODE_ERTM:
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4e32e18211f9..2d28dfe98389 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1146,6 +1146,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
1146 if (list_empty(&s->dlcs)) { 1146 if (list_empty(&s->dlcs)) {
1147 s->state = BT_DISCONN; 1147 s->state = BT_DISCONN;
1148 rfcomm_send_disc(s, 0); 1148 rfcomm_send_disc(s, 0);
1149 rfcomm_session_clear_timer(s);
1149 } 1150 }
1150 1151
1151 break; 1152 break;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d6ec3720c77e..fa8b8f763580 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -114,12 +114,18 @@ static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const vo
114 return NULL; 114 return NULL;
115} 115}
116 116
117static unsigned int fake_mtu(const struct dst_entry *dst)
118{
119 return dst->dev->mtu;
120}
121
117static struct dst_ops fake_dst_ops = { 122static struct dst_ops fake_dst_ops = {
118 .family = AF_INET, 123 .family = AF_INET,
119 .protocol = cpu_to_be16(ETH_P_IP), 124 .protocol = cpu_to_be16(ETH_P_IP),
120 .update_pmtu = fake_update_pmtu, 125 .update_pmtu = fake_update_pmtu,
121 .cow_metrics = fake_cow_metrics, 126 .cow_metrics = fake_cow_metrics,
122 .neigh_lookup = fake_neigh_lookup, 127 .neigh_lookup = fake_neigh_lookup,
128 .mtu = fake_mtu,
123}; 129};
124 130
125/* 131/*
@@ -141,7 +147,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
141 rt->dst.dev = br->dev; 147 rt->dst.dev = br->dev;
142 rt->dst.path = &rt->dst; 148 rt->dst.path = &rt->dst;
143 dst_init_metrics(&rt->dst, br_dst_default_metrics, true); 149 dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
144 rt->dst.flags = DST_NOXFRM; 150 rt->dst.flags = DST_NOXFRM | DST_NOPEER;
145 rt->dst.ops = &fake_dst_ops; 151 rt->dst.ops = &fake_dst_ops;
146} 152}
147 153
diff --git a/net/core/flow.c b/net/core/flow.c
index 8ae42de9c79e..e318c7e98042 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -358,6 +358,18 @@ void flow_cache_flush(void)
358 put_online_cpus(); 358 put_online_cpus();
359} 359}
360 360
361static void flow_cache_flush_task(struct work_struct *work)
362{
363 flow_cache_flush();
364}
365
366static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
367
368void flow_cache_flush_deferred(void)
369{
370 schedule_work(&flow_cache_flush_work);
371}
372
361static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) 373static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
362{ 374{
363 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); 375 struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
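flow_cache_flush() takes get_online_cpus() and can sleep, so callers in atomic context cannot invoke it directly; the new flow_cache_flush_deferred() bounces the flush through the system workqueue instead. The deferral idiom in miniature (heavy_flush() is a stand-in for any sleeping operation):

static void heavy_flush_task(struct work_struct *work)
{
	heavy_flush();			/* free to sleep here */
}

static DECLARE_WORK(heavy_flush_work, heavy_flush_task);

void heavy_flush_deferred(void)		/* callable from atomic context */
{
	schedule_work(&heavy_flush_work);
}

schedule_work() only queues the item, so the caller returns immediately and the work runs later in process context.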
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c71c434a4c05..385aefe53648 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -665,11 +665,14 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
665 if (count) { 665 if (count) {
666 int i; 666 int i;
667 667
668 if (count > 1<<30) { 668 if (count > INT_MAX)
669 return -EINVAL;
670 count = roundup_pow_of_two(count);
671 if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table))
672 / sizeof(struct rps_dev_flow)) {
669 /* Enforce a limit to prevent overflow */ 673 /* Enforce a limit to prevent overflow */
670 return -EINVAL; 674 return -EINVAL;
671 } 675 }
672 count = roundup_pow_of_two(count);
673 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); 676 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
674 if (!table) 677 if (!table)
675 return -ENOMEM; 678 return -ENOMEM;
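The reworked check guards two distinct overflows: count is first capped at INT_MAX so the subsequent roundup_pow_of_two() cannot overflow, and after rounding the table size sizeof(header) + count * sizeof(entry) must not wrap ULONG_MAX, hence the division-based bound. The bound on its own (struct names are stand-ins for rps_dev_flow_table and rps_dev_flow):

#include <stdio.h>
#include <limits.h>

struct hdr  { unsigned long mask; };
struct flow { unsigned int cpu; };

int main(void)
{
	/* Largest count for which sizeof(hdr) + count * sizeof(flow)
	 * still fits in unsigned long; the division itself cannot
	 * overflow, which is why the check is written this way. */
	unsigned long max = (ULONG_MAX - sizeof(struct hdr))
				/ sizeof(struct flow);

	printf("max entries = %lu\n", max);
	return 0;
}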
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ed7b1d12f5e..b23f174ab84c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -288,11 +288,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	unsigned long flags;
 	struct sk_buff_head *list = &sk->sk_receive_queue;
 
-	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
-	   number of warnings when compiling with -W --ANK
-	 */
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf) {
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
 		atomic_inc(&sk->sk_drops);
 		trace_sock_rcvqueue_full(sk, skb);
 		return -ENOMEM;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0da2afc97f32..99ec116bef14 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -253,6 +253,10 @@ static int __init ic_open_devs(void)
 		}
 	}
 
+	/* no point in waiting if we could not bring up at least one device */
+	if (!ic_first_dev)
+		goto have_carrier;
+
 	/* wait for a carrier on at least one device */
 	start = jiffies;
 	while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 065effd8349a..0b2e7329abda 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
 	if (register_netdevice(dev) < 0)
 		goto failed_free;
 
+	strcpy(nt->parms.name, dev->name);
+
 	dev_hold(dev);
 	ipip_tunnel_link(ipn, nt);
 	return nt;
@@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
 	tunnel->dev = dev;
-	strcpy(tunnel->parms.name, dev->name);
 
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 static int __net_init ipip_init_net(struct net *net)
 {
 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
+	struct ip_tunnel *t;
 	int err;
 
 	ipn->tunnels[0] = ipn->tunnels_wc;
@@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net)
 	if ((err = register_netdev(ipn->fb_tunnel_dev)))
 		goto err_reg_dev;
 
+	t = netdev_priv(ipn->fb_tunnel_dev);
+
+	strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
 	return 0;
 
 err_reg_dev:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 46af62363b8c..94cdbc55ca7e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -91,6 +91,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -120,6 +121,7 @@
 
 static int ip_rt_max_size;
 static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+static int ip_rt_gc_interval __read_mostly = 60 * HZ;
 static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
 static int ip_rt_redirect_number __read_mostly = 9;
 static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -133,6 +135,9 @@ static int ip_rt_min_advmss __read_mostly = 256;
 static int rt_chain_length_max __read_mostly = 20;
 static int redirect_genid;
 
+static struct delayed_work expires_work;
+static unsigned long expires_ljiffies;
+
 /*
  * Interface to generic destination cache.
  */
@@ -830,6 +835,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	return ONE;
 }
 
+static void rt_check_expire(void)
+{
+	static unsigned int rover;
+	unsigned int i = rover, goal;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
+	unsigned long samples = 0;
+	unsigned long sum = 0, sum2 = 0;
+	unsigned long delta;
+	u64 mult;
+
+	delta = jiffies - expires_ljiffies;
+	expires_ljiffies = jiffies;
+	mult = ((u64)delta) << rt_hash_log;
+	if (ip_rt_gc_timeout > 1)
+		do_div(mult, ip_rt_gc_timeout);
+	goal = (unsigned int)mult;
+	if (goal > rt_hash_mask)
+		goal = rt_hash_mask + 1;
+	for (; goal > 0; goal--) {
+		unsigned long tmo = ip_rt_gc_timeout;
+		unsigned long length;
+
+		i = (i + 1) & rt_hash_mask;
+		rthp = &rt_hash_table[i].chain;
+
+		if (need_resched())
+			cond_resched();
+
+		samples++;
+
+		if (rcu_dereference_raw(*rthp) == NULL)
+			continue;
+		length = 0;
+		spin_lock_bh(rt_hash_lock_addr(i));
+		while ((rth = rcu_dereference_protected(*rthp,
+			lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
+			prefetch(rth->dst.rt_next);
+			if (rt_is_expired(rth)) {
+				*rthp = rth->dst.rt_next;
+				rt_free(rth);
+				continue;
+			}
+			if (rth->dst.expires) {
+				/* Entry is expired even if it is in use */
+				if (time_before_eq(jiffies, rth->dst.expires)) {
+nofree:
+					tmo >>= 1;
+					rthp = &rth->dst.rt_next;
+					/*
+					 * We only count entries on
+					 * a chain with equal hash inputs once
+					 * so that entries for different QOS
+					 * levels, and other non-hash input
+					 * attributes don't unfairly skew
+					 * the length computation
+					 */
+					length += has_noalias(rt_hash_table[i].chain, rth);
+					continue;
+				}
+			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
+				goto nofree;
+
+			/* Cleanup aged off entries. */
+			*rthp = rth->dst.rt_next;
+			rt_free(rth);
+		}
+		spin_unlock_bh(rt_hash_lock_addr(i));
+		sum += length;
+		sum2 += length*length;
+	}
+	if (samples) {
+		unsigned long avg = sum / samples;
+		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
+		rt_chain_length_max = max_t(unsigned long,
+					    ip_rt_gc_elasticity,
+					    (avg + 4*sd) >> FRACT_BITS);
+	}
+	rover = i;
+}
+
+/*
+ * rt_worker_func() is run in process context.
+ * we call rt_check_expire() to scan part of the hash table
+ */
+static void rt_worker_func(struct work_struct *work)
+{
+	rt_check_expire();
+	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
+}
+
 /*
  * Perturbation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -1271,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (rt) {
+	if (rt && !(rt->dst.flags & DST_NOPEER)) {
 		if (rt->peer == NULL)
 			rt_bind_peer(rt, rt->rt_dst, 1);
 
@@ -1282,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 			iph->id = htons(inet_getid(rt->peer, more));
 			return;
 		}
-	} else
+	} else if (!rt)
 		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
 		       __builtin_return_address(0));
 
@@ -3179,6 +3275,13 @@ static ctl_table ipv4_route_table[] = {
 		.proc_handler = proc_dointvec_jiffies,
 	},
 	{
+		.procname = "gc_interval",
+		.data = &ip_rt_gc_interval,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_jiffies,
+	},
+	{
 		.procname = "redirect_load",
 		.data = &ip_rt_redirect_load,
 		.maxlen = sizeof(int),
@@ -3388,6 +3491,11 @@ int __init ip_rt_init(void)
 	devinet_init();
 	ip_fib_init();
 
+	INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
+	expires_ljiffies = jiffies;
+	schedule_delayed_work(&expires_work,
+		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
+
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
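rt_check_expire() above derives its per-round scan goal from the jiffies elapsed since the last run and re-derives rt_chain_length_max as mean plus four standard deviations of the sampled chain lengths. A small integer-only sketch of that mean/deviation computation (isqrt() below is a stand-in for the kernel's int_sqrt(); the data is made up):

/* Sketch of the avg + 4*sd threshold rt_check_expire() computes from
 * per-bucket chain lengths, using integer math only. */
#include <stdio.h>

static unsigned long isqrt(unsigned long x)
{
	unsigned long r = 0, b;

	/* classic bitwise integer square root */
	for (b = 1UL << (sizeof(long) * 8 - 2); b; b >>= 2) {
		if (x >= r + b) {
			x -= r + b;
			r = (r >> 1) + b;
		} else {
			r >>= 1;
		}
	}
	return r;
}

int main(void)
{
	unsigned long lengths[] = { 3, 5, 4, 19, 4, 5 };	/* sample chain lengths */
	unsigned long sum = 0, sum2 = 0, n = 6, i;
	unsigned long avg, sd;

	for (i = 0; i < n; i++) {
		sum += lengths[i];
		sum2 += lengths[i] * lengths[i];
	}
	avg = sum / n;
	sd = isqrt(sum2 / n - avg * avg);	/* E[x^2] - E[x]^2 */
	printf("threshold = %lu\n", avg + 4 * sd);	/* outlier chains trip GC */
	return 0;
}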
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cf88df82e2c2..36806def8cfd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1805,7 +1805,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
 		return ERR_PTR(-EACCES);
 
 	/* Add default multicast route */
-	addrconf_add_mroute(dev);
+	if (!(dev->flags & IFF_LOOPBACK))
+		addrconf_add_mroute(dev);
 
 	/* Add link local route */
 	addrconf_add_lroute(dev);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 84d0bd5cac93..ec562713db9b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -603,7 +603,7 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
 	static atomic_t ipv6_fragmentation_id;
 	int old, new;
 
-	if (rt) {
+	if (rt && !(rt->dst.flags & DST_NOPEER)) {
 		struct inet_peer *peer;
 
 		if (!rt->rt6i_peer)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3399dd326287..b582a0a0f1c5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -728,7 +728,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 	int attempts = !in_softirq();
 
 	if (!(rt->rt6i_flags&RTF_GATEWAY)) {
-		if (rt->rt6i_dst.plen != 128 &&
+		if (ort->rt6i_dst.plen != 128 &&
 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 			rt->rt6i_flags |= RTF_ANYCAST;
 		ipv6_addr_copy(&rt->rt6i_gateway, daddr);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a7a18602a046..96f3623618e3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -263,6 +263,8 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 	if (register_netdevice(dev) < 0)
 		goto failed_free;
 
+	strcpy(nt->parms.name, dev->name);
+
 	dev_hold(dev);
 
 	ipip6_tunnel_link(sitn, nt);
@@ -1144,7 +1146,6 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
 	tunnel->dev = dev;
-	strcpy(tunnel->parms.name, dev->name);
 
 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -1207,6 +1208,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
 static int __net_init sit_init_net(struct net *net)
 {
 	struct sit_net *sitn = net_generic(net, sit_net_id);
+	struct ip_tunnel *t;
 	int err;
 
 	sitn->tunnels[0] = sitn->tunnels_wc;
@@ -1231,6 +1233,9 @@ static int __net_init sit_init_net(struct net *net)
 	if ((err = register_netdev(sitn->fb_tunnel_dev)))
 		goto err_reg_dev;
 
+	t = netdev_priv(sitn->fb_tunnel_dev);
+
+	strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
 	return 0;
 
 err_reg_dev:
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index dfd3a648a551..a18e6c3d36e3 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied += used;
 		len -= used;
 
+		/* For non stream protcols we get one packet per recvmsg call */
+		if (sk->sk_type != SOCK_STREAM)
+			goto copy_uaddr;
+
 		if (!(flags & MSG_PEEK)) {
 			sk_eat_skb(sk, skb, 0);
 			*seq = 0;
 		}
 
-		/* For non stream protcols we get one packet per recvmsg call */
-		if (sk->sk_type != SOCK_STREAM)
-			goto copy_uaddr;
-
 		/* Partial read */
 		if (used + offset < skb->len)
 			continue;
@@ -857,6 +857,12 @@ copy_uaddr:
 	}
 	if (llc_sk(sk)->cmsg_flags)
 		llc_cmsg_rcv(msg, skb);
+
+	if (!(flags & MSG_PEEK)) {
+		sk_eat_skb(sk, skb, 0);
+		*seq = 0;
+	}
+
 	goto out;
 }
 
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index b064e4df12c6..2e4b961648d4 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -303,6 +303,38 @@ ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid)
 	__release(agg_queue);
 }
 
+/*
+ * splice packets from the STA's pending to the local pending,
+ * requires a call to ieee80211_agg_splice_finish later
+ */
+static void __acquires(agg_queue)
+ieee80211_agg_splice_packets(struct ieee80211_local *local,
+			     struct tid_ampdu_tx *tid_tx, u16 tid)
+{
+	int queue = ieee80211_ac_from_tid(tid);
+	unsigned long flags;
+
+	ieee80211_stop_queue_agg(local, tid);
+
+	if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates"
+			  " from the pending queue\n", tid))
+		return;
+
+	if (!skb_queue_empty(&tid_tx->pending)) {
+		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+		/* copy over remaining packets */
+		skb_queue_splice_tail_init(&tid_tx->pending,
+					   &local->pending[queue]);
+		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+	}
+}
+
+static void __releases(agg_queue)
+ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid)
+{
+	ieee80211_wake_queue_agg(local, tid);
+}
+
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 {
 	struct tid_ampdu_tx *tid_tx;
@@ -314,19 +346,17 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
 	/*
-	 * While we're asking the driver about the aggregation,
-	 * stop the AC queue so that we don't have to worry
-	 * about frames that came in while we were doing that,
-	 * which would require us to put them to the AC pending
-	 * afterwards which just makes the code more complex.
+	 * Start queuing up packets for this aggregation session.
+	 * We're going to release them once the driver is OK with
+	 * that.
 	 */
-	ieee80211_stop_queue_agg(local, tid);
-
 	clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
 
 	/*
-	 * make sure no packets are being processed to get
-	 * valid starting sequence number
+	 * Make sure no packets are being processed. This ensures that
+	 * we have a valid starting sequence number and that in-flight
+	 * packets have been flushed out and no packets for this TID
+	 * will go into the driver during the ampdu_action call.
 	 */
 	synchronize_net();
 
@@ -340,17 +370,15 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
340 " tid %d\n", tid); 370 " tid %d\n", tid);
341#endif 371#endif
342 spin_lock_bh(&sta->lock); 372 spin_lock_bh(&sta->lock);
373 ieee80211_agg_splice_packets(local, tid_tx, tid);
343 ieee80211_assign_tid_tx(sta, tid, NULL); 374 ieee80211_assign_tid_tx(sta, tid, NULL);
375 ieee80211_agg_splice_finish(local, tid);
344 spin_unlock_bh(&sta->lock); 376 spin_unlock_bh(&sta->lock);
345 377
346 ieee80211_wake_queue_agg(local, tid);
347 kfree_rcu(tid_tx, rcu_head); 378 kfree_rcu(tid_tx, rcu_head);
348 return; 379 return;
349 } 380 }
350 381
351 /* we can take packets again now */
352 ieee80211_wake_queue_agg(local, tid);
353
354 /* activate the timer for the recipient's addBA response */ 382 /* activate the timer for the recipient's addBA response */
355 mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); 383 mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
356#ifdef CONFIG_MAC80211_HT_DEBUG 384#ifdef CONFIG_MAC80211_HT_DEBUG
@@ -466,38 +494,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 }
 EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
 
-/*
- * splice packets from the STA's pending to the local pending,
- * requires a call to ieee80211_agg_splice_finish later
- */
-static void __acquires(agg_queue)
-ieee80211_agg_splice_packets(struct ieee80211_local *local,
-			     struct tid_ampdu_tx *tid_tx, u16 tid)
-{
-	int queue = ieee80211_ac_from_tid(tid);
-	unsigned long flags;
-
-	ieee80211_stop_queue_agg(local, tid);
-
-	if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates"
-			  " from the pending queue\n", tid))
-		return;
-
-	if (!skb_queue_empty(&tid_tx->pending)) {
-		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
-		/* copy over remaining packets */
-		skb_queue_splice_tail_init(&tid_tx->pending,
-					   &local->pending[queue]);
-		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
-	}
-}
-
-static void __releases(agg_queue)
-ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid)
-{
-	ieee80211_wake_queue_agg(local, tid);
-}
-
 static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 					 struct sta_info *sta, u16 tid)
 {
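The agg-tx.c change reorders session teardown so pending frames are spliced back onto the device queues before the session state is freed; the splice moves one queue onto the tail of another and reinitializes the source. A toy illustration of that splice-tail-and-init operation on a minimal list (not struct sk_buff; all names here are illustrative):

/* Sketch of the splice-and-reinit operation the hunk relies on
 * (skb_queue_splice_tail_init()): move every node from one queue to the
 * tail of another and leave the source empty. */
#include <stddef.h>
#include <stdio.h>

struct node { int val; struct node *next; };
struct queue { struct node *head, *tail; };

static void enqueue(struct queue *q, struct node *n)
{
	n->next = NULL;
	if (q->tail)
		q->tail->next = n;
	else
		q->head = n;
	q->tail = n;
}

static void splice_tail_init(struct queue *src, struct queue *dst)
{
	if (!src->head)
		return;
	if (dst->tail)
		dst->tail->next = src->head;
	else
		dst->head = src->head;
	dst->tail = src->tail;
	src->head = src->tail = NULL;	/* source stays usable afterwards */
}

int main(void)
{
	struct node a = {1}, b = {2}, c = {3};
	struct queue pending = {0}, hw = {0};
	struct node *n;

	enqueue(&pending, &a);
	enqueue(&pending, &b);
	enqueue(&hw, &c);
	splice_tail_init(&pending, &hw);
	for (n = hw.head; n; n = n->next)
		printf("%d ", n->val);	/* prints: 3 1 2 */
	return 0;
}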
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 12571fb2881c..29fa5badde75 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -616,7 +616,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 	if ((cp) && (!cp->dest)) {
 		dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
 				       cp->dport, &cp->vaddr, cp->vport,
-				       cp->protocol, cp->fwmark);
+				       cp->protocol, cp->fwmark, cp->flags);
 		ip_vs_bind_dest(cp, dest);
 		return dest;
 	} else
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 008bf97cc91a..e1a66cf37f9a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -619,15 +619,21 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
 			       const union nf_inet_addr *daddr,
 			       __be16 dport,
 			       const union nf_inet_addr *vaddr,
-			       __be16 vport, __u16 protocol, __u32 fwmark)
+			       __be16 vport, __u16 protocol, __u32 fwmark,
+			       __u32 flags)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_service *svc;
+	__be16 port = dport;
 
 	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
 	if (!svc)
 		return NULL;
-	dest = ip_vs_lookup_dest(svc, daddr, dport);
+	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
+		port = 0;
+	dest = ip_vs_lookup_dest(svc, daddr, port);
+	if (!dest)
+		dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
 	if (dest)
 		atomic_inc(&dest->refcnt);
 	ip_vs_service_put(svc);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3cdd479f9b5d..2b6678c0ce14 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -740,7 +740,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 	 * but still handled.
 	 */
 	dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
-			       param->vport, protocol, fwmark);
+			       param->vport, protocol, fwmark, flags);
 
 	/* Set the approprite ativity flag */
 	if (protocol == IPPROTO_TCP) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ef21b221f036..257e77256c5c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -135,7 +135,7 @@ nla_put_failure:
 static inline int
 ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	long timeout = (ct->timeout.expires - jiffies) / HZ;
+	long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
 
 	if (timeout < 0)
 		timeout = 0;
@@ -1358,12 +1358,15 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 					    nf_ct_protonum(ct));
 		if (helper == NULL) {
 			rcu_read_unlock();
+			spin_unlock_bh(&nf_conntrack_lock);
 #ifdef CONFIG_MODULES
 			if (request_module("nfct-helper-%s", helpname) < 0) {
+				spin_lock_bh(&nf_conntrack_lock);
 				err = -EOPNOTSUPP;
 				goto err1;
 			}
 
+			spin_lock_bh(&nf_conntrack_lock);
 			rcu_read_lock();
 			helper = __nf_conntrack_helper_find(helpname,
 							    nf_ct_l3num(ct),
@@ -1638,7 +1641,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			    const struct nf_conntrack_expect *exp)
 {
 	struct nf_conn *master = exp->master;
-	long timeout = (exp->timeout.expires - jiffies) / HZ;
+	long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
 	struct nf_conn_help *help;
 
 	if (timeout < 0)
@@ -1869,25 +1872,30 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 
 	err = -ENOMEM;
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL)
+	if (skb2 == NULL) {
+		nf_ct_expect_put(exp);
 		goto out;
+	}
 
 	rcu_read_lock();
 	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
 				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
 	rcu_read_unlock();
+	nf_ct_expect_put(exp);
 	if (err <= 0)
 		goto free;
 
-	nf_ct_expect_put(exp);
+	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	if (err < 0)
+		goto out;
 
-	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	return 0;
 
 free:
 	kfree_skb(skb2);
 out:
-	nf_ct_expect_put(exp);
-	return err;
+	/* this avoids a loop in nfnetlink. */
+	return err == -EAGAIN ? -ENOBUFS : err;
 }
 
 static int
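The two timeout hunks in nf_conntrack_netlink.c cast expires and jiffies to long before subtracting, so an already-expired timer yields a small negative delta that the existing "if (timeout < 0) timeout = 0;" check can clamp, rather than a huge unsigned value. A quick standalone demonstration:

/* Demonstrates why the ctnetlink hunks subtract as signed longs: with
 * unsigned arithmetic an expired timer produces a huge bogus remaining
 * time; with signed arithmetic it produces a negative one. */
#include <stdio.h>

int main(void)
{
	unsigned long jiffies = 1000;
	unsigned long expires = 900;	/* timer already expired */
	int HZ = 100;

	long wrong = (expires - jiffies) / HZ;		/* wraps before dividing */
	long right = ((long)expires - (long)jiffies) / HZ;

	printf("unsigned: %ld\n", wrong);	/* enormous positive value */
	printf("signed:   %ld\n", right);	/* -1, clamped to 0 by the caller */
	return 0;
}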
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5b138506690e..9ddf1c3bfb39 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -87,10 +87,10 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		break;
 	}
 
-	if (sinfo->count.to)
+	if (sinfo->count.to >= sinfo->count.from)
 		return what <= sinfo->count.to && what >= sinfo->count.from;
-	else
-		return what >= sinfo->count.from;
+	else /* inverted */
+		return what < sinfo->count.to || what > sinfo->count.from;
 }
 
 static int connbytes_mt_check(const struct xt_mtchk_param *par)
96static int connbytes_mt_check(const struct xt_mtchk_param *par) 96static int connbytes_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 3925c6578767..ea66034499ce 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -69,7 +69,7 @@ static int __nci_request(struct nci_dev *ndev,
 			 __u32 timeout)
 {
 	int rc = 0;
-	unsigned long completion_rc;
+	long completion_rc;
 
 	ndev->req_status = NCI_REQ_PEND;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 82a6f34d39d0..d9d4970b9b07 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1630,8 +1630,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (snaplen > res)
 		snaplen = res;
 
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf)
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		goto drop_n_acct;
 
 	if (skb_shared(skb)) {
@@ -1762,8 +1761,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (po->tp_version <= TPACKET_V2) {
 		if (macoff + snaplen > po->rx_ring.frame_size) {
 			if (po->copy_thresh &&
-			    atomic_read(&sk->sk_rmem_alloc) + skb->truesize
-			    < (unsigned)sk->sk_rcvbuf) {
+			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
 				if (skb_shared(skb)) {
 					copy_skb = skb_clone(skb, GFP_ATOMIC);
 				} else {
@@ -2450,8 +2448,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
 {
 	struct packet_sock *po = pkt_sk(sk);
 
-	if (po->fanout)
+	if (po->fanout) {
+		if (dev)
+			dev_put(dev);
+
 		return -EINVAL;
+	}
 
 	lock_sock(sk);
 
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index b9493a09a870..6cd8ddfb512d 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -385,7 +385,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 	struct gred_sched_data *q;
 
 	if (table->tab[dp] == NULL) {
-		table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
+		table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC);
 		if (table->tab[dp] == NULL)
 			return -ENOMEM;
 	}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index f88256cbacbf..28de43092330 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -107,7 +107,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!netif_is_multiqueue(dev))
 		return -EOPNOTSUPP;
 
-	if (nla_len(opt) < sizeof(*qopt))
+	if (!opt || nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
 
 	qopt = nla_data(opt);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index eb3b9a86c6ed..a4ab207cdc59 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -488,7 +488,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 		return -EINVAL;
 
 	s = sizeof(struct disttable) + n * sizeof(s16);
-	d = kmalloc(s, GFP_KERNEL);
+	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
 	if (!d)
 		d = vmalloc(s);
 	if (!d)
@@ -501,9 +501,10 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	root_lock = qdisc_root_sleeping_lock(sch);
 
 	spin_lock_bh(root_lock);
-	dist_free(q->delay_dist);
-	q->delay_dist = d;
+	swap(q->delay_dist, d);
 	spin_unlock_bh(root_lock);
+
+	dist_free(d);
 	return 0;
 }
 
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 103343408593..7b0325459e71 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -817,11 +817,11 @@ skip_unblock:
 static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
 {
 	unsigned long mask;
-	uint32_t limit, roundedF;
+	u64 limit, roundedF;
 	int slot_shift = cl->grp->slot_shift;
 
 	roundedF = qfq_round_down(cl->F, slot_shift);
-	limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
+	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
 
 	if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
 		/* timestamp was stale */
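The sch_qfq.c hunk widens limit and roundedF to 64 bits because 1UL << slot_shift can wrap on 32-bit hosts and truncate the comparison limit. A demonstration with explicit fixed-width types (values chosen to show the wrap):

/* Why the qfq hunk widens limit to 64 bits: near the top of the 32-bit
 * range, rounding down and adding one slot wraps a 32-bit value to 0. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int slot_shift = 30;
	uint32_t v = 0xC0000000u;	/* virtual time near the 32-bit top */

	uint32_t narrow = (v >> slot_shift << slot_shift)
			+ (UINT32_C(1) << slot_shift);
	uint64_t wide = ((uint64_t)v >> slot_shift << slot_shift)
			+ (UINT64_C(1) << slot_shift);

	printf("32-bit: %" PRIu32 "\n", narrow);	/* wraps to 0 */
	printf("64-bit: %" PRIu64 "\n", wide);		/* 4294967296 */
	return 0;
}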
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 152b5b3c3fff..acd2edbc073e 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -173,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
-		(unsigned long)sp->autoclose * HZ;
+		min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ;
 
 	/* Initializes the timers */
 	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 08b3cead6503..817174eb5f41 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -697,13 +697,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
 	/* Keep track of how many bytes are in flight to the receiver. */
 	asoc->outqueue.outstanding_bytes += datasize;
 
-	/* Update our view of the receiver's rwnd. Include sk_buff overhead
-	 * while updating peer.rwnd so that it reduces the chances of a
-	 * receiver running out of receive buffer space even when receive
-	 * window is still open. This can happen when a sender is sending
-	 * sending small messages.
-	 */
-	datasize += sizeof(struct sk_buff);
+	/* Update our view of the receiver's rwnd. */
 	if (datasize < rwnd)
 		rwnd -= datasize;
 	else
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 14c2b06028ff..cfeb1d4a1ee6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
411 chunk->transport->flight_size -= 411 chunk->transport->flight_size -=
412 sctp_data_size(chunk); 412 sctp_data_size(chunk);
413 q->outstanding_bytes -= sctp_data_size(chunk); 413 q->outstanding_bytes -= sctp_data_size(chunk);
414 q->asoc->peer.rwnd += (sctp_data_size(chunk) + 414 q->asoc->peer.rwnd += sctp_data_size(chunk);
415 sizeof(struct sk_buff));
416 } 415 }
417 continue; 416 continue;
418 } 417 }
@@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
432 * (Section 7.2.4)), add the data size of those 431 * (Section 7.2.4)), add the data size of those
433 * chunks to the rwnd. 432 * chunks to the rwnd.
434 */ 433 */
435 q->asoc->peer.rwnd += (sctp_data_size(chunk) + 434 q->asoc->peer.rwnd += sctp_data_size(chunk);
436 sizeof(struct sk_buff));
437 q->outstanding_bytes -= sctp_data_size(chunk); 435 q->outstanding_bytes -= sctp_data_size(chunk);
438 if (chunk->transport) 436 if (chunk->transport)
439 transport->flight_size -= sctp_data_size(chunk); 437 transport->flight_size -= sctp_data_size(chunk);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 61b9fca5a173..6f6ad8686833 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1285,6 +1285,9 @@ SCTP_STATIC __init int sctp_init(void)
 	sctp_max_instreams = SCTP_DEFAULT_INSTREAMS;
 	sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS;
 
+	/* Initialize maximum autoclose timeout. */
+	sctp_max_autoclose = INT_MAX / HZ;
+
 	/* Initialize handle used for association ids. */
 	idr_init(&sctp_assocs_id);
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13bf5fcdbff1..54a7cd2fdd7a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2200,8 +2200,6 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
 		return -EINVAL;
 	if (copy_from_user(&sp->autoclose, optval, optlen))
 		return -EFAULT;
-	/* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */
-	sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ);
 
 	return 0;
 }
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 6b3952961b85..60ffbd067ff7 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -53,6 +53,10 @@ static int sack_timer_min = 1;
 static int sack_timer_max = 500;
 static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
 static int rwnd_scale_max = 16;
+static unsigned long max_autoclose_min = 0;
+static unsigned long max_autoclose_max =
+	(MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX)
+	? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ;
 
 extern long sysctl_sctp_mem[3];
 extern int sysctl_sctp_rmem[3];
@@ -258,6 +262,15 @@ static ctl_table sctp_table[] = {
 		.extra1 = &one,
 		.extra2 = &rwnd_scale_max,
 	},
+	{
+		.procname = "max_autoclose",
+		.data = &sctp_max_autoclose,
+		.maxlen = sizeof(unsigned long),
+		.mode = 0644,
+		.proc_handler = &proc_doulongvec_minmax,
+		.extra1 = &max_autoclose_min,
+		.extra2 = &max_autoclose_max,
+	},
 
 	{ /* sentinel */ }
 };
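Taken together, the sctp hunks stop clamping autoclose at setsockopt time and instead bound it when the timeout is armed, against the new max_autoclose sysctl, so the multiply by HZ can no longer overflow. A standalone sketch of that clamp (the default ceiling mirrors the protocol.c hunk; the function itself is illustrative):

/* Sketch of the clamping the sctp hunks introduce: the user-supplied
 * autoclose value is only bounded when the timer is armed, against a
 * configurable ceiling (sctp_max_autoclose in the patch). */
#include <limits.h>
#include <stdio.h>

#define HZ 100UL

static unsigned long sctp_max_autoclose = INT_MAX / HZ;	/* sysctl ceiling */

static unsigned long autoclose_jiffies(unsigned long autoclose_secs)
{
	unsigned long secs = autoclose_secs;

	if (secs > sctp_max_autoclose)	/* min_t(unsigned long, ...) in the patch */
		secs = sctp_max_autoclose;
	return secs * HZ;		/* cannot overflow after the clamp */
}

int main(void)
{
	printf("%lu\n", autoclose_jiffies(30));		/* 3000 jiffies */
	printf("%lu\n", autoclose_jiffies(~0UL));	/* clamped to the ceiling */
	return 0;
}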
diff --git a/net/socket.c b/net/socket.c
index 2877647f347b..a0053750e37a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2883,7 +2883,7 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
 
 		return dev_ioctl(net, cmd, uifr);
 	default:
-		return -EINVAL;
+		return -ENOIOCTLCMD;
 	}
 }
 
@@ -3210,20 +3210,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 		return sock_do_ioctl(net, sock, cmd, arg);
 	}
 
-	/* Prevent warning from compat_sys_ioctl, these always
-	 * result in -EINVAL in the native case anyway. */
-	switch (cmd) {
-	case SIOCRTMSG:
-	case SIOCGIFCOUNT:
-	case SIOCSRARP:
-	case SIOCGRARP:
-	case SIOCDRARP:
-	case SIOCSIFLINK:
-	case SIOCGIFSLAVE:
-	case SIOCSIFSLAVE:
-		return -EINVAL;
-	}
-
 	return -ENOIOCTLCMD;
 }
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d12ffa545811..00a1a2acd587 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -590,6 +590,27 @@ void rpc_prepare_task(struct rpc_task *task)
 		task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
 }
 
+static void
+rpc_init_task_statistics(struct rpc_task *task)
+{
+	/* Initialize retry counters */
+	task->tk_garb_retry = 2;
+	task->tk_cred_retry = 2;
+	task->tk_rebind_retry = 2;
+
+	/* starting timestamp */
+	task->tk_start = ktime_get();
+}
+
+static void
+rpc_reset_task_statistics(struct rpc_task *task)
+{
+	task->tk_timeouts = 0;
+	task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
+
+	rpc_init_task_statistics(task);
+}
+
 /*
  * Helper that calls task->tk_ops->rpc_call_done if it exists
  */
@@ -602,6 +623,7 @@ void rpc_exit_task(struct rpc_task *task)
 		WARN_ON(RPC_ASSASSINATED(task));
 		/* Always release the RPC slot and buffer memory */
 		xprt_release(task);
+		rpc_reset_task_statistics(task);
 		}
 	}
 }
@@ -804,11 +826,6 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 	task->tk_calldata = task_setup_data->callback_data;
 	INIT_LIST_HEAD(&task->tk_task);
 
-	/* Initialize retry counters */
-	task->tk_garb_retry = 2;
-	task->tk_cred_retry = 2;
-	task->tk_rebind_retry = 2;
-
 	task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
 	task->tk_owner = current->tgid;
 
@@ -818,8 +835,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 	if (task->tk_ops->rpc_call_prepare != NULL)
 		task->tk_action = rpc_prepare_task;
 
-	/* starting timestamp */
-	task->tk_start = ktime_get();
+	rpc_init_task_statistics(task);
 
 	dprintk("RPC: new task initialized, procpid %u\n",
 		task_pid_nr(current));
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f4385e45a5fc..c64c0ef519b5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -995,13 +995,11 @@ out_init_req:
 
 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
-	if (xprt_dynamic_free_slot(xprt, req))
-		return;
-
-	memset(req, 0, sizeof(*req));	/* mark unused */
-
 	spin_lock(&xprt->reserve_lock);
-	list_add(&req->rq_list, &xprt->free);
+	if (!xprt_dynamic_free_slot(xprt, req)) {
+		memset(req, 0, sizeof(*req));	/* mark unused */
+		list_add(&req->rq_list, &xprt->free);
+	}
 	rpc_wake_up_next(&xprt->backlog);
 	spin_unlock(&xprt->reserve_lock);
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2118d6446630..9049a5caeb25 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2276,8 +2276,6 @@ static void __xfrm_garbage_collect(struct net *net)
 {
 	struct dst_entry *head, *next;
 
-	flow_cache_flush();
-
 	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
 	head = xfrm_policy_sk_bundles;
 	xfrm_policy_sk_bundles = NULL;
@@ -2290,6 +2288,18 @@ static void __xfrm_garbage_collect(struct net *net)
 	}
 }
 
+static void xfrm_garbage_collect(struct net *net)
+{
+	flow_cache_flush();
+	__xfrm_garbage_collect(net);
+}
+
+static void xfrm_garbage_collect_deferred(struct net *net)
+{
+	flow_cache_flush_deferred();
+	__xfrm_garbage_collect(net);
+}
+
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2422,7 +2432,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 		if (likely(dst_ops->neigh_lookup == NULL))
 			dst_ops->neigh_lookup = xfrm_neigh_lookup;
 		if (likely(afinfo->garbage_collect == NULL))
-			afinfo->garbage_collect = __xfrm_garbage_collect;
+			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
 		xfrm_policy_afinfo[afinfo->family] = afinfo;
 	}
 	write_unlock_bh(&xfrm_policy_afinfo_lock);
@@ -2516,7 +2526,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 
 	switch (event) {
 	case NETDEV_DOWN:
-		__xfrm_garbage_collect(dev_net(dev));
+		xfrm_garbage_collect(dev_net(dev));
 	}
 	return NOTIFY_DONE;
 }
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index ba573fe7c74d..914833d99b06 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -60,8 +60,8 @@ update-po-config: $(obj)/kxgettext $(obj)/gconf.glade.h
 	 --directory=$(srctree) --directory=$(objtree) \
 	 --output $(obj)/config.pot
 	$(Q)sed -i s/CHARSET/UTF-8/ $(obj)/config.pot
-	$(Q)ln -fs Kconfig.x86 arch/um/Kconfig
-	$(Q)(for i in `ls $(srctree)/arch/*/Kconfig`; \
+	$(Q)(for i in `ls $(srctree)/arch/*/Kconfig \
+	    $(srctree)/arch/*/um/Kconfig`; \
 	    do \
 		echo "  GEN $$i"; \
 		$(obj)/kxgettext $$i \
@@ -69,7 +69,6 @@ update-po-config: $(obj)/kxgettext $(obj)/gconf.glade.h
 	    done )
 	$(Q)msguniq --sort-by-file --to-code=UTF-8 $(obj)/config.pot \
 	    --output $(obj)/linux.pot
-	$(Q)rm -f $(srctree)/arch/um/Kconfig
 	$(Q)rm -f $(obj)/config.pot
 
 PHONY += allnoconfig allyesconfig allmodconfig alldefconfig randconfig
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 5dd5b140242c..8738deff26fa 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -27,20 +27,35 @@ static int evmkey_len = MAX_KEY_SIZE;
 
 struct crypto_shash *hmac_tfm;
 
+static DEFINE_MUTEX(mutex);
+
 static struct shash_desc *init_desc(void)
 {
 	int rc;
 	struct shash_desc *desc;
 
 	if (hmac_tfm == NULL) {
+		mutex_lock(&mutex);
+		if (hmac_tfm)
+			goto out;
 		hmac_tfm = crypto_alloc_shash(evm_hmac, 0, CRYPTO_ALG_ASYNC);
 		if (IS_ERR(hmac_tfm)) {
 			pr_err("Can not allocate %s (reason: %ld)\n",
 			       evm_hmac, PTR_ERR(hmac_tfm));
 			rc = PTR_ERR(hmac_tfm);
 			hmac_tfm = NULL;
+			mutex_unlock(&mutex);
+			return ERR_PTR(rc);
+		}
+		rc = crypto_shash_setkey(hmac_tfm, evmkey, evmkey_len);
+		if (rc) {
+			crypto_free_shash(hmac_tfm);
+			hmac_tfm = NULL;
+			mutex_unlock(&mutex);
 			return ERR_PTR(rc);
 		}
+out:
+		mutex_unlock(&mutex);
 	}
 
 	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac_tfm),
@@ -51,11 +66,7 @@ static struct shash_desc *init_desc(void)
 	desc->tfm = hmac_tfm;
 	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
-	rc = crypto_shash_setkey(hmac_tfm, evmkey, evmkey_len);
-	if (rc)
-		goto out;
 	rc = crypto_shash_init(desc);
-out:
 	if (rc) {
 		kfree(desc);
 		return ERR_PTR(rc);
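The evm_crypto.c hunk turns init_desc() into double-checked locking: the unlocked NULL check keeps the common path cheap, and the recheck under the mutex ensures only one caller allocates and keys hmac_tfm. A userspace sketch of the pattern with pthreads (the kernel relies on pointer-sized stores being atomic; a strictly portable version would use C11 atomics):

/* Userspace sketch of the double-checked locking the evm hunk applies to
 * hmac_tfm: recheck the pointer under the mutex so only one thread
 * initializes, while later callers skip the lock entirely. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static int *shared_tfm;			/* stands in for hmac_tfm */

static int *get_tfm(void)
{
	if (shared_tfm == NULL) {		/* fast path, unlocked check */
		pthread_mutex_lock(&mutex);
		if (shared_tfm == NULL) {	/* recheck under the lock */
			int *t = malloc(sizeof(*t));

			if (!t) {
				pthread_mutex_unlock(&mutex);
				return NULL;
			}
			*t = 42;		/* one-time keying/setup step */
			shared_tfm = t;
		}
		pthread_mutex_unlock(&mutex);
	}
	return shared_tfm;
}

int main(void)
{
	printf("%d\n", *get_tfm());
	printf("%d\n", *get_tfm());	/* second call takes the fast path */
	return 0;
}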
diff --git a/security/security.c b/security/security.c
index 0c6cc69c8f86..e2f684aeb70c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -381,7 +381,7 @@ int security_old_inode_init_security(struct inode *inode, struct inode *dir,
381 void **value, size_t *len) 381 void **value, size_t *len)
382{ 382{
383 if (unlikely(IS_PRIVATE(inode))) 383 if (unlikely(IS_PRIVATE(inode)))
384 return 0; 384 return -EOPNOTSUPP;
385 return security_ops->inode_init_security(inode, dir, qstr, name, value, 385 return security_ops->inode_init_security(inode, dir, qstr, name, value,
386 len); 386 len);
387} 387}
diff --git a/security/selinux/netport.c b/security/selinux/netport.c
index 0b62bd112461..7b9eb1faf68b 100644
--- a/security/selinux/netport.c
+++ b/security/selinux/netport.c
@@ -123,7 +123,9 @@ static void sel_netport_insert(struct sel_netport *port)
123 if (sel_netport_hash[idx].size == SEL_NETPORT_HASH_BKT_LIMIT) { 123 if (sel_netport_hash[idx].size == SEL_NETPORT_HASH_BKT_LIMIT) {
124 struct sel_netport *tail; 124 struct sel_netport *tail;
125 tail = list_entry( 125 tail = list_entry(
126 rcu_dereference(sel_netport_hash[idx].list.prev), 126 rcu_dereference_protected(
127 sel_netport_hash[idx].list.prev,
128 lockdep_is_held(&sel_netport_lock)),
127 struct sel_netport, list); 129 struct sel_netport, list);
128 list_del_rcu(&tail->list); 130 list_del_rcu(&tail->list);
129 kfree_rcu(tail, rcu); 131 kfree_rcu(tail, rcu);
diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c
index 6e5addeb236b..73516f69ac7c 100644
--- a/sound/atmel/ac97c.c
+++ b/sound/atmel/ac97c.c
@@ -899,6 +899,10 @@ static void atmel_ac97c_reset(struct atmel_ac97c *chip)
 		/* AC97 v2.2 specifications says minimum 1 us. */
 		udelay(2);
 		gpio_set_value(chip->reset_pin, 1);
+	} else {
+		ac97c_writel(chip, MR, AC97C_MR_WRST | AC97C_MR_ENA);
+		udelay(2);
+		ac97c_writel(chip, MR, AC97C_MR_ENA);
 	}
 }
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 7d98240def0b..c2f79e63124d 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2507,6 +2507,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
 	SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1043, 0x81e7, "ASUS M2V", POS_FIX_LPIB),
+	SND_PCI_QUIRK(0x1043, 0x83ce, "ASUS 1101HA", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x104d, 0x9069, "Sony VPCS11V9E", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1297, 0x3166, "Shuttle", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1458, 0xa022, "ga-ma770-ud3", POS_FIX_LPIB),
@@ -2970,7 +2971,8 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = {
 	/* SCH */
 	{ PCI_DEVICE(0x8086, 0x811b),
 	  .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_SCH_SNOOP |
-	  AZX_DCAPS_BUFSIZE},
+	  AZX_DCAPS_BUFSIZE | AZX_DCAPS_POSFIX_LPIB }, /* Poulsbo */
+	/* ICH */
 	{ PCI_DEVICE(0x8086, 0x2668),
 	  .driver_data = AZX_DRIVER_ICH | AZX_DCAPS_OLD_SSYNC |
 	  AZX_DCAPS_BUFSIZE },  /* ICH6 */
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index eeb25d529e30..616678fde486 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -4929,6 +4929,12 @@ static int find_mute_led_gpio(struct hda_codec *codec, int default_polarity)
 			set_hp_led_gpio(codec);
 			return 1;
 		}
+		/* BIOS bug: unfilled OEM string */
+		if (strstr(dev->name, "HP_Mute_LED_P_G")) {
+			set_hp_led_gpio(codec);
+			spec->gpio_led_polarity = 1;
+			return 1;
+		}
 	}
 
 	/*
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 4584514d93d4..fa787d45d74a 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -33,7 +33,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_CX20442
 	select SND_SOC_DA7210 if I2C
 	select SND_SOC_DFBMCS320
-	select SND_SOC_JZ4740_CODEC if SOC_JZ4740
+	select SND_SOC_JZ4740_CODEC
 	select SND_SOC_LM4857 if I2C
 	select SND_SOC_MAX98088 if I2C
 	select SND_SOC_MAX98095 if I2C
diff --git a/sound/soc/codecs/jz4740.c b/sound/soc/codecs/jz4740.c
index e373f8f06907..3e1f4e172bfb 100644
--- a/sound/soc/codecs/jz4740.c
+++ b/sound/soc/codecs/jz4740.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 
 #include <linux/delay.h>
 
diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c
index bfdc52370ad0..d3b0a20744f1 100644
--- a/sound/soc/codecs/wm8776.c
+++ b/sound/soc/codecs/wm8776.c
@@ -235,6 +235,7 @@ static int wm8776_hw_params(struct snd_pcm_substream *substream,
 	switch (snd_pcm_format_width(params_format(params))) {
 	case 16:
 		iface = 0;
+		break;
 	case 20:
 		iface = 0x10;
 		break;
diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c
index 0293763debe5..5a14d5c0e0e1 100644
--- a/sound/soc/codecs/wm8958-dsp2.c
+++ b/sound/soc/codecs/wm8958-dsp2.c
@@ -60,6 +60,8 @@ static int wm8958_dsp2_fw(struct snd_soc_codec *codec, const char *name,
 	}
 
 	if (memcmp(fw->data, "WMFW", 4) != 0) {
+		memcpy(&data32, fw->data, sizeof(data32));
+		data32 = be32_to_cpu(data32);
 		dev_err(codec->dev, "%s: firmware has bad file magic %08x\n",
 			name, data32);
 		goto err;
diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index 645c980d6b80..a33b04d17195 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -1968,6 +1968,7 @@ static int wm8996_set_sysclk(struct snd_soc_dai *dai,
 		break;
 	case 24576000:
 		ratediv = WM8996_SYSCLK_DIV;
+		wm8996->sysclk /= 2;
 	case 12288000:
 		snd_soc_update_bits(codec, WM8996_AIF_RATE,
 				    WM8996_SYSCLK_RATE, WM8996_SYSCLK_RATE);
diff --git a/sound/soc/mxs/mxs-pcm.c b/sound/soc/mxs/mxs-pcm.c
index dea5aa4aa647..f39d7dd9fbcb 100644
--- a/sound/soc/mxs/mxs-pcm.c
+++ b/sound/soc/mxs/mxs-pcm.c
@@ -357,3 +357,6 @@ static void __exit snd_mxs_pcm_exit(void)
 	platform_driver_unregister(&mxs_pcm_driver);
 }
 module_exit(snd_mxs_pcm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mxs-pcm-audio");
diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c
index 7fbeaec06eb4..1c57f6630a48 100644
--- a/sound/soc/mxs/mxs-sgtl5000.c
+++ b/sound/soc/mxs/mxs-sgtl5000.c
@@ -171,3 +171,4 @@ module_exit(mxs_sgtl5000_exit);
 MODULE_AUTHOR("Freescale Semiconductor, Inc.");
 MODULE_DESCRIPTION("MXS ALSA SoC Machine driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mxs-sgtl5000");
diff --git a/sound/soc/pxa/hx4700.c b/sound/soc/pxa/hx4700.c
index 65c124831a00..c664e33fb6d7 100644
--- a/sound/soc/pxa/hx4700.c
+++ b/sound/soc/pxa/hx4700.c
@@ -209,9 +209,10 @@ static int __devinit hx4700_audio_probe(struct platform_device *pdev)
 	snd_soc_card_hx4700.dev = &pdev->dev;
 	ret = snd_soc_register_card(&snd_soc_card_hx4700);
 	if (ret)
-		return ret;
+		gpio_free_array(hx4700_audio_gpios,
+				ARRAY_SIZE(hx4700_audio_gpios));
 
-	return 0;
+	return ret;
 }
 
 static int __devexit hx4700_audio_remove(struct platform_device *pdev)
diff --git a/sound/soc/samsung/jive_wm8750.c b/sound/soc/samsung/jive_wm8750.c
index 1826acf20f7c..8e523fd9189e 100644
--- a/sound/soc/samsung/jive_wm8750.c
+++ b/sound/soc/samsung/jive_wm8750.c
@@ -101,7 +101,6 @@ static int jive_wm8750_init(struct snd_soc_pcm_runtime *rtd)
 {
 	struct snd_soc_codec *codec = rtd->codec;
 	struct snd_soc_dapm_context *dapm = &codec->dapm;
-	int err;
 
 	/* These endpoints are not being used. */
 	snd_soc_dapm_nc_pin(dapm, "LINPUT2");
@@ -131,7 +130,7 @@ static struct snd_soc_card snd_soc_machine_jive = {
 	.dai_link = &jive_dai,
 	.num_links = 1,
 
-	.dapm_widgtets = wm8750_dapm_widgets,
+	.dapm_widgets = wm8750_dapm_widgets,
 	.num_dapm_widgets = ARRAY_SIZE(wm8750_dapm_widgets),
 	.dapm_routes = audio_map,
 	.num_dapm_routes = ARRAY_SIZE(audio_map),
diff --git a/sound/soc/samsung/smdk2443_wm9710.c b/sound/soc/samsung/smdk2443_wm9710.c
index 3a0dbfc793f0..8bd1dc5706bf 100644
--- a/sound/soc/samsung/smdk2443_wm9710.c
+++ b/sound/soc/samsung/smdk2443_wm9710.c
@@ -12,6 +12,7 @@
  *
  */
 
+#include <linux/module.h>
 #include <sound/soc.h>
 
 static struct snd_soc_card smdk2443;
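
This one-line include is typical fallout from the module.h header split: a file that uses the MODULE_* macros or THIS_MODULE must now pull in <linux/module.h> itself rather than inheriting it through another header. The minimal requirement is just:

	#include <linux/module.h>	/* needed for MODULE_LICENSE() and friends */

	MODULE_LICENSE("GPL");
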
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index fe6762ed56bd..c89f9e1453f7 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -22,7 +22,7 @@ OPTIONS
 -------
 -i::
 --input=::
-	Input file name. (default: perf.data)
+	Input file name. (default: perf.data unless stdin is a fifo)
 
 -d::
 --dsos=<dso[,dso...]>::
@@ -66,7 +66,7 @@ OPTIONS
 	used. This interfaces starts by centering on the line with more
 	samples, TAB/UNTAB cycles through the lines with more samples.
 
--c::
+-C::
 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
 	be provided as a comma-separated list with no space: 0,1. Ranges of
 	CPUs are specified with -: 0-2. Default is to report samples on all
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index cc22325ffd1b..25c52efcc7f0 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -26,7 +26,7 @@ OPTIONS
 	Show only DSOs with hits.
 -i::
 --input=::
-	Input file name. (default: perf.data)
+	Input file name. (default: perf.data unless stdin is a fifo)
 -f::
 --force::
 	Don't do ownership validation.
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 0cada9e053dc..0507ec7bad71 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -18,7 +18,7 @@ OPTIONS
 -------
 -i::
 --input=::
-	Input file name. (default: perf.data)
+	Input file name. (default: perf.data unless stdin is a fifo)
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index a52fcde894c7..7c8fbbf3f61c 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -23,7 +23,7 @@ OPTIONS
 -------
 -i <file>::
 --input=<file>::
-	Select the input file (default: perf.data)
+	Select the input file (default: perf.data unless stdin is a fifo)
 
 --caller::
 	Show per-callsite statistics
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 4a26a2f3a6a3..d6b2a4f2108b 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -29,7 +29,7 @@ COMMON OPTIONS
 
 -i::
 --input=<file>::
-	Input file name.
+	Input file name. (default: perf.data unless stdin is a fifo)
 
 -v::
 --verbose::
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5a520f825295..2937f7e14bb7 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -89,7 +89,7 @@ OPTIONS
 
 -m::
 --mmap-pages=::
-	Number of mmap data pages.
+	Number of mmap data pages. Must be a power of two.
 
 -g::
 --call-graph::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 212f24d672e1..9b430e98712e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -19,7 +19,7 @@ OPTIONS
 -------
 -i::
 --input=::
-	Input file name. (default: perf.data)
+	Input file name. (default: perf.data unless stdin is a fifo)
 
 -v::
 --verbose::
@@ -39,7 +39,7 @@ OPTIONS
 -T::
 --threads::
 	Show per-thread event counters
--C::
+-c::
 --comms=::
 	Only consider symbols in these comms. CSV that understands
 	file://filename entries.
@@ -80,9 +80,10 @@ OPTIONS
 --dump-raw-trace::
 	Dump raw trace in ASCII.
 
--g [type,min,order]::
+-g [type,min[,limit],order]::
 --call-graph::
-	Display call chains using type, min percent threshold and order.
+	Display call chains using type, min percent threshold, optional print
+	limit and order.
 	type can be either:
 	- flat: single column, linear exposure of call chains.
 	- graph: use a graph tree, displaying absolute overhead rates.
@@ -128,7 +129,7 @@ OPTIONS
 --symfs=<directory>::
 	Look for files with symbols relative to this directory.
 
--c::
+-C::
 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
 	be provided as a comma-separated list with no space: 0,1. Ranges of
 	CPUs are specified with -: 0-2. Default is to report samples on all
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 5b212b57f70b..8ff4df956951 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -40,7 +40,7 @@ OPTIONS
 -------
 -i::
 --input=<file>::
-	Input file name. (default: perf.data)
+	Input file name. (default: perf.data unless stdin is a fifo)
 
 -v::
 --verbose::
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index dec87ecb530e..2f6cef43da25 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -106,7 +106,7 @@ OPTIONS
 
 -i::
 --input=::
-	Input file name.
+	Input file name. (default: perf.data unless stdin is a fifo)
 
 -d::
 --debug-mode::
@@ -182,12 +182,17 @@ OPTIONS
 --hide-call-graph::
 	When printing symbols do not display call chain.
 
--c::
+-C::
 --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
 	be provided as a comma-separated list with no space: 0,1. Ranges of
 	CPUs are specified with -: 0-2. Default is to report samples on all
 	CPUs.
 
+-c::
+--comms=::
+	Only display events for these comms. CSV that understands
+	file://filename entries.
+
 -I::
 --show-info::
 	Display extended information about the perf.data file. This adds
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 2c3b462f64b0..b24ac40fcd58 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -8,13 +8,19 @@ perf-test - Runs sanity tests.
 SYNOPSIS
 --------
 [verse]
-'perf test <options>'
+'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
 
 DESCRIPTION
 -----------
 This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 
+To get a list of available tests use 'perf test list', specifying a test name
+fragment will show all tests that have it.
+
+To run just specific tests, inform test name fragments or the numbers obtained
+from 'perf test list'.
+
 OPTIONS
 -------
 -v::
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index d7b79e2ba2ad..1632b0efc757 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -27,7 +27,7 @@ OPTIONS
 	Select the output file (default: output.svg)
 -i::
 --input=::
-	Select the input file (default: perf.data)
+	Select the input file (default: perf.data unless stdin is a fifo)
 -w::
 --width=::
 	Select the width of the SVG file (default: 1000)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index b98e3075646b..ac86d67b636e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -278,6 +278,7 @@ LIB_H += util/strbuf.h
 LIB_H += util/strlist.h
 LIB_H += util/strfilter.h
 LIB_H += util/svghelper.h
+LIB_H += util/tool.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 46b4c24f338e..214ba7f9f577 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -27,32 +27,32 @@
 #include "util/sort.h"
 #include "util/hist.h"
 #include "util/session.h"
+#include "util/tool.h"
 
 #include <linux/bitmap.h>
 
-static char const *input_name = "perf.data";
-
-static bool force, use_tui, use_stdio;
-
-static bool full_paths;
-
-static bool print_line;
-
-static const char *sym_hist_filter;
-
-static const char *cpu_list;
-static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+struct perf_annotate {
+	struct perf_tool tool;
+	char const *input_name;
+	bool force, use_tui, use_stdio;
+	bool full_paths;
+	bool print_line;
+	const char *sym_hist_filter;
+	const char *cpu_list;
+	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
 
-static int perf_evlist__add_sample(struct perf_evlist *evlist,
-				   struct perf_sample *sample,
-				   struct perf_evsel *evsel,
-				   struct addr_location *al)
+static int perf_evsel__add_sample(struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct addr_location *al,
+				  struct perf_annotate *ann)
 {
 	struct hist_entry *he;
 	int ret;
 
-	if (sym_hist_filter != NULL &&
-	    (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
+	if (ann->sym_hist_filter != NULL &&
+	    (al->sym == NULL ||
+	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
 		/* We're only interested in a symbol named sym_hist_filter */
 		if (al->sym != NULL) {
 			rb_erase(&al->sym->rb_node,
@@ -69,8 +69,7 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist,
 	ret = 0;
 	if (he->ms.sym != NULL) {
 		struct annotation *notes = symbol__annotation(he->ms.sym);
-		if (notes->src == NULL &&
-		    symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0)
+		if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
 			return -ENOMEM;
 
 		ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
@@ -81,25 +80,26 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist,
 	return ret;
 }
 
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool,
+				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
-				struct perf_session *session)
+				struct machine *machine)
 {
+	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
 	struct addr_location al;
 
-	if (perf_event__preprocess_sample(event, session, &al, sample,
+	if (perf_event__preprocess_sample(event, machine, &al, sample,
 					  symbol__annotate_init) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
 	}
 
-	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
 		return 0;
 
-	if (!al.filtered &&
-	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
+	if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
 		pr_warning("problem incrementing symbol count, "
 			   "skipping event\n");
 		return -1;
@@ -108,14 +108,15 @@ static int process_sample_event(union perf_event *event,
 	return 0;
 }
 
-static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
+static int hist_entry__tty_annotate(struct hist_entry *he, int evidx,
+				    struct perf_annotate *ann)
 {
 	return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
-				    print_line, full_paths, 0, 0);
+				    ann->print_line, ann->full_paths, 0, 0);
 }
 
 static void hists__find_annotations(struct hists *self, int evidx,
-				    int nr_events)
+				    struct perf_annotate *ann)
 {
 	struct rb_node *nd = rb_first(&self->entries), *next;
 	int key = K_RIGHT;
@@ -138,8 +139,7 @@ find_next:
 	}
 
 	if (use_browser > 0) {
-		key = hist_entry__tui_annotate(he, evidx, nr_events,
-					       NULL, NULL, 0);
+		key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0);
 		switch (key) {
 		case K_RIGHT:
 			next = rb_next(nd);
@@ -154,7 +154,7 @@ find_next:
 		if (next != NULL)
 			nd = next;
 	} else {
-		hist_entry__tty_annotate(he, evidx);
+		hist_entry__tty_annotate(he, evidx, ann);
 		nd = rb_next(nd);
 		/*
 		 * Since we have a hist_entry per IP for the same
@@ -167,33 +167,26 @@ find_next:
 	}
 }
 
-static struct perf_event_ops event_ops = {
-	.sample	= process_sample_event,
-	.mmap	= perf_event__process_mmap,
-	.comm	= perf_event__process_comm,
-	.fork	= perf_event__process_task,
-	.ordered_samples = true,
-	.ordering_requires_timestamps = true,
-};
-
-static int __cmd_annotate(void)
+static int __cmd_annotate(struct perf_annotate *ann)
 {
 	int ret;
 	struct perf_session *session;
 	struct perf_evsel *pos;
 	u64 total_nr_samples;
 
-	session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
+	session = perf_session__new(ann->input_name, O_RDONLY,
+				    ann->force, false, &ann->tool);
 	if (session == NULL)
 		return -ENOMEM;
 
-	if (cpu_list) {
-		ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+	if (ann->cpu_list) {
+		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
+					       ann->cpu_bitmap);
 		if (ret)
 			goto out_delete;
 	}
 
-	ret = perf_session__process_events(session, &event_ops);
+	ret = perf_session__process_events(session, &ann->tool);
 	if (ret)
 		goto out_delete;
199 192
@@ -217,13 +210,12 @@ static int __cmd_annotate(void)
 			total_nr_samples += nr_samples;
 			hists__collapse_resort(hists);
 			hists__output_resort(hists);
-			hists__find_annotations(hists, pos->idx,
-						session->evlist->nr_entries);
+			hists__find_annotations(hists, pos->idx, ann);
 		}
 	}
 
 	if (total_nr_samples == 0) {
-		ui__warning("The %s file has no samples!\n", input_name);
+		ui__warning("The %s file has no samples!\n", session->filename);
 		goto out_delete;
 	}
 out_delete:
@@ -247,29 +239,41 @@ static const char * const annotate_usage[] = {
 	NULL
 };
 
-static const struct option options[] = {
-	OPT_STRING('i', "input", &input_name, "file",
+int cmd_annotate(int argc, const char **argv, const char *prefix __used)
+{
+	struct perf_annotate annotate = {
+		.tool = {
+			.sample	= process_sample_event,
+			.mmap	= perf_event__process_mmap,
+			.comm	= perf_event__process_comm,
+			.fork	= perf_event__process_task,
+			.ordered_samples = true,
+			.ordering_requires_timestamps = true,
+		},
+	};
+	const struct option options[] = {
+	OPT_STRING('i', "input", &annotate.input_name, "file",
 		    "input file name"),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
-	OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
+	OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
 		    "symbol to annotate"),
-	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+	OPT_BOOLEAN('f', "force", &annotate.force, "don't complain, do it"),
 	OPT_INCR('v', "verbose", &verbose,
 		 "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
-	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
-	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
+	OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
+	OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
-	OPT_BOOLEAN('l', "print-line", &print_line,
+	OPT_BOOLEAN('l', "print-line", &annotate.print_line,
 		    "print matching source lines (may be slow)"),
-	OPT_BOOLEAN('P', "full-paths", &full_paths,
+	OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
 		    "Don't shorten the displayed pathnames"),
-	OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		   "Look for files with symbols relative to this directory"),
 	OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
@@ -279,15 +283,13 @@ static const struct option options[] = {
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
 	OPT_END()
-};
+	};
 
-int cmd_annotate(int argc, const char **argv, const char *prefix __used)
-{
 	argc = parse_options(argc, argv, options, annotate_usage, 0);
 
-	if (use_stdio)
+	if (annotate.use_stdio)
 		use_browser = 0;
-	else if (use_tui)
+	else if (annotate.use_tui)
 		use_browser = 1;
 
 	setup_browser(true);
@@ -308,7 +310,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 		if (argc > 1)
 			usage_with_options(annotate_usage, options);
 
-		sym_hist_filter = argv[0];
+		annotate.sym_hist_filter = argv[0];
 	}
 
 	if (field_sep && *field_sep == '.') {
@@ -316,5 +318,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 		return -1;
 	}
 
-	return __cmd_annotate();
+	return __cmd_annotate(&annotate);
 }
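
The conversion above is the pattern repeated across the builtins in this series: per-command globals move into a command struct that embeds struct perf_tool, and each callback recovers its state with container_of(). Boiled down to a self-contained sketch (illustrative types, not perf's own):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct tool {				/* stand-in for struct perf_tool */
		int (*sample)(struct tool *tool, int value);
	};

	struct annotate_state {			/* stand-in for struct perf_annotate */
		struct tool tool;		/* embedded, so no globals needed */
		long nr_samples;
	};

	static int on_sample(struct tool *tool, int value)
	{
		struct annotate_state *st =
			container_of(tool, struct annotate_state, tool);

		st->nr_samples++;		/* per-invocation state, not a global */
		return value;
	}

	int main(void)
	{
		struct annotate_state st = { .tool = { .sample = on_sample } };

		st.tool.sample(&st.tool, 42);
		printf("%ld\n", st.nr_samples);	/* prints 1 */
		return 0;
	}

The payoff is that two users of the same callback table can no longer trample each other's shared globals.
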
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index cb690a65bf02..52480467e9ff 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -18,7 +18,7 @@
 
 #include <libelf.h>
 
-static char const *input_name = "perf.data";
+static const char *input_name;
 static bool force;
 static bool show_kernel;
 static bool with_hits;
@@ -39,24 +39,6 @@ static const struct option options[] = {
 	OPT_END()
 };
 
-static int perf_session__list_build_ids(void)
-{
-	struct perf_session *session;
-
-	session = perf_session__new(input_name, O_RDONLY, force, false,
-				    &build_id__mark_dso_hit_ops);
-	if (session == NULL)
-		return -1;
-
-	if (with_hits)
-		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
-
-	perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
-
-	perf_session__delete(session);
-	return 0;
-}
-
 static int sysfs__fprintf_build_id(FILE *fp)
 {
 	u8 kallsyms_build_id[BUILD_ID_SIZE];
@@ -85,17 +67,36 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
 	return fprintf(fp, "%s\n", sbuild_id);
 }
 
-static int __cmd_buildid_list(void)
+static int perf_session__list_build_ids(void)
 {
-	if (show_kernel)
-		return sysfs__fprintf_build_id(stdout);
+	struct perf_session *session;
 
 	elf_version(EV_CURRENT);
+
+	session = perf_session__new(input_name, O_RDONLY, force, false,
+				    &build_id__mark_dso_hit_ops);
+	if (session == NULL)
+		return -1;
+
 	/*
 	 * See if this is an ELF file first:
 	 */
-	if (filename__fprintf_build_id(input_name, stdout))
-		return 0;
+	if (filename__fprintf_build_id(session->filename, stdout))
+		goto out;
+
+	if (with_hits)
+		perf_session__process_events(session, &build_id__mark_dso_hit_ops);
+
+	perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
+out:
+	perf_session__delete(session);
+	return 0;
+}
+
+static int __cmd_buildid_list(void)
+{
+	if (show_kernel)
+		return sysfs__fprintf_build_id(stdout);
 
 	return perf_session__list_build_ids();
 }
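
The session->filename use above pairs with the documentation changes earlier: tools now pass a NULL input_name and let the session layer pick the default, reading from stdin when a pipe is attached. A plausible sketch of that resolution (perf's actual helper may differ in detail):

	#include <sys/stat.h>
	#include <unistd.h>

	/* Pick a default input: read the pipe if one is attached to stdin. */
	static const char *resolve_input(const char *input_name)
	{
		struct stat st;

		if (input_name)
			return input_name;
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			return "-";	/* "-" means read from stdin */
		return "perf.data";
	}
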
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index b39f3a1ee7dc..4f19513d7dda 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -9,7 +9,9 @@
 #include "util/debug.h"
 #include "util/event.h"
 #include "util/hist.h"
+#include "util/evsel.h"
 #include "util/session.h"
+#include "util/tool.h"
 #include "util/sort.h"
 #include "util/symbol.h"
 #include "util/util.h"
@@ -30,14 +32,15 @@ static int hists__add_entry(struct hists *self,
 		return -ENOMEM;
 }
 
-static int diff__process_sample_event(union perf_event *event,
+static int diff__process_sample_event(struct perf_tool *tool __used,
+				      union perf_event *event,
 				      struct perf_sample *sample,
 				      struct perf_evsel *evsel __used,
-				      struct perf_session *session)
+				      struct machine *machine)
 {
 	struct addr_location al;
 
-	if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
@@ -46,16 +49,16 @@ static int diff__process_sample_event(union perf_event *event,
 	if (al.filtered || al.sym == NULL)
 		return 0;
 
-	if (hists__add_entry(&session->hists, &al, sample->period)) {
+	if (hists__add_entry(&evsel->hists, &al, sample->period)) {
 		pr_warning("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
 
-	session->hists.stats.total_period += sample->period;
+	evsel->hists.stats.total_period += sample->period;
 	return 0;
 }
 
-static struct perf_event_ops event_ops = {
+static struct perf_tool perf_diff = {
 	.sample	= diff__process_sample_event,
 	.mmap	= perf_event__process_mmap,
 	.comm	= perf_event__process_comm,
@@ -145,13 +148,13 @@ static int __cmd_diff(void)
 	int ret, i;
 	struct perf_session *session[2];
 
-	session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
-	session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
+	session[0] = perf_session__new(input_old, O_RDONLY, force, false, &perf_diff);
+	session[1] = perf_session__new(input_new, O_RDONLY, force, false, &perf_diff);
 	if (session[0] == NULL || session[1] == NULL)
 		return -ENOMEM;
 
 	for (i = 0; i < 2; ++i) {
-		ret = perf_session__process_events(session[i], &event_ops);
+		ret = perf_session__process_events(session[i], &perf_diff);
 		if (ret)
 			goto out_delete;
 	}
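
The substantive change here is that histogram state moves from the session to the event selector: with one struct hists per perf_evsel, samples for different event types accumulate separately, which is what a per-event diff needs. Schematically (a toy accumulator, not perf's types):

	/* One accumulator per event type rather than one per input file. */
	struct evsel_like {
		unsigned long long total_period;
	};

	static void account_sample(struct evsel_like *evsel,
				   unsigned long long period)
	{
		evsel->total_period += period;	/* keyed by event, not by file */
	}
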
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 4c5e9e04a41f..26760322c4f4 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -15,7 +15,7 @@
 #include "util/parse-options.h"
 #include "util/session.h"
 
-static char const *input_name = "perf.data";
+static const char *input_name;
 
 static int __cmd_evlist(void)
 {
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8dfc12bb119b..09c106193e65 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -9,6 +9,7 @@
 
 #include "perf.h"
 #include "util/session.h"
+#include "util/tool.h"
 #include "util/debug.h"
 
 #include "util/parse-options.h"
@@ -16,8 +17,9 @@
 static char const *input_name = "-";
 static bool inject_build_ids;
 
-static int perf_event__repipe_synth(union perf_event *event,
-				    struct perf_session *session __used)
+static int perf_event__repipe_synth(struct perf_tool *tool __used,
+				    union perf_event *event,
+				    struct machine *machine __used)
 {
 	uint32_t size;
 	void *buf = event;
@@ -36,41 +38,70 @@ static int perf_event__repipe_synth(union perf_event *event,
 	return 0;
 }
 
-static int perf_event__repipe(union perf_event *event,
+static int perf_event__repipe_op2_synth(struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_session *session __used)
+{
+	return perf_event__repipe_synth(tool, event, NULL);
+}
+
+static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
+					       union perf_event *event)
+{
+	return perf_event__repipe_synth(tool, event, NULL);
+}
+
+static int perf_event__repipe_tracing_data_synth(union perf_event *event,
+						 struct perf_session *session __used)
+{
+	return perf_event__repipe_synth(NULL, event, NULL);
+}
+
+static int perf_event__repipe_attr(union perf_event *event,
+				   struct perf_evlist **pevlist __used)
+{
+	return perf_event__repipe_synth(NULL, event, NULL);
+}
+
+static int perf_event__repipe(struct perf_tool *tool,
+			      union perf_event *event,
 			      struct perf_sample *sample __used,
-			      struct perf_session *session)
+			      struct machine *machine)
 {
-	return perf_event__repipe_synth(event, session);
+	return perf_event__repipe_synth(tool, event, machine);
 }
 
-static int perf_event__repipe_sample(union perf_event *event,
+static int perf_event__repipe_sample(struct perf_tool *tool,
+				     union perf_event *event,
 				     struct perf_sample *sample __used,
 				     struct perf_evsel *evsel __used,
-				     struct perf_session *session)
+				     struct machine *machine)
 {
-	return perf_event__repipe_synth(event, session);
+	return perf_event__repipe_synth(tool, event, machine);
 }
 
-static int perf_event__repipe_mmap(union perf_event *event,
+static int perf_event__repipe_mmap(struct perf_tool *tool,
+				   union perf_event *event,
 				   struct perf_sample *sample,
-				   struct perf_session *session)
+				   struct machine *machine)
 {
 	int err;
 
-	err = perf_event__process_mmap(event, sample, session);
-	perf_event__repipe(event, sample, session);
+	err = perf_event__process_mmap(tool, event, sample, machine);
+	perf_event__repipe(tool, event, sample, machine);
 
 	return err;
 }
 
-static int perf_event__repipe_task(union perf_event *event,
+static int perf_event__repipe_task(struct perf_tool *tool,
+				   union perf_event *event,
 				   struct perf_sample *sample,
-				   struct perf_session *session)
+				   struct machine *machine)
 {
 	int err;
 
-	err = perf_event__process_task(event, sample, session);
-	perf_event__repipe(event, sample, session);
+	err = perf_event__process_task(tool, event, sample, machine);
+	perf_event__repipe(tool, event, sample, machine);
 
 	return err;
 }
@@ -80,7 +111,7 @@ static int perf_event__repipe_tracing_data(union perf_event *event,
 {
 	int err;
 
-	perf_event__repipe_synth(event, session);
+	perf_event__repipe_synth(NULL, event, NULL);
 	err = perf_event__process_tracing_data(event, session);
 
 	return err;
@@ -100,10 +131,10 @@ static int dso__read_build_id(struct dso *self)
 	return -1;
 }
 
-static int dso__inject_build_id(struct dso *self, struct perf_session *session)
+static int dso__inject_build_id(struct dso *self, struct perf_tool *tool,
+				struct machine *machine)
 {
 	u16 misc = PERF_RECORD_MISC_USER;
-	struct machine *machine;
 	int err;
 
 	if (dso__read_build_id(self) < 0) {
@@ -111,17 +142,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
 		return -1;
 	}
 
-	machine = perf_session__find_host_machine(session);
-	if (machine == NULL) {
-		pr_err("Can't find machine for session\n");
-		return -1;
-	}
-
 	if (self->kernel)
 		misc = PERF_RECORD_MISC_KERNEL;
 
-	err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
-					      machine, session);
+	err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe,
+					      machine);
 	if (err) {
 		pr_err("Can't synthesize build_id event for %s\n", self->long_name);
 		return -1;
@@ -130,10 +155,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
 	return 0;
 }
 
-static int perf_event__inject_buildid(union perf_event *event,
+static int perf_event__inject_buildid(struct perf_tool *tool,
+				      union perf_event *event,
 				      struct perf_sample *sample,
 				      struct perf_evsel *evsel __used,
-				      struct perf_session *session)
+				      struct machine *machine)
 {
 	struct addr_location al;
 	struct thread *thread;
@@ -141,21 +167,21 @@ static int perf_event__inject_buildid(union perf_event *event,
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	thread = perf_session__findnew(session, event->ip.pid);
+	thread = machine__findnew_thread(machine, event->ip.pid);
 	if (thread == NULL) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
 		goto repipe;
 	}
 
-	thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
-			      event->ip.pid, event->ip.ip, &al);
+	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
+			      event->ip.ip, &al);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
 			al.map->dso->hit = 1;
 			if (map__load(al.map, NULL) >= 0) {
-				dso__inject_build_id(al.map->dso, session);
+				dso__inject_build_id(al.map->dso, tool, machine);
 				/*
 				 * If this fails, too bad, let the other side
 				 * account this as unresolved.
@@ -168,24 +194,24 @@ static int perf_event__inject_buildid(union perf_event *event,
 	}
 
 repipe:
-	perf_event__repipe(event, sample, session);
+	perf_event__repipe(tool, event, sample, machine);
 	return 0;
 }
 
-struct perf_event_ops inject_ops = {
+struct perf_tool perf_inject = {
 	.sample		= perf_event__repipe_sample,
 	.mmap		= perf_event__repipe,
 	.comm		= perf_event__repipe,
 	.fork		= perf_event__repipe,
 	.exit		= perf_event__repipe,
 	.lost		= perf_event__repipe,
-	.read		= perf_event__repipe,
+	.read		= perf_event__repipe_sample,
 	.throttle	= perf_event__repipe,
 	.unthrottle	= perf_event__repipe,
-	.attr		= perf_event__repipe_synth,
-	.event_type	= perf_event__repipe_synth,
-	.tracing_data	= perf_event__repipe_synth,
-	.build_id	= perf_event__repipe_synth,
+	.attr		= perf_event__repipe_attr,
+	.event_type	= perf_event__repipe_event_type_synth,
+	.tracing_data	= perf_event__repipe_tracing_data_synth,
+	.build_id	= perf_event__repipe_op2_synth,
 };
 
 extern volatile int session_done;
@@ -203,17 +229,17 @@ static int __cmd_inject(void)
 	signal(SIGINT, sig_handler);
 
 	if (inject_build_ids) {
-		inject_ops.sample = perf_event__inject_buildid;
-		inject_ops.mmap = perf_event__repipe_mmap;
-		inject_ops.fork = perf_event__repipe_task;
-		inject_ops.tracing_data = perf_event__repipe_tracing_data;
+		perf_inject.sample = perf_event__inject_buildid;
+		perf_inject.mmap = perf_event__repipe_mmap;
+		perf_inject.fork = perf_event__repipe_task;
+		perf_inject.tracing_data = perf_event__repipe_tracing_data;
 	}
 
-	session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
+	session = perf_session__new(input_name, O_RDONLY, false, true, &perf_inject);
 	if (session == NULL)
 		return -ENOMEM;
 
-	ret = perf_session__process_events(session, &inject_ops);
+	ret = perf_session__process_events(session, &perf_inject);
 
 	perf_session__delete(session);
 
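
perf inject acts as a filter: every event read from stdin is written straight back to stdout ("repiped"), and extra events such as synthesized build-ids are spliced into the stream. The heart of a repipe step is a sized write of the raw event that tolerates short writes, roughly:

	#include <stdint.h>
	#include <unistd.h>

	/* Copy one raw event to stdout, handling short writes (a sketch). */
	static int repipe(const void *event, uint32_t size)
	{
		const char *buf = event;

		while (size) {
			ssize_t ret = write(STDOUT_FILENO, buf, size);

			if (ret < 0)
				return -1;
			buf  += ret;
			size -= ret;
		}
		return 0;
	}
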
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 225e963df105..fe1ad8f21961 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -7,6 +7,7 @@
 #include "util/thread.h"
 #include "util/header.h"
 #include "util/session.h"
+#include "util/tool.h"
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
@@ -18,7 +19,7 @@
 struct alloc_stat;
 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
 
-static char const *input_name = "perf.data";
+static const char *input_name;
 
 static int alloc_flag;
 static int caller_flag;
@@ -303,12 +304,13 @@ static void process_raw_event(union perf_event *raw_event __used, void *data,
 	}
 }
 
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool __used,
+				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel __used,
-				struct perf_session *session)
+				struct machine *machine)
 {
-	struct thread *thread = perf_session__findnew(session, event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
@@ -324,7 +326,7 @@ static int process_sample_event(union perf_event *event,
 	return 0;
 }
 
-static struct perf_event_ops event_ops = {
+static struct perf_tool perf_kmem = {
 	.sample		= process_sample_event,
 	.comm		= perf_event__process_comm,
 	.ordered_samples = true,
@@ -483,7 +485,7 @@ static int __cmd_kmem(void)
 {
 	int err = -EINVAL;
 	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
-							 0, false, &event_ops);
+							 0, false, &perf_kmem);
 	if (session == NULL)
 		return -ENOMEM;
 
@@ -494,7 +496,7 @@ static int __cmd_kmem(void)
 		goto out_delete;
 
 	setup_pager();
-	err = perf_session__process_events(session, &event_ops);
+	err = perf_session__process_events(session, &perf_kmem);
 	if (err != 0)
 		goto out_delete;
 	sort_result();
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 34d1e853829d..032324a76b87 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -38,7 +38,7 @@ static const struct option kvm_options[] = {
 	OPT_BOOLEAN(0, "guest", &perf_guest,
 		    "Collect guest os data"),
 	OPT_BOOLEAN(0, "host", &perf_host,
-		    "Collect guest os data"),
+		    "Collect host os data"),
 	OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
 		   "guest mount directory under which every guest os"
 		   " instance has a subdir"),
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 899080ace267..2296c391d0f5 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -12,6 +12,7 @@
 
 #include "util/debug.h"
 #include "util/session.h"
+#include "util/tool.h"
 
 #include <sys/types.h>
 #include <sys/prctl.h>
@@ -325,7 +326,7 @@ alloc_failed:
 	die("memory allocation failed\n");
 }
 
-static char const *input_name = "perf.data";
+static const char *input_name;
 
 struct raw_event_sample {
 	u32 size;
@@ -845,12 +846,13 @@ static void dump_info(void)
 		die("Unknown type of information\n");
 }
 
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool __used,
+				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel __used,
-				struct perf_session *s)
+				struct machine *machine)
 {
-	struct thread *thread = perf_session__findnew(s, sample->tid);
+	struct thread *thread = machine__findnew_thread(machine, sample->tid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
@@ -863,7 +865,7 @@ static int process_sample_event(union perf_event *event,
 	return 0;
 }
 
-static struct perf_event_ops eops = {
+static struct perf_tool eops = {
 	.sample			= process_sample_event,
 	.comm			= perf_event__process_comm,
 	.ordered_samples	= true,
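
Several tools in this series swap perf_session__findnew() for machine__findnew_thread(); thread bookkeeping now belongs to struct machine. The "findnew" idiom itself is plain lookup-or-create, shown here against a toy list rather than perf's rb-tree:

	#include <stdlib.h>

	struct thread {
		int pid;
		struct thread *next;
	};

	struct machine_like {		/* stand-in for struct machine */
		struct thread *threads;
	};

	/* Return the thread for pid, creating it on first sight (NULL on OOM). */
	static struct thread *findnew_thread(struct machine_like *m, int pid)
	{
		struct thread *t;

		for (t = m->threads; t; t = t->next)
			if (t->pid == pid)
				return t;

		t = calloc(1, sizeof(*t));
		if (t) {
			t->pid = pid;
			t->next = m->threads;
			m->threads = t;
		}
		return t;
	}
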
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 710ae3d0a489..59d43abfbfec 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -46,7 +46,6 @@
 
 #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
 #define DEFAULT_FUNC_FILTER "!_*"
-#define MAX_PATH_LEN 256
 
 /* Session management structure */
 static struct {
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6ab58cc99d53..0abfb18b911f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
 #include "util/evsel.h"
 #include "util/debug.h"
 #include "util/session.h"
+#include "util/tool.h"
 #include "util/symbol.h"
 #include "util/cpumap.h"
 #include "util/thread_map.h"
@@ -35,55 +36,36 @@ enum write_mode_t {
 	WRITE_APPEND
 };
 
-static u64 user_interval = ULLONG_MAX;
-static u64 default_interval = 0;
-
-static unsigned int page_size;
-static unsigned int mmap_pages = UINT_MAX;
-static unsigned int user_freq = UINT_MAX;
-static int freq = 1000;
-static int output;
-static int pipe_output = 0;
-static const char *output_name = NULL;
-static bool group = false;
-static int realtime_prio = 0;
-static bool nodelay = false;
-static bool raw_samples = false;
-static bool sample_id_all_avail = true;
-static bool system_wide = false;
-static pid_t target_pid = -1;
-static pid_t target_tid = -1;
-static pid_t child_pid = -1;
-static bool no_inherit = false;
-static enum write_mode_t write_mode = WRITE_FORCE;
-static bool call_graph = false;
-static bool inherit_stat = false;
-static bool no_samples = false;
-static bool sample_address = false;
-static bool sample_time = false;
-static bool no_buildid = false;
-static bool no_buildid_cache = false;
-static struct perf_evlist *evsel_list;
-
-static long samples = 0;
-static u64 bytes_written = 0;
-
-static int file_new = 1;
-static off_t post_processing_offset;
-
-static struct perf_session *session;
-static const char *cpu_list;
-static const char *progname;
-
-static void advance_output(size_t size)
+struct perf_record {
+	struct perf_tool	tool;
+	struct perf_record_opts	opts;
+	u64			bytes_written;
+	const char		*output_name;
+	struct perf_evlist	*evlist;
+	struct perf_session	*session;
+	const char		*progname;
+	int			output;
+	unsigned int		page_size;
+	int			realtime_prio;
+	enum write_mode_t	write_mode;
+	bool			no_buildid;
+	bool			no_buildid_cache;
+	bool			force;
+	bool			file_new;
+	bool			append_file;
+	long			samples;
+	off_t			post_processing_offset;
+};
+
+static void advance_output(struct perf_record *rec, size_t size)
 {
-	bytes_written += size;
+	rec->bytes_written += size;
 }
 
-static void write_output(void *buf, size_t size)
+static void write_output(struct perf_record *rec, void *buf, size_t size)
 {
 	while (size) {
-		int ret = write(output, buf, size);
+		int ret = write(rec->output, buf, size);
 
 		if (ret < 0)
 			die("failed to write");
@@ -91,30 +73,33 @@ static void write_output(void *buf, size_t size)
 		size -= ret;
 		buf += ret;
 
-		bytes_written += ret;
+		rec->bytes_written += ret;
 	}
 }
 
-static int process_synthesized_event(union perf_event *event,
+static int process_synthesized_event(struct perf_tool *tool,
+				     union perf_event *event,
 				     struct perf_sample *sample __used,
-				     struct perf_session *self __used)
+				     struct machine *machine __used)
 {
-	write_output(event, event->header.size);
+	struct perf_record *rec = container_of(tool, struct perf_record, tool);
+	write_output(rec, event, event->header.size);
 	return 0;
 }
 
-static void mmap_read(struct perf_mmap *md)
+static void perf_record__mmap_read(struct perf_record *rec,
+				   struct perf_mmap *md)
 {
 	unsigned int head = perf_mmap__read_head(md);
 	unsigned int old = md->prev;
-	unsigned char *data = md->base + page_size;
+	unsigned char *data = md->base + rec->page_size;
 	unsigned long size;
 	void *buf;
 
 	if (old == head)
 		return;
 
-	samples++;
+	rec->samples++;
 
 	size = head - old;
 
@@ -123,14 +108,14 @@ static void mmap_read(struct perf_mmap *md)
 		size = md->mask + 1 - (old & md->mask);
 		old += size;
 
-		write_output(buf, size);
+		write_output(rec, buf, size);
 	}
 
 	buf = &data[old & md->mask];
 	size = head - old;
 	old += size;
 
-	write_output(buf, size);
+	write_output(rec, buf, size);
 
 	md->prev = old;
 	perf_mmap__write_tail(md, old);
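
perf_record__mmap_read() drains a ring buffer whose head and tail positions only ever grow; they are folded into the buffer with a power-of-two mask, so a read that crosses the end is split into two copies. A compact sketch of that wraparound handling (not perf's code):

	#include <string.h>

	/* Drain [old, head) from a power-of-two ring of size mask + 1. */
	static void ring_read(const unsigned char *data, unsigned long mask,
			      unsigned long old, unsigned long head,
			      unsigned char *out)
	{
		if ((old & mask) + (head - old) > mask + 1) {
			/* wraps: copy the tail end of the buffer first... */
			unsigned long chunk = mask + 1 - (old & mask);

			memcpy(out, &data[old & mask], chunk);
			out += chunk;
			old += chunk;
		}
		/* ...then the remainder from the start of the buffer */
		memcpy(out, &data[old & mask], head - old);
	}
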
@@ -149,17 +134,18 @@ static void sig_handler(int sig)
 	signr = sig;
 }
 
-static void sig_atexit(void)
+static void perf_record__sig_exit(int exit_status __used, void *arg)
 {
+	struct perf_record *rec = arg;
 	int status;
 
-	if (child_pid > 0) {
+	if (rec->evlist->workload.pid > 0) {
 		if (!child_finished)
-			kill(child_pid, SIGTERM);
+			kill(rec->evlist->workload.pid, SIGTERM);
 
 		wait(&status);
 		if (WIFSIGNALED(status))
-			psignal(WTERMSIG(status), progname);
+			psignal(WTERMSIG(status), rec->progname);
 	}
 
 	if (signr == -1 || signr == SIGUSR1)
@@ -169,78 +155,6 @@ static void sig_atexit(void)
 	kill(getpid(), signr);
 }
 
-static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
-{
-	struct perf_event_attr *attr = &evsel->attr;
-	int track = !evsel->idx; /* only the first counter needs these */
-
-	attr->disabled = 1;
-	attr->inherit = !no_inherit;
-	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
-			    PERF_FORMAT_TOTAL_TIME_RUNNING |
-			    PERF_FORMAT_ID;
-
-	attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-
-	if (evlist->nr_entries > 1)
-		attr->sample_type |= PERF_SAMPLE_ID;
-
-	/*
-	 * We default some events to a 1 default interval. But keep
-	 * it a weak assumption overridable by the user.
-	 */
-	if (!attr->sample_period || (user_freq != UINT_MAX &&
-				     user_interval != ULLONG_MAX)) {
-		if (freq) {
-			attr->sample_type |= PERF_SAMPLE_PERIOD;
-			attr->freq = 1;
-			attr->sample_freq = freq;
-		} else {
-			attr->sample_period = default_interval;
-		}
-	}
-
-	if (no_samples)
-		attr->sample_freq = 0;
-
-	if (inherit_stat)
-		attr->inherit_stat = 1;
-
-	if (sample_address) {
-		attr->sample_type |= PERF_SAMPLE_ADDR;
-		attr->mmap_data = track;
-	}
-
-	if (call_graph)
-		attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
-
-	if (system_wide)
-		attr->sample_type |= PERF_SAMPLE_CPU;
-
-	if (sample_id_all_avail &&
-	    (sample_time || system_wide || !no_inherit || cpu_list))
-		attr->sample_type |= PERF_SAMPLE_TIME;
-
-	if (raw_samples) {
-		attr->sample_type |= PERF_SAMPLE_TIME;
-		attr->sample_type |= PERF_SAMPLE_RAW;
-		attr->sample_type |= PERF_SAMPLE_CPU;
-	}
-
-	if (nodelay) {
-		attr->watermark = 0;
-		attr->wakeup_events = 1;
-	}
-
-	attr->mmap = track;
-	attr->comm = track;
-
-	if (target_pid == -1 && target_tid == -1 && !system_wide) {
-		attr->disabled = 1;
-		attr->enable_on_exec = 1;
-	}
-}
-
 static bool perf_evlist__equal(struct perf_evlist *evlist,
 			       struct perf_evlist *other)
 {
@@ -260,15 +174,17 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
 	return true;
 }
 
-static void open_counters(struct perf_evlist *evlist)
+static void perf_record__open(struct perf_record *rec)
 {
 	struct perf_evsel *pos, *first;
-
-	if (evlist->cpus->map[0] < 0)
-		no_inherit = true;
+	struct perf_evlist *evlist = rec->evlist;
+	struct perf_session *session = rec->session;
+	struct perf_record_opts *opts = &rec->opts;
 
 	first = list_entry(evlist->entries.next, struct perf_evsel, node);
 
+	perf_evlist__config_attrs(evlist, opts);
+
 	list_for_each_entry(pos, &evlist->entries, node) {
 		struct perf_event_attr *attr = &pos->attr;
 		struct xyarray *group_fd = NULL;
@@ -286,29 +202,27 @@ static void open_counters(struct perf_evlist *evlist)
 		 */
 		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
-		if (group && pos != first)
+		if (opts->group && pos != first)
 			group_fd = first->fd;
-
-		config_attr(pos, evlist);
 retry_sample_id:
-		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
 try_again:
-		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
-				     group_fd) < 0) {
+		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
+				     opts->group, group_fd) < 0) {
 			int err = errno;
 
 			if (err == EPERM || err == EACCES) {
 				ui__error_paranoid();
 				exit(EXIT_FAILURE);
-			} else if (err == ENODEV && cpu_list) {
+			} else if (err == ENODEV && opts->cpu_list) {
 				die("No such device - did you specify"
 				    " an out-of-range profile CPU?\n");
-			} else if (err == EINVAL && sample_id_all_avail) {
+			} else if (err == EINVAL && opts->sample_id_all_avail) {
 				/*
 				 * Old kernel, no attr->sample_id_type_all field
 				 */
-				sample_id_all_avail = false;
-				if (!sample_time && !raw_samples && !time_needed)
+				opts->sample_id_all_avail = false;
+				if (!opts->sample_time && !opts->raw_samples && !time_needed)
 					attr->sample_type &= ~PERF_SAMPLE_TIME;
 
 				goto retry_sample_id;
@@ -358,10 +272,20 @@ try_again:
358 exit(-1); 272 exit(-1);
359 } 273 }
360 274
361 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) 275 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
276 if (errno == EPERM)
277 die("Permission error mapping pages.\n"
278 "Consider increasing "
279 "/proc/sys/kernel/perf_event_mlock_kb,\n"
280 "or try again with a smaller value of -m/--mmap_pages.\n"
281 "(current value: %d)\n", opts->mmap_pages);
282 else if (!is_power_of_2(opts->mmap_pages))
283 die("--mmap_pages/-m value must be a power of two.");
284
362 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 285 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
286 }
363 287
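
The new error path distinguishes the two common mmap failures: EPERM when the
request exceeds the locked-memory budget in /proc/sys/kernel/perf_event_mlock_kb,
and a --mmap_pages value that is not a power of two, which the kernel rejects
because the ring buffer must be 2^n data pages plus one control page. A sketch
of how a caller could sanitize the value up front (helper names are made up):

    #include <stdbool.h>
    #include <stdio.h>

    static bool is_pow2(unsigned int n)
    {
            return n && !(n & (n - 1));
    }

    static unsigned int round_down_pow2(unsigned int n)
    {
            if (!n)
                    return 0;
            while (!is_pow2(n))
                    n &= n - 1;     /* drop the lowest set bit */
            return n;
    }

    int main(void)
    {
            printf("%u -> %u pages\n", 100u, round_down_pow2(100)); /* 64 */
            return 0;
    }
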
364 if (file_new) 288 if (rec->file_new)
365 session->evlist = evlist; 289 session->evlist = evlist;
366 else { 290 else {
367 if (!perf_evlist__equal(session->evlist, evlist)) { 291 if (!perf_evlist__equal(session->evlist, evlist)) {
@@ -373,29 +297,32 @@ try_again:
373 perf_session__update_sample_type(session); 297 perf_session__update_sample_type(session);
374} 298}
375 299
376static int process_buildids(void) 300static int process_buildids(struct perf_record *rec)
377{ 301{
378 u64 size = lseek(output, 0, SEEK_CUR); 302 u64 size = lseek(rec->output, 0, SEEK_CUR);
379 303
380 if (size == 0) 304 if (size == 0)
381 return 0; 305 return 0;
382 306
383 session->fd = output; 307 rec->session->fd = rec->output;
384 return __perf_session__process_events(session, post_processing_offset, 308 return __perf_session__process_events(rec->session, rec->post_processing_offset,
385 size - post_processing_offset, 309 size - rec->post_processing_offset,
386 size, &build_id__mark_dso_hit_ops); 310 size, &build_id__mark_dso_hit_ops);
387} 311}
388 312
389static void atexit_header(void) 313static void perf_record__exit(int status __used, void *arg)
390{ 314{
391 if (!pipe_output) { 315 struct perf_record *rec = arg;
392 session->header.data_size += bytes_written; 316
393 317 if (!rec->opts.pipe_output) {
394 if (!no_buildid) 318 rec->session->header.data_size += rec->bytes_written;
395 process_buildids(); 319
396 perf_session__write_header(session, evsel_list, output, true); 320 if (!rec->no_buildid)
397 perf_session__delete(session); 321 process_buildids(rec);
398 perf_evlist__delete(evsel_list); 322 perf_session__write_header(rec->session, rec->evlist,
323 rec->output, true);
324 perf_session__delete(rec->session);
325 perf_evlist__delete(rec->evlist);
399 symbol__exit(); 326 symbol__exit();
400 } 327 }
401} 328}
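
atexit_header() becomes perf_record__exit(), registered with on_exit(3)
instead of atexit(3): the glibc extension passes the handler the exit status
plus a user-supplied pointer, so the per-run state can travel along instead of
living in globals. The pattern in isolation (glibc assumed, names invented):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>

    struct run_state { long bytes_written; };

    static void report_exit(int status, void *arg)
    {
            struct run_state *st = arg;
            fprintf(stderr, "exit %d after %ld bytes\n",
                    status, st->bytes_written);
    }

    int main(void)
    {
            static struct run_state st = { .bytes_written = 42 };
            on_exit(report_exit, &st);
            return 0;
    }
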
@@ -403,7 +330,7 @@ static void atexit_header(void)
403static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 330static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
404{ 331{
405 int err; 332 int err;
406 struct perf_session *psession = data; 333 struct perf_tool *tool = data;
407 334
408 if (machine__is_host(machine)) 335 if (machine__is_host(machine))
409 return; 336 return;
@@ -416,8 +343,8 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
416 *method is used to avoid symbol missing when the first addr is 343 *method is used to avoid symbol missing when the first addr is
417 *in module instead of in guest kernel. 344 *in module instead of in guest kernel.
418 */ 345 */
419 err = perf_event__synthesize_modules(process_synthesized_event, 346 err = perf_event__synthesize_modules(tool, process_synthesized_event,
420 psession, machine); 347 machine);
421 if (err < 0) 348 if (err < 0)
422 pr_err("Couldn't record guest kernel [%d]'s reference" 349 pr_err("Couldn't record guest kernel [%d]'s reference"
423 " relocation symbol.\n", machine->pid); 350 " relocation symbol.\n", machine->pid);
@@ -426,12 +353,11 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
426 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 353 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
427 * have no _text sometimes. 354 * have no _text sometimes.
428 */ 355 */
429 err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 356 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
430 psession, machine, "_text"); 357 machine, "_text");
431 if (err < 0) 358 if (err < 0)
432 err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 359 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
433 psession, machine, 360 machine, "_stext");
434 "_stext");
435 if (err < 0) 361 if (err < 0)
436 pr_err("Couldn't record guest kernel [%d]'s reference" 362 pr_err("Couldn't record guest kernel [%d]'s reference"
437 " relocation symbol.\n", machine->pid); 363 " relocation symbol.\n", machine->pid);
@@ -442,73 +368,71 @@ static struct perf_event_header finished_round_event = {
442 .type = PERF_RECORD_FINISHED_ROUND, 368 .type = PERF_RECORD_FINISHED_ROUND,
443}; 369};
444 370
445static void mmap_read_all(void) 371static void perf_record__mmap_read_all(struct perf_record *rec)
446{ 372{
447 int i; 373 int i;
448 374
449 for (i = 0; i < evsel_list->nr_mmaps; i++) { 375 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
450 if (evsel_list->mmap[i].base) 376 if (rec->evlist->mmap[i].base)
451 mmap_read(&evsel_list->mmap[i]); 377 perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
452 } 378 }
453 379
454 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) 380 if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
455 write_output(&finished_round_event, sizeof(finished_round_event)); 381 write_output(rec, &finished_round_event, sizeof(finished_round_event));
456} 382}
457 383
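
PERF_RECORD_FINISHED_ROUND is a userspace-only marker appended after each full
drain of the mmaps; on the report side it bounds how far queued samples may be
reordered before they must be flushed. Stripped to the write itself, roughly
(the type value, 68 in this tree's util/event.h, and the header layout are
restated here only so the sketch stands alone):

    #include <stdint.h>
    #include <unistd.h>

    #define PERF_RECORD_FINISHED_ROUND 68   /* tools/perf/util/event.h */

    struct perf_event_header {
            uint32_t type;
            uint16_t misc;
            uint16_t size;
    };

    int main(void)
    {
            struct perf_event_header h = {
                    .type = PERF_RECORD_FINISHED_ROUND,
                    .size = sizeof(h),
            };
            /* appended to the perf.data stream after every mmap pass */
            return write(STDOUT_FILENO, &h, sizeof(h)) == sizeof(h) ? 0 : 1;
    }
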
458static int __cmd_record(int argc, const char **argv) 384static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
459{ 385{
460 struct stat st; 386 struct stat st;
461 int flags; 387 int flags;
462 int err; 388 int err, output;
463 unsigned long waking = 0; 389 unsigned long waking = 0;
464 int child_ready_pipe[2], go_pipe[2];
465 const bool forks = argc > 0; 390 const bool forks = argc > 0;
466 char buf;
467 struct machine *machine; 391 struct machine *machine;
392 struct perf_tool *tool = &rec->tool;
393 struct perf_record_opts *opts = &rec->opts;
394 struct perf_evlist *evsel_list = rec->evlist;
395 const char *output_name = rec->output_name;
396 struct perf_session *session;
468 397
469 progname = argv[0]; 398 rec->progname = argv[0];
470 399
471 page_size = sysconf(_SC_PAGE_SIZE); 400 rec->page_size = sysconf(_SC_PAGE_SIZE);
472 401
473 atexit(sig_atexit); 402 on_exit(perf_record__sig_exit, rec);
474 signal(SIGCHLD, sig_handler); 403 signal(SIGCHLD, sig_handler);
475 signal(SIGINT, sig_handler); 404 signal(SIGINT, sig_handler);
476 signal(SIGUSR1, sig_handler); 405 signal(SIGUSR1, sig_handler);
477 406
478 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
479 perror("failed to create pipes");
480 exit(-1);
481 }
482
483 if (!output_name) { 407 if (!output_name) {
484 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) 408 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
485 pipe_output = 1; 409 opts->pipe_output = true;
486 else 410 else
487 output_name = "perf.data"; 411 rec->output_name = output_name = "perf.data";
488 } 412 }
489 if (output_name) { 413 if (output_name) {
490 if (!strcmp(output_name, "-")) 414 if (!strcmp(output_name, "-"))
491 pipe_output = 1; 415 opts->pipe_output = true;
492 else if (!stat(output_name, &st) && st.st_size) { 416 else if (!stat(output_name, &st) && st.st_size) {
493 if (write_mode == WRITE_FORCE) { 417 if (rec->write_mode == WRITE_FORCE) {
494 char oldname[PATH_MAX]; 418 char oldname[PATH_MAX];
495 snprintf(oldname, sizeof(oldname), "%s.old", 419 snprintf(oldname, sizeof(oldname), "%s.old",
496 output_name); 420 output_name);
497 unlink(oldname); 421 unlink(oldname);
498 rename(output_name, oldname); 422 rename(output_name, oldname);
499 } 423 }
500 } else if (write_mode == WRITE_APPEND) { 424 } else if (rec->write_mode == WRITE_APPEND) {
501 write_mode = WRITE_FORCE; 425 rec->write_mode = WRITE_FORCE;
502 } 426 }
503 } 427 }
504 428
505 flags = O_CREAT|O_RDWR; 429 flags = O_CREAT|O_RDWR;
506 if (write_mode == WRITE_APPEND) 430 if (rec->write_mode == WRITE_APPEND)
507 file_new = 0; 431 rec->file_new = 0;
508 else 432 else
509 flags |= O_TRUNC; 433 flags |= O_TRUNC;
510 434
511 if (pipe_output) 435 if (opts->pipe_output)
512 output = STDOUT_FILENO; 436 output = STDOUT_FILENO;
513 else 437 else
514 output = open(output_name, flags, S_IRUSR | S_IWUSR); 438 output = open(output_name, flags, S_IRUSR | S_IWUSR);
@@ -517,17 +441,21 @@ static int __cmd_record(int argc, const char **argv)
517 exit(-1); 441 exit(-1);
518 } 442 }
519 443
444 rec->output = output;
445
520 session = perf_session__new(output_name, O_WRONLY, 446 session = perf_session__new(output_name, O_WRONLY,
521 write_mode == WRITE_FORCE, false, NULL); 447 rec->write_mode == WRITE_FORCE, false, NULL);
522 if (session == NULL) { 448 if (session == NULL) {
523 pr_err("Not enough memory for reading perf file header\n"); 449 pr_err("Not enough memory for reading perf file header\n");
524 return -1; 450 return -1;
525 } 451 }
526 452
527 if (!no_buildid) 453 rec->session = session;
454
455 if (!rec->no_buildid)
528 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 456 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
529 457
530 if (!file_new) { 458 if (!rec->file_new) {
531 err = perf_session__read_header(session, output); 459 err = perf_session__read_header(session, output);
532 if (err < 0) 460 if (err < 0)
533 goto out_delete_session; 461 goto out_delete_session;
@@ -549,94 +477,57 @@ static int __cmd_record(int argc, const char **argv)
549 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); 477 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
550 perf_header__set_feat(&session->header, HEADER_CPUID); 478 perf_header__set_feat(&session->header, HEADER_CPUID);
551 479
552 /* 512 kiB: default amount of unprivileged mlocked memory */
553 if (mmap_pages == UINT_MAX)
554 mmap_pages = (512 * 1024) / page_size;
555
556 if (forks) { 480 if (forks) {
557 child_pid = fork(); 481 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
558 if (child_pid < 0) { 482 if (err < 0) {
559 perror("failed to fork"); 483 pr_err("Couldn't run the workload!\n");
560 exit(-1); 484 goto out_delete_session;
561 }
562
563 if (!child_pid) {
564 if (pipe_output)
565 dup2(2, 1);
566 close(child_ready_pipe[0]);
567 close(go_pipe[1]);
568 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
569
570 /*
571 * Do a dummy execvp to get the PLT entry resolved,
572 * so we avoid the resolver overhead on the real
573 * execvp call.
574 */
575 execvp("", (char **)argv);
576
577 /*
578 * Tell the parent we're ready to go
579 */
580 close(child_ready_pipe[1]);
581
582 /*
583 * Wait until the parent tells us to go.
584 */
585 if (read(go_pipe[0], &buf, 1) == -1)
586 perror("unable to read pipe");
587
588 execvp(argv[0], (char **)argv);
589
590 perror(argv[0]);
591 kill(getppid(), SIGUSR1);
592 exit(-1);
593 }
594
595 if (!system_wide && target_tid == -1 && target_pid == -1)
596 evsel_list->threads->map[0] = child_pid;
597
598 close(child_ready_pipe[1]);
599 close(go_pipe[0]);
600 /*
601 * wait for child to settle
602 */
603 if (read(child_ready_pipe[0], &buf, 1) == -1) {
604 perror("unable to read pipe");
605 exit(-1);
606 } 485 }
607 close(child_ready_pipe[0]);
608 } 486 }
609 487
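
The open-coded fork above, with its child_ready/go pipe pair and the dummy
execvp("") PLT warm-up, collapses into perf_evlist__prepare_workload(); its
counterpart perf_evlist__start_workload() appears further down where
close(go_pipe[1]) used to be. The handshake those helpers encapsulate, reduced
to its core: the child parks on a read() of the go pipe until the parent,
having opened the counters, closes its write end:

    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            int go_pipe[2];
            char bf;

            if (argc < 2 || pipe(go_pipe) < 0)
                    return 1;

            switch (fork()) {
            case -1:
                    perror("fork");
                    return 1;
            case 0:         /* child: block until the parent is ready */
                    close(go_pipe[1]);
                    if (read(go_pipe[0], &bf, 1) == -1)
                            perror("go pipe");
                    execvp(argv[1], &argv[1]);
                    perror(argv[1]);
                    _exit(127);
            default:        /* parent: counters would be opened here... */
                    close(go_pipe[0]);
                    close(go_pipe[1]);  /* ...then this releases the child */
            }
            return 0;
    }
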
610 open_counters(evsel_list); 488 perf_record__open(rec);
611 489
612 /* 490 /*
613 * perf_session__delete(session) will be called at atexit_header() 491 * perf_session__delete(session) will be called at perf_record__exit()
614 */ 492 */
615 atexit(atexit_header); 493 on_exit(perf_record__exit, rec);
616 494
617 if (pipe_output) { 495 if (opts->pipe_output) {
618 err = perf_header__write_pipe(output); 496 err = perf_header__write_pipe(output);
619 if (err < 0) 497 if (err < 0)
620 return err; 498 return err;
621 } else if (file_new) { 499 } else if (rec->file_new) {
622 err = perf_session__write_header(session, evsel_list, 500 err = perf_session__write_header(session, evsel_list,
623 output, false); 501 output, false);
624 if (err < 0) 502 if (err < 0)
625 return err; 503 return err;
626 } 504 }
627 505
628 post_processing_offset = lseek(output, 0, SEEK_CUR); 506 if (!rec->no_buildid
507 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
508 pr_err("Couldn't generating buildids. "
509 "Use --no-buildid to profile anyway.\n");
510 return -1;
511 }
629 512
630 if (pipe_output) { 513 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
631 err = perf_session__synthesize_attrs(session, 514
632 process_synthesized_event); 515 machine = perf_session__find_host_machine(session);
516 if (!machine) {
517 pr_err("Couldn't find native kernel information.\n");
518 return -1;
519 }
520
521 if (opts->pipe_output) {
522 err = perf_event__synthesize_attrs(tool, session,
523 process_synthesized_event);
633 if (err < 0) { 524 if (err < 0) {
634 pr_err("Couldn't synthesize attrs.\n"); 525 pr_err("Couldn't synthesize attrs.\n");
635 return err; 526 return err;
636 } 527 }
637 528
638 err = perf_event__synthesize_event_types(process_synthesized_event, 529 err = perf_event__synthesize_event_types(tool, process_synthesized_event,
639 session); 530 machine);
640 if (err < 0) { 531 if (err < 0) {
641 pr_err("Couldn't synthesize event_types.\n"); 532 pr_err("Couldn't synthesize event_types.\n");
642 return err; 533 return err;
@@ -651,56 +542,49 @@ static int __cmd_record(int argc, const char **argv)
651 * return this more properly and also 542 * return this more properly and also
652 * propagate errors that now are calling die() 543 * propagate errors that now are calling die()
653 */ 544 */
654 err = perf_event__synthesize_tracing_data(output, evsel_list, 545 err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
655 process_synthesized_event, 546 process_synthesized_event);
656 session);
657 if (err <= 0) { 547 if (err <= 0) {
658 pr_err("Couldn't record tracing data.\n"); 548 pr_err("Couldn't record tracing data.\n");
659 return err; 549 return err;
660 } 550 }
661 advance_output(err); 551 advance_output(rec, err);
662 } 552 }
663 } 553 }
664 554
665 machine = perf_session__find_host_machine(session); 555 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
666 if (!machine) { 556 machine, "_text");
667 pr_err("Couldn't find native kernel information.\n");
668 return -1;
669 }
670
671 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
672 session, machine, "_text");
673 if (err < 0) 557 if (err < 0)
674 err = perf_event__synthesize_kernel_mmap(process_synthesized_event, 558 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
675 session, machine, "_stext"); 559 machine, "_stext");
676 if (err < 0) 560 if (err < 0)
677 pr_err("Couldn't record kernel reference relocation symbol\n" 561 pr_err("Couldn't record kernel reference relocation symbol\n"
678 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 562 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
679 "Check /proc/kallsyms permission or run as root.\n"); 563 "Check /proc/kallsyms permission or run as root.\n");
680 564
681 err = perf_event__synthesize_modules(process_synthesized_event, 565 err = perf_event__synthesize_modules(tool, process_synthesized_event,
682 session, machine); 566 machine);
683 if (err < 0) 567 if (err < 0)
684 pr_err("Couldn't record kernel module information.\n" 568 pr_err("Couldn't record kernel module information.\n"
685 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 569 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
686 "Check /proc/modules permission or run as root.\n"); 570 "Check /proc/modules permission or run as root.\n");
687 571
688 if (perf_guest) 572 if (perf_guest)
689 perf_session__process_machines(session, 573 perf_session__process_machines(session, tool,
690 perf_event__synthesize_guest_os); 574 perf_event__synthesize_guest_os);
691 575
692 if (!system_wide) 576 if (!opts->system_wide)
693 perf_event__synthesize_thread_map(evsel_list->threads, 577 perf_event__synthesize_thread_map(tool, evsel_list->threads,
694 process_synthesized_event, 578 process_synthesized_event,
695 session); 579 machine);
696 else 580 else
697 perf_event__synthesize_threads(process_synthesized_event, 581 perf_event__synthesize_threads(tool, process_synthesized_event,
698 session); 582 machine);
699 583
700 if (realtime_prio) { 584 if (rec->realtime_prio) {
701 struct sched_param param; 585 struct sched_param param;
702 586
703 param.sched_priority = realtime_prio; 587 param.sched_priority = rec->realtime_prio;
704 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 588 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
705 pr_err("Could not set realtime priority.\n"); 589 pr_err("Could not set realtime priority.\n");
706 exit(-1); 590 exit(-1);
@@ -713,14 +597,14 @@ static int __cmd_record(int argc, const char **argv)
713 * Let the child rip 597 * Let the child rip
714 */ 598 */
715 if (forks) 599 if (forks)
716 close(go_pipe[1]); 600 perf_evlist__start_workload(evsel_list);
717 601
718 for (;;) { 602 for (;;) {
719 int hits = samples; 603 int hits = rec->samples;
720 604
721 mmap_read_all(); 605 perf_record__mmap_read_all(rec);
722 606
723 if (hits == samples) { 607 if (hits == rec->samples) {
724 if (done) 608 if (done)
725 break; 609 break;
726 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1); 610 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
@@ -741,9 +625,9 @@ static int __cmd_record(int argc, const char **argv)
741 */ 625 */
742 fprintf(stderr, 626 fprintf(stderr,
743 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", 627 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
744 (double)bytes_written / 1024.0 / 1024.0, 628 (double)rec->bytes_written / 1024.0 / 1024.0,
745 output_name, 629 output_name,
746 bytes_written / 24); 630 rec->bytes_written / 24);
747 631
748 return 0; 632 return 0;
749 633
@@ -758,58 +642,89 @@ static const char * const record_usage[] = {
758 NULL 642 NULL
759}; 643};
760 644
761static bool force, append_file; 645/*
646 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
647 * because we need to have access to it in perf_record__exit, that is called
648 * after cmd_record() exits, but since record_options need to be accessible to
649 * builtin-script, leave it here.
650 *
651 * At least we don't ouch it in all the other functions here directly.
652 *
653 * Just say no to tons of global variables, sigh.
654 */
655static struct perf_record record = {
656 .opts = {
657 .target_pid = -1,
658 .target_tid = -1,
659 .mmap_pages = UINT_MAX,
660 .user_freq = UINT_MAX,
661 .user_interval = ULLONG_MAX,
662 .freq = 1000,
663 .sample_id_all_avail = true,
664 },
665 .write_mode = WRITE_FORCE,
666 .file_new = true,
667};
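
The designated initializer doubles as the option-default table: UINT_MAX and
ULLONG_MAX serve as "unset" sentinels that cmd_record() later distinguishes
from user-supplied values (see the user_interval/user_freq checks near the end
of this file's diff). The idiom by itself, with invented field names:

    #include <limits.h>
    #include <stdio.h>

    struct opts { unsigned int user_freq, freq; };

    int main(void)
    {
            struct opts o = { .user_freq = UINT_MAX, .freq = 1000 };

            /* option parsing may or may not store into o.user_freq... */
            if (o.user_freq != UINT_MAX)    /* user gave -F: override */
                    o.freq = o.user_freq;
            printf("sampling at %u Hz\n", o.freq);
            return 0;
    }
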
762 668
669/*
670 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
671 * with it and switch to use the library functions in perf_evlist that came
672 * from builtin-record.c, i.e. use perf_record_opts,
673 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
674 * using pipes, etc.
675 */
763const struct option record_options[] = { 676const struct option record_options[] = {
764 OPT_CALLBACK('e', "event", &evsel_list, "event", 677 OPT_CALLBACK('e', "event", &record.evlist, "event",
765 "event selector. use 'perf list' to list available events", 678 "event selector. use 'perf list' to list available events",
766 parse_events_option), 679 parse_events_option),
767 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 680 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
768 "event filter", parse_filter), 681 "event filter", parse_filter),
769 OPT_INTEGER('p', "pid", &target_pid, 682 OPT_INTEGER('p', "pid", &record.opts.target_pid,
770 "record events on existing process id"), 683 "record events on existing process id"),
771 OPT_INTEGER('t', "tid", &target_tid, 684 OPT_INTEGER('t', "tid", &record.opts.target_tid,
772 "record events on existing thread id"), 685 "record events on existing thread id"),
773 OPT_INTEGER('r', "realtime", &realtime_prio, 686 OPT_INTEGER('r', "realtime", &record.realtime_prio,
774 "collect data with this RT SCHED_FIFO priority"), 687 "collect data with this RT SCHED_FIFO priority"),
775 OPT_BOOLEAN('D', "no-delay", &nodelay, 688 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
776 "collect data without buffering"), 689 "collect data without buffering"),
777 OPT_BOOLEAN('R', "raw-samples", &raw_samples, 690 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
778 "collect raw sample records from all opened counters"), 691 "collect raw sample records from all opened counters"),
779 OPT_BOOLEAN('a', "all-cpus", &system_wide, 692 OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
780 "system-wide collection from all CPUs"), 693 "system-wide collection from all CPUs"),
781 OPT_BOOLEAN('A', "append", &append_file, 694 OPT_BOOLEAN('A', "append", &record.append_file,
782 "append to the output file to do incremental profiling"), 695 "append to the output file to do incremental profiling"),
783 OPT_STRING('C', "cpu", &cpu_list, "cpu", 696 OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
784 "list of cpus to monitor"), 697 "list of cpus to monitor"),
785 OPT_BOOLEAN('f', "force", &force, 698 OPT_BOOLEAN('f', "force", &record.force,
786 "overwrite existing data file (deprecated)"), 699 "overwrite existing data file (deprecated)"),
787 OPT_U64('c', "count", &user_interval, "event period to sample"), 700 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
788 OPT_STRING('o', "output", &output_name, "file", 701 OPT_STRING('o', "output", &record.output_name, "file",
789 "output file name"), 702 "output file name"),
790 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 703 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
791 "child tasks do not inherit counters"), 704 "child tasks do not inherit counters"),
792 OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), 705 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
793 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 706 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
794 OPT_BOOLEAN(0, "group", &group, 707 "number of mmap data pages"),
708 OPT_BOOLEAN(0, "group", &record.opts.group,
795 "put the counters into a counter group"), 709 "put the counters into a counter group"),
796 OPT_BOOLEAN('g', "call-graph", &call_graph, 710 OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
797 "do call-graph (stack chain/backtrace) recording"), 711 "do call-graph (stack chain/backtrace) recording"),
798 OPT_INCR('v', "verbose", &verbose, 712 OPT_INCR('v', "verbose", &verbose,
799 "be more verbose (show counter open errors, etc)"), 713 "be more verbose (show counter open errors, etc)"),
800 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 714 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
801 OPT_BOOLEAN('s', "stat", &inherit_stat, 715 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
802 "per thread counts"), 716 "per thread counts"),
803 OPT_BOOLEAN('d', "data", &sample_address, 717 OPT_BOOLEAN('d', "data", &record.opts.sample_address,
804 "Sample addresses"), 718 "Sample addresses"),
805 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"), 719 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
806 OPT_BOOLEAN('n', "no-samples", &no_samples, 720 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
721 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
807 "don't sample"), 722 "don't sample"),
808 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache, 723 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
809 "do not update the buildid cache"), 724 "do not update the buildid cache"),
810 OPT_BOOLEAN('B', "no-buildid", &no_buildid, 725 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
811 "do not collect buildids in perf.data"), 726 "do not collect buildids in perf.data"),
812 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 727 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
813 "monitor event in cgroup name only", 728 "monitor event in cgroup name only",
814 parse_cgroups), 729 parse_cgroups),
815 OPT_END() 730 OPT_END()
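
Every OPT_* entry in this table now stores through a pointer into the single
'record' instance instead of a file-scope variable, which is what lets
builtin-script keep sharing record_options. As a rough shape of what each
entry carries (an assumption about the parse-options type, not its real
definition), only the value pointer changed in this hunk:

    struct opt {
            char        sname;      /* -a */
            const char *lname;      /* --all-cpus */
            void       *value;      /* e.g. &record.opts.system_wide */
            const char *help;
    };
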
@@ -819,6 +734,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
819{ 734{
820 int err = -ENOMEM; 735 int err = -ENOMEM;
821 struct perf_evsel *pos; 736 struct perf_evsel *pos;
737 struct perf_evlist *evsel_list;
738 struct perf_record *rec = &record;
822 739
823 perf_header__set_cmdline(argc, argv); 740 perf_header__set_cmdline(argc, argv);
824 741
@@ -826,23 +743,25 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
826 if (evsel_list == NULL) 743 if (evsel_list == NULL)
827 return -ENOMEM; 744 return -ENOMEM;
828 745
746 rec->evlist = evsel_list;
747
829 argc = parse_options(argc, argv, record_options, record_usage, 748 argc = parse_options(argc, argv, record_options, record_usage,
830 PARSE_OPT_STOP_AT_NON_OPTION); 749 PARSE_OPT_STOP_AT_NON_OPTION);
831 if (!argc && target_pid == -1 && target_tid == -1 && 750 if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
832 !system_wide && !cpu_list) 751 !rec->opts.system_wide && !rec->opts.cpu_list)
833 usage_with_options(record_usage, record_options); 752 usage_with_options(record_usage, record_options);
834 753
835 if (force && append_file) { 754 if (rec->force && rec->append_file) {
836 fprintf(stderr, "Can't overwrite and append at the same time." 755 fprintf(stderr, "Can't overwrite and append at the same time."
837 " You need to choose between -f and -A"); 756 " You need to choose between -f and -A");
838 usage_with_options(record_usage, record_options); 757 usage_with_options(record_usage, record_options);
839 } else if (append_file) { 758 } else if (rec->append_file) {
840 write_mode = WRITE_APPEND; 759 rec->write_mode = WRITE_APPEND;
841 } else { 760 } else {
842 write_mode = WRITE_FORCE; 761 rec->write_mode = WRITE_FORCE;
843 } 762 }
844 763
845 if (nr_cgroups && !system_wide) { 764 if (nr_cgroups && !rec->opts.system_wide) {
846 fprintf(stderr, "cgroup monitoring only available in" 765 fprintf(stderr, "cgroup monitoring only available in"
847 " system-wide mode\n"); 766 " system-wide mode\n");
848 usage_with_options(record_usage, record_options); 767 usage_with_options(record_usage, record_options);
@@ -860,7 +779,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
860"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 779"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
861"even with a suitable vmlinux or kallsyms file.\n\n"); 780"even with a suitable vmlinux or kallsyms file.\n\n");
862 781
863 if (no_buildid_cache || no_buildid) 782 if (rec->no_buildid_cache || rec->no_buildid)
864 disable_buildid_cache(); 783 disable_buildid_cache();
865 784
866 if (evsel_list->nr_entries == 0 && 785 if (evsel_list->nr_entries == 0 &&
@@ -869,43 +788,37 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
869 goto out_symbol_exit; 788 goto out_symbol_exit;
870 } 789 }
871 790
872 if (target_pid != -1) 791 if (rec->opts.target_pid != -1)
873 target_tid = target_pid; 792 rec->opts.target_tid = rec->opts.target_pid;
874 793
875 if (perf_evlist__create_maps(evsel_list, target_pid, 794 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
876 target_tid, cpu_list) < 0) 795 rec->opts.target_tid, rec->opts.cpu_list) < 0)
877 usage_with_options(record_usage, record_options); 796 usage_with_options(record_usage, record_options);
878 797
879 list_for_each_entry(pos, &evsel_list->entries, node) { 798 list_for_each_entry(pos, &evsel_list->entries, node) {
880 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
881 evsel_list->threads->nr) < 0)
882 goto out_free_fd;
883 if (perf_header__push_event(pos->attr.config, event_name(pos))) 799 if (perf_header__push_event(pos->attr.config, event_name(pos)))
884 goto out_free_fd; 800 goto out_free_fd;
885 } 801 }
886 802
887 if (perf_evlist__alloc_pollfd(evsel_list) < 0) 803 if (rec->opts.user_interval != ULLONG_MAX)
888 goto out_free_fd; 804 rec->opts.default_interval = rec->opts.user_interval;
889 805 if (rec->opts.user_freq != UINT_MAX)
890 if (user_interval != ULLONG_MAX) 806 rec->opts.freq = rec->opts.user_freq;
891 default_interval = user_interval;
892 if (user_freq != UINT_MAX)
893 freq = user_freq;
894 807
895 /* 808 /*
896 * User specified count overrides default frequency. 809 * User specified count overrides default frequency.
897 */ 810 */
898 if (default_interval) 811 if (rec->opts.default_interval)
899 freq = 0; 812 rec->opts.freq = 0;
900 else if (freq) { 813 else if (rec->opts.freq) {
901 default_interval = freq; 814 rec->opts.default_interval = rec->opts.freq;
902 } else { 815 } else {
903 fprintf(stderr, "frequency and count are zero, aborting\n"); 816 fprintf(stderr, "frequency and count are zero, aborting\n");
904 err = -EINVAL; 817 err = -EINVAL;
905 goto out_free_fd; 818 goto out_free_fd;
906 } 819 }
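
Worked through: 'perf record -c 100000' leaves user_freq at UINT_MAX, so
default_interval becomes 100000 and freq is zeroed, i.e. fixed-period
sampling; plain 'perf record' keeps freq at its 1000 default and copies it
into default_interval, leaving the kernel to auto-adjust the period toward
roughly 1000 samples per second.
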
907 820
908 err = __cmd_record(argc, argv); 821 err = __cmd_record(&record, argc, argv);
909out_free_fd: 822out_free_fd:
910 perf_evlist__delete_maps(evsel_list); 823 perf_evlist__delete_maps(evsel_list);
911out_symbol_exit: 824out_symbol_exit:
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4d7c8340c326..25d34d483e49 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -25,6 +25,7 @@
25#include "util/evsel.h" 25#include "util/evsel.h"
26#include "util/header.h" 26#include "util/header.h"
27#include "util/session.h" 27#include "util/session.h"
28#include "util/tool.h"
28 29
29#include "util/parse-options.h" 30#include "util/parse-options.h"
30#include "util/parse-events.h" 31#include "util/parse-events.h"
@@ -35,38 +36,35 @@
35 36
36#include <linux/bitmap.h> 37#include <linux/bitmap.h>
37 38
38static char const *input_name = "perf.data"; 39struct perf_report {
39 40 struct perf_tool tool;
40static bool force, use_tui, use_stdio; 41 struct perf_session *session;
41static bool hide_unresolved; 42 char const *input_name;
42static bool dont_use_callchains; 43 bool force, use_tui, use_stdio;
43static bool show_full_info; 44 bool hide_unresolved;
44 45 bool dont_use_callchains;
45static bool show_threads; 46 bool show_full_info;
46static struct perf_read_values show_threads_values; 47 bool show_threads;
47 48 bool inverted_callchain;
48static const char default_pretty_printing_style[] = "normal"; 49 struct perf_read_values show_threads_values;
49static const char *pretty_printing_style = default_pretty_printing_style; 50 const char *pretty_printing_style;
50 51 symbol_filter_t annotate_init;
51static char callchain_default_opt[] = "fractal,0.5,callee"; 52 const char *cpu_list;
52static bool inverted_callchain; 53 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
53static symbol_filter_t annotate_init; 54};
54
55static const char *cpu_list;
56static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
57 55
58static int perf_session__add_hist_entry(struct perf_session *session, 56static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
59 struct addr_location *al, 57 struct addr_location *al,
60 struct perf_sample *sample, 58 struct perf_sample *sample,
61 struct perf_evsel *evsel) 59 struct machine *machine)
62{ 60{
63 struct symbol *parent = NULL; 61 struct symbol *parent = NULL;
64 int err = 0; 62 int err = 0;
65 struct hist_entry *he; 63 struct hist_entry *he;
66 64
67 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { 65 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
68 err = perf_session__resolve_callchain(session, al->thread, 66 err = machine__resolve_callchain(machine, evsel, al->thread,
69 sample->callchain, &parent); 67 sample->callchain, &parent);
70 if (err) 68 if (err)
71 return err; 69 return err;
72 } 70 }
@@ -76,7 +74,8 @@ static int perf_session__add_hist_entry(struct perf_session *session,
76 return -ENOMEM; 74 return -ENOMEM;
77 75
78 if (symbol_conf.use_callchain) { 76 if (symbol_conf.use_callchain) {
79 err = callchain_append(he->callchain, &session->callchain_cursor, 77 err = callchain_append(he->callchain,
78 &evsel->hists.callchain_cursor,
80 sample->period); 79 sample->period);
81 if (err) 80 if (err)
82 return err; 81 return err;
@@ -92,8 +91,7 @@ static int perf_session__add_hist_entry(struct perf_session *session,
92 assert(evsel != NULL); 91 assert(evsel != NULL);
93 92
94 err = -ENOMEM; 93 err = -ENOMEM;
95 if (notes->src == NULL && 94 if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
96 symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0)
97 goto out; 95 goto out;
98 96
99 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 97 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
@@ -106,30 +104,32 @@ out:
106} 104}
107 105
108 106
109static int process_sample_event(union perf_event *event, 107static int process_sample_event(struct perf_tool *tool,
108 union perf_event *event,
110 struct perf_sample *sample, 109 struct perf_sample *sample,
111 struct perf_evsel *evsel, 110 struct perf_evsel *evsel,
112 struct perf_session *session) 111 struct machine *machine)
113{ 112{
113 struct perf_report *rep = container_of(tool, struct perf_report, tool);
114 struct addr_location al; 114 struct addr_location al;
115 115
116 if (perf_event__preprocess_sample(event, session, &al, sample, 116 if (perf_event__preprocess_sample(event, machine, &al, sample,
117 annotate_init) < 0) { 117 rep->annotate_init) < 0) {
118 fprintf(stderr, "problem processing %d event, skipping it.\n", 118 fprintf(stderr, "problem processing %d event, skipping it.\n",
119 event->header.type); 119 event->header.type);
120 return -1; 120 return -1;
121 } 121 }
122 122
123 if (al.filtered || (hide_unresolved && al.sym == NULL)) 123 if (al.filtered || (rep->hide_unresolved && al.sym == NULL))
124 return 0; 124 return 0;
125 125
126 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 126 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
127 return 0; 127 return 0;
128 128
129 if (al.map != NULL) 129 if (al.map != NULL)
130 al.map->dso->hit = 1; 130 al.map->dso->hit = 1;
131 131
132 if (perf_session__add_hist_entry(session, &al, sample, evsel)) { 132 if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
133 pr_debug("problem incrementing symbol period, skipping event\n"); 133 pr_debug("problem incrementing symbol period, skipping event\n");
134 return -1; 134 return -1;
135 } 135 }
@@ -137,15 +137,17 @@ static int process_sample_event(union perf_event *event,
137 return 0; 137 return 0;
138} 138}
139 139
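
process_sample_event() above shows the pattern every converted callback uses:
struct perf_tool is embedded as the first member of struct perf_report, and
container_of() recovers the outer object from the tool pointer the session
core hands back. Self-contained, with the macro restated from the kernel
headers:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct tool { int (*cb)(struct tool *t); };
    struct report { struct tool tool; int hidden; };

    static int my_cb(struct tool *t)
    {
            /* recover the per-invocation state without any globals */
            struct report *rep = container_of(t, struct report, tool);
            return rep->hidden;
    }

    int main(void)
    {
            struct report rep = { .tool = { .cb = my_cb }, .hidden = 7 };
            printf("%d\n", rep.tool.cb(&rep.tool));
            return 0;
    }
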
140static int process_read_event(union perf_event *event, 140static int process_read_event(struct perf_tool *tool,
141 union perf_event *event,
141 struct perf_sample *sample __used, 142 struct perf_sample *sample __used,
142 struct perf_session *session) 143 struct perf_evsel *evsel,
144 struct machine *machine __used)
143{ 145{
144 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, 146 struct perf_report *rep = container_of(tool, struct perf_report, tool);
145 event->read.id); 147
146 if (show_threads) { 148 if (rep->show_threads) {
147 const char *name = evsel ? event_name(evsel) : "unknown"; 149 const char *name = evsel ? event_name(evsel) : "unknown";
148 perf_read_values_add_value(&show_threads_values, 150 perf_read_values_add_value(&rep->show_threads_values,
149 event->read.pid, event->read.tid, 151 event->read.pid, event->read.tid,
150 event->read.id, 152 event->read.id,
151 name, 153 name,
@@ -159,8 +161,10 @@ static int process_read_event(union perf_event *event,
159 return 0; 161 return 0;
160} 162}
161 163
162static int perf_session__setup_sample_type(struct perf_session *self) 164static int perf_report__setup_sample_type(struct perf_report *rep)
163{ 165{
166 struct perf_session *self = rep->session;
167
164 if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) { 168 if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
165 if (sort__has_parent) { 169 if (sort__has_parent) {
166 ui__warning("Selected --sort parent, but no " 170 ui__warning("Selected --sort parent, but no "
@@ -173,7 +177,8 @@ static int perf_session__setup_sample_type(struct perf_session *self)
173 "you call 'perf record' without -g?\n"); 177 "you call 'perf record' without -g?\n");
174 return -1; 178 return -1;
175 } 179 }
176 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && 180 } else if (!rep->dont_use_callchains &&
181 callchain_param.mode != CHAIN_NONE &&
177 !symbol_conf.use_callchain) { 182 !symbol_conf.use_callchain) {
178 symbol_conf.use_callchain = true; 183 symbol_conf.use_callchain = true;
179 if (callchain_register_param(&callchain_param) < 0) { 184 if (callchain_register_param(&callchain_param) < 0) {
@@ -186,22 +191,6 @@ static int perf_session__setup_sample_type(struct perf_session *self)
186 return 0; 191 return 0;
187} 192}
188 193
189static struct perf_event_ops event_ops = {
190 .sample = process_sample_event,
191 .mmap = perf_event__process_mmap,
192 .comm = perf_event__process_comm,
193 .exit = perf_event__process_task,
194 .fork = perf_event__process_task,
195 .lost = perf_event__process_lost,
196 .read = process_read_event,
197 .attr = perf_event__process_attr,
198 .event_type = perf_event__process_event_type,
199 .tracing_data = perf_event__process_tracing_data,
200 .build_id = perf_event__process_build_id,
201 .ordered_samples = true,
202 .ordering_requires_timestamps = true,
203};
204
205extern volatile int session_done; 194extern volatile int session_done;
206 195
207static void sig_handler(int sig __used) 196static void sig_handler(int sig __used)
@@ -224,6 +213,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
224} 213}
225 214
226static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, 215static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
216 struct perf_report *rep,
227 const char *help) 217 const char *help)
228{ 218{
229 struct perf_evsel *pos; 219 struct perf_evsel *pos;
@@ -241,18 +231,18 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
241 parent_pattern == default_parent_pattern) { 231 parent_pattern == default_parent_pattern) {
242 fprintf(stdout, "#\n# (%s)\n#\n", help); 232 fprintf(stdout, "#\n# (%s)\n#\n", help);
243 233
244 if (show_threads) { 234 if (rep->show_threads) {
245 bool style = !strcmp(pretty_printing_style, "raw"); 235 bool style = !strcmp(rep->pretty_printing_style, "raw");
246 perf_read_values_display(stdout, &show_threads_values, 236 perf_read_values_display(stdout, &rep->show_threads_values,
247 style); 237 style);
248 perf_read_values_destroy(&show_threads_values); 238 perf_read_values_destroy(&rep->show_threads_values);
249 } 239 }
250 } 240 }
251 241
252 return 0; 242 return 0;
253} 243}
254 244
255static int __cmd_report(void) 245static int __cmd_report(struct perf_report *rep)
256{ 246{
257 int ret = -EINVAL; 247 int ret = -EINVAL;
258 u64 nr_samples; 248 u64 nr_samples;
@@ -264,27 +254,31 @@ static int __cmd_report(void)
264 254
265 signal(SIGINT, sig_handler); 255 signal(SIGINT, sig_handler);
266 256
267 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 257 session = perf_session__new(rep->input_name, O_RDONLY,
258 rep->force, false, &rep->tool);
268 if (session == NULL) 259 if (session == NULL)
269 return -ENOMEM; 260 return -ENOMEM;
270 261
271 if (cpu_list) { 262 rep->session = session;
272 ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 263
264 if (rep->cpu_list) {
265 ret = perf_session__cpu_bitmap(session, rep->cpu_list,
266 rep->cpu_bitmap);
273 if (ret) 267 if (ret)
274 goto out_delete; 268 goto out_delete;
275 } 269 }
276 270
277 if (use_browser <= 0) 271 if (use_browser <= 0)
278 perf_session__fprintf_info(session, stdout, show_full_info); 272 perf_session__fprintf_info(session, stdout, rep->show_full_info);
279 273
280 if (show_threads) 274 if (rep->show_threads)
281 perf_read_values_init(&show_threads_values); 275 perf_read_values_init(&rep->show_threads_values);
282 276
283 ret = perf_session__setup_sample_type(session); 277 ret = perf_report__setup_sample_type(rep);
284 if (ret) 278 if (ret)
285 goto out_delete; 279 goto out_delete;
286 280
287 ret = perf_session__process_events(session, &event_ops); 281 ret = perf_session__process_events(session, &rep->tool);
288 if (ret) 282 if (ret)
289 goto out_delete; 283 goto out_delete;
290 284
@@ -327,7 +321,7 @@ static int __cmd_report(void)
327 } 321 }
328 322
329 if (nr_samples == 0) { 323 if (nr_samples == 0) {
330 ui__warning("The %s file has no samples!\n", input_name); 324 ui__warning("The %s file has no samples!\n", session->filename);
331 goto out_delete; 325 goto out_delete;
332 } 326 }
333 327
@@ -335,7 +329,7 @@ static int __cmd_report(void)
335 perf_evlist__tui_browse_hists(session->evlist, help, 329 perf_evlist__tui_browse_hists(session->evlist, help,
336 NULL, NULL, 0); 330 NULL, NULL, 0);
337 } else 331 } else
338 perf_evlist__tty_browse_hists(session->evlist, help); 332 perf_evlist__tty_browse_hists(session->evlist, rep, help);
339 333
340out_delete: 334out_delete:
341 /* 335 /*
@@ -354,9 +348,9 @@ out_delete:
354} 348}
355 349
356static int 350static int
357parse_callchain_opt(const struct option *opt __used, const char *arg, 351parse_callchain_opt(const struct option *opt, const char *arg, int unset)
358 int unset)
359{ 352{
353 struct perf_report *rep = (struct perf_report *)opt->value;
360 char *tok, *tok2; 354 char *tok, *tok2;
361 char *endptr; 355 char *endptr;
362 356
@@ -364,7 +358,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
364 * --no-call-graph 358 * --no-call-graph
365 */ 359 */
366 if (unset) { 360 if (unset) {
367 dont_use_callchains = true; 361 rep->dont_use_callchains = true;
368 return 0; 362 return 0;
369 } 363 }
370 364
@@ -412,7 +406,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
412 goto setup; 406 goto setup;
413 407
414 if (tok2[0] != 'c') { 408 if (tok2[0] != 'c') {
415 callchain_param.print_limit = strtod(tok2, &endptr); 409 callchain_param.print_limit = strtoul(tok2, &endptr, 0);
416 tok2 = strtok(NULL, ","); 410 tok2 = strtok(NULL, ",");
417 if (!tok2) 411 if (!tok2)
418 goto setup; 412 goto setup;
@@ -433,13 +427,34 @@ setup:
433 return 0; 427 return 0;
434} 428}
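
The strtod() -> strtoul() switch in the print_limit parse above is a type fix:
print_limit is an integer count, and strtod silently round-tripped it through
a double. With base 0, strtoul additionally accepts hex and octal spellings:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            char *end;
            /* base 0: decimal, 0x-prefixed hex and 0-prefixed octal */
            unsigned long limit = strtoul("0x20", &end, 0);
            printf("%lu\n", limit);     /* prints 32 */
            return 0;
    }
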
435 429
436static const char * const report_usage[] = { 430int cmd_report(int argc, const char **argv, const char *prefix __used)
437 "perf report [<options>] <command>", 431{
438 NULL 432 struct stat st;
439}; 433 char callchain_default_opt[] = "fractal,0.5,callee";
440 434 const char * const report_usage[] = {
441static const struct option options[] = { 435 "perf report [<options>]",
442 OPT_STRING('i', "input", &input_name, "file", 436 NULL
437 };
438 struct perf_report report = {
439 .tool = {
440 .sample = process_sample_event,
441 .mmap = perf_event__process_mmap,
442 .comm = perf_event__process_comm,
443 .exit = perf_event__process_task,
444 .fork = perf_event__process_task,
445 .lost = perf_event__process_lost,
446 .read = process_read_event,
447 .attr = perf_event__process_attr,
448 .event_type = perf_event__process_event_type,
449 .tracing_data = perf_event__process_tracing_data,
450 .build_id = perf_event__process_build_id,
451 .ordered_samples = true,
452 .ordering_requires_timestamps = true,
453 },
454 .pretty_printing_style = "normal",
455 };
456 const struct option options[] = {
457 OPT_STRING('i', "input", &report.input_name, "file",
443 "input file name"), 458 "input file name"),
444 OPT_INCR('v', "verbose", &verbose, 459 OPT_INCR('v', "verbose", &verbose,
445 "be more verbose (show symbol address, etc)"), 460 "be more verbose (show symbol address, etc)"),
@@ -449,17 +464,18 @@ static const struct option options[] = {
449 "file", "vmlinux pathname"), 464 "file", "vmlinux pathname"),
450 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, 465 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
451 "file", "kallsyms pathname"), 466 "file", "kallsyms pathname"),
452 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 467 OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"),
453 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 468 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
454 "load module symbols - WARNING: use only with -k and LIVE kernel"), 469 "load module symbols - WARNING: use only with -k and LIVE kernel"),
455 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 470 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
456 "Show a column with the number of samples"), 471 "Show a column with the number of samples"),
457 OPT_BOOLEAN('T', "threads", &show_threads, 472 OPT_BOOLEAN('T', "threads", &report.show_threads,
458 "Show per-thread event counters"), 473 "Show per-thread event counters"),
459 OPT_STRING(0, "pretty", &pretty_printing_style, "key", 474 OPT_STRING(0, "pretty", &report.pretty_printing_style, "key",
460 "pretty printing style key: normal raw"), 475 "pretty printing style key: normal raw"),
461 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 476 OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"),
462 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 477 OPT_BOOLEAN(0, "stdio", &report.use_stdio,
478 "Use the stdio interface"),
463 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 479 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
464 "sort by key(s): pid, comm, dso, symbol, parent"), 480 "sort by key(s): pid, comm, dso, symbol, parent"),
465 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 481 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
@@ -468,13 +484,14 @@ static const struct option options[] = {
468 "regex filter to identify parent, see: '--sort parent'"), 484 "regex filter to identify parent, see: '--sort parent'"),
469 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 485 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
470 "Only display entries with parent-match"), 486 "Only display entries with parent-match"),
471 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent, call_order", 487 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
472 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold and callchain order. " 488 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. "
473 "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt), 489 "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
474 OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"), 490 OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
491 "alias for inverted call graph"),
475 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 492 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
476 "only consider symbols in these dsos"), 493 "only consider symbols in these dsos"),
477 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 494 OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
478 "only consider symbols in these comms"), 495 "only consider symbols in these comms"),
479 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 496 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
480 "only consider these symbols"), 497 "only consider these symbols"),
@@ -484,12 +501,13 @@ static const struct option options[] = {
484 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 501 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
485 "separator for columns, no spaces will be added between " 502 "separator for columns, no spaces will be added between "
486 "columns '.' is reserved."), 503 "columns '.' is reserved."),
487 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, 504 OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved,
488 "Only display entries resolved to a symbol"), 505 "Only display entries resolved to a symbol"),
489 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 506 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
490 "Look for files with symbols relative to this directory"), 507 "Look for files with symbols relative to this directory"),
491 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 508 OPT_STRING('C', "cpu", &report.cpu_list, "cpu",
492 OPT_BOOLEAN('I', "show-info", &show_full_info, 509 "list of cpus to profile"),
510 OPT_BOOLEAN('I', "show-info", &report.show_full_info,
493 "Display extended information about perf.data file"), 511 "Display extended information about perf.data file"),
494 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, 512 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
495 "Interleave source code with assembly code (default)"), 513 "Interleave source code with assembly code (default)"),
@@ -500,24 +518,30 @@ static const struct option options[] = {
500 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 518 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
501 "Show a column with the sum of periods"), 519 "Show a column with the sum of periods"),
502 OPT_END() 520 OPT_END()
503}; 521 };
504 522
505int cmd_report(int argc, const char **argv, const char *prefix __used)
506{
507 argc = parse_options(argc, argv, options, report_usage, 0); 523 argc = parse_options(argc, argv, options, report_usage, 0);
508 524
509 if (use_stdio) 525 if (report.use_stdio)
510 use_browser = 0; 526 use_browser = 0;
511 else if (use_tui) 527 else if (report.use_tui)
512 use_browser = 1; 528 use_browser = 1;
513 529
514 if (inverted_callchain) 530 if (report.inverted_callchain)
515 callchain_param.order = ORDER_CALLER; 531 callchain_param.order = ORDER_CALLER;
516 532
517 if (strcmp(input_name, "-") != 0) 533 if (!report.input_name || !strlen(report.input_name)) {
534 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
535 report.input_name = "-";
536 else
537 report.input_name = "perf.data";
538 }
539
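
Instead of hard-wiring input_name to "perf.data" at file scope, cmd_report()
now probes stdin, mirroring what record does for stdout: a FIFO means the data
is being piped in, as in 'perf record -o - ... | perf report'. The probe on
its own:

    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
            struct stat st;
            const char *input = "perf.data";

            /* stdin is a pipe/FIFO: read the stream instead of a file */
            if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
                    input = "-";
            printf("reading from %s\n", input);
            return 0;
    }
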
540 if (strcmp(report.input_name, "-") != 0)
518 setup_browser(true); 541 setup_browser(true);
519 else 542 else
520 use_browser = 0; 543 use_browser = 0;
544
521 /* 545 /*
522 * Only in the newt browser we are doing integrated annotation, 546 * Only in the newt browser we are doing integrated annotation,
523 * so don't allocate extra space that won't be used in the stdio 547 * so don't allocate extra space that won't be used in the stdio
@@ -525,7 +549,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
525 */ 549 */
526 if (use_browser > 0) { 550 if (use_browser > 0) {
527 symbol_conf.priv_size = sizeof(struct annotation); 551 symbol_conf.priv_size = sizeof(struct annotation);
528 annotate_init = symbol__annotate_init; 552 report.annotate_init = symbol__annotate_init;
529 /* 553 /*
530 * For searching by name on the "Browse map details". 554 * For searching by name on the "Browse map details".
531 * providing it only in verbose mode not to bloat too 555 * providing it only in verbose mode not to bloat too
@@ -572,5 +596,5 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
572 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); 596 sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
573 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); 597 sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
574 598
575 return __cmd_report(); 599 return __cmd_report(&report);
576} 600}
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5177964943e7..fb8b5f83b4a0 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2,11 +2,14 @@
2#include "perf.h" 2#include "perf.h"
3 3
4#include "util/util.h" 4#include "util/util.h"
5#include "util/evlist.h"
5#include "util/cache.h" 6#include "util/cache.h"
7#include "util/evsel.h"
6#include "util/symbol.h" 8#include "util/symbol.h"
7#include "util/thread.h" 9#include "util/thread.h"
8#include "util/header.h" 10#include "util/header.h"
9#include "util/session.h" 11#include "util/session.h"
12#include "util/tool.h"
10 13
11#include "util/parse-options.h" 14#include "util/parse-options.h"
12#include "util/trace-event.h" 15#include "util/trace-event.h"
@@ -19,7 +22,7 @@
19#include <pthread.h> 22#include <pthread.h>
20#include <math.h> 23#include <math.h>
21 24
22static char const *input_name = "perf.data"; 25static const char *input_name;
23 26
24static char default_sort_order[] = "avg, max, switch, runtime"; 27static char default_sort_order[] = "avg, max, switch, runtime";
25static const char *sort_order = default_sort_order; 28static const char *sort_order = default_sort_order;
@@ -723,21 +726,21 @@ struct trace_migrate_task_event {
723 726
724struct trace_sched_handler { 727struct trace_sched_handler {
725 void (*switch_event)(struct trace_switch_event *, 728 void (*switch_event)(struct trace_switch_event *,
726 struct perf_session *, 729 struct machine *,
727 struct event *, 730 struct event *,
728 int cpu, 731 int cpu,
729 u64 timestamp, 732 u64 timestamp,
730 struct thread *thread); 733 struct thread *thread);
731 734
732 void (*runtime_event)(struct trace_runtime_event *, 735 void (*runtime_event)(struct trace_runtime_event *,
733 struct perf_session *, 736 struct machine *,
734 struct event *, 737 struct event *,
735 int cpu, 738 int cpu,
736 u64 timestamp, 739 u64 timestamp,
737 struct thread *thread); 740 struct thread *thread);
738 741
739 void (*wakeup_event)(struct trace_wakeup_event *, 742 void (*wakeup_event)(struct trace_wakeup_event *,
740 struct perf_session *, 743 struct machine *,
741 struct event *, 744 struct event *,
742 int cpu, 745 int cpu,
743 u64 timestamp, 746 u64 timestamp,
@@ -750,7 +753,7 @@ struct trace_sched_handler {
750 struct thread *thread); 753 struct thread *thread);
751 754
752 void (*migrate_task_event)(struct trace_migrate_task_event *, 755 void (*migrate_task_event)(struct trace_migrate_task_event *,
753 struct perf_session *session, 756 struct machine *machine,
754 struct event *, 757 struct event *,
755 int cpu, 758 int cpu,
756 u64 timestamp, 759 u64 timestamp,
@@ -760,7 +763,7 @@ struct trace_sched_handler {
760 763
761static void 764static void
762replay_wakeup_event(struct trace_wakeup_event *wakeup_event, 765replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
763 struct perf_session *session __used, 766 struct machine *machine __used,
764 struct event *event, 767 struct event *event,
765 int cpu __used, 768 int cpu __used,
766 u64 timestamp __used, 769 u64 timestamp __used,
@@ -787,7 +790,7 @@ static u64 cpu_last_switched[MAX_CPUS];
787 790
788static void 791static void
789replay_switch_event(struct trace_switch_event *switch_event, 792replay_switch_event(struct trace_switch_event *switch_event,
790 struct perf_session *session __used, 793 struct machine *machine __used,
791 struct event *event, 794 struct event *event,
792 int cpu, 795 int cpu,
793 u64 timestamp, 796 u64 timestamp,
@@ -1021,7 +1024,7 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
1021 1024
1022static void 1025static void
1023latency_switch_event(struct trace_switch_event *switch_event, 1026latency_switch_event(struct trace_switch_event *switch_event,
1024 struct perf_session *session, 1027 struct machine *machine,
1025 struct event *event __used, 1028 struct event *event __used,
1026 int cpu, 1029 int cpu,
1027 u64 timestamp, 1030 u64 timestamp,
@@ -1045,8 +1048,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
1045 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1048 die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
1046 1049
1047 1050
1048 sched_out = perf_session__findnew(session, switch_event->prev_pid); 1051 sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
1049 sched_in = perf_session__findnew(session, switch_event->next_pid); 1052 sched_in = machine__findnew_thread(machine, switch_event->next_pid);
1050 1053
1051 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); 1054 out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
1052 if (!out_events) { 1055 if (!out_events) {
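
The conversion above swaps perf_session__findnew() for machine__findnew_thread(), but the contract is the same lookup-or-create ("findnew") idiom: a miss allocates and registers a new thread instead of returning NULL. A minimal standalone sketch, with a linear list standing in for the rbtree the real code uses:

#include <stdlib.h>

struct thread { int pid; struct thread *next; };
struct machine { struct thread *threads; };

/* Lookup-or-create: callers only see NULL on allocation failure,
 * never for a merely-unseen pid. */
static struct thread *findnew_thread(struct machine *m, int pid)
{
	struct thread *t;

	for (t = m->threads; t; t = t->next)
		if (t->pid == pid)
			return t;

	t = calloc(1, sizeof(*t));
	if (t) {
		t->pid = pid;
		t->next = m->threads;
		m->threads = t;
	}
	return t;
}
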
@@ -1074,13 +1077,13 @@ latency_switch_event(struct trace_switch_event *switch_event,
1074 1077
1075static void 1078static void
1076latency_runtime_event(struct trace_runtime_event *runtime_event, 1079latency_runtime_event(struct trace_runtime_event *runtime_event,
1077 struct perf_session *session, 1080 struct machine *machine,
1078 struct event *event __used, 1081 struct event *event __used,
1079 int cpu, 1082 int cpu,
1080 u64 timestamp, 1083 u64 timestamp,
1081 struct thread *this_thread __used) 1084 struct thread *this_thread __used)
1082{ 1085{
1083 struct thread *thread = perf_session__findnew(session, runtime_event->pid); 1086 struct thread *thread = machine__findnew_thread(machine, runtime_event->pid);
1084 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); 1087 struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
1085 1088
1086 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1089 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
@@ -1097,7 +1100,7 @@ latency_runtime_event(struct trace_runtime_event *runtime_event,
1097 1100
1098static void 1101static void
1099latency_wakeup_event(struct trace_wakeup_event *wakeup_event, 1102latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1100 struct perf_session *session, 1103 struct machine *machine,
1101 struct event *__event __used, 1104 struct event *__event __used,
1102 int cpu __used, 1105 int cpu __used,
1103 u64 timestamp, 1106 u64 timestamp,
@@ -1111,7 +1114,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1111 if (!wakeup_event->success) 1114 if (!wakeup_event->success)
1112 return; 1115 return;
1113 1116
1114 wakee = perf_session__findnew(session, wakeup_event->pid); 1117 wakee = machine__findnew_thread(machine, wakeup_event->pid);
1115 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); 1118 atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
1116 if (!atoms) { 1119 if (!atoms) {
1117 thread_atoms_insert(wakee); 1120 thread_atoms_insert(wakee);
@@ -1145,7 +1148,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
1145 1148
1146static void 1149static void
1147latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, 1150latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
1148 struct perf_session *session, 1151 struct machine *machine,
1149 struct event *__event __used, 1152 struct event *__event __used,
1150 int cpu __used, 1153 int cpu __used,
1151 u64 timestamp, 1154 u64 timestamp,
@@ -1161,7 +1164,7 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
1161 if (profile_cpu == -1) 1164 if (profile_cpu == -1)
1162 return; 1165 return;
1163 1166
1164 migrant = perf_session__findnew(session, migrate_task_event->pid); 1167 migrant = machine__findnew_thread(machine, migrate_task_event->pid);
1165 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); 1168 atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
1166 if (!atoms) { 1169 if (!atoms) {
1167 thread_atoms_insert(migrant); 1170 thread_atoms_insert(migrant);
@@ -1356,12 +1359,13 @@ static void sort_lat(void)
1356static struct trace_sched_handler *trace_handler; 1359static struct trace_sched_handler *trace_handler;
1357 1360
1358static void 1361static void
1359process_sched_wakeup_event(void *data, struct perf_session *session, 1362process_sched_wakeup_event(struct perf_tool *tool __used,
1360 struct event *event, 1363 struct event *event,
1361 int cpu __used, 1364 struct perf_sample *sample,
1362 u64 timestamp __used, 1365 struct machine *machine,
1363 struct thread *thread __used) 1366 struct thread *thread)
1364{ 1367{
1368 void *data = sample->raw_data;
1365 struct trace_wakeup_event wakeup_event; 1369 struct trace_wakeup_event wakeup_event;
1366 1370
1367 FILL_COMMON_FIELDS(wakeup_event, event, data); 1371 FILL_COMMON_FIELDS(wakeup_event, event, data);
@@ -1373,8 +1377,8 @@ process_sched_wakeup_event(void *data, struct perf_session *session,
1373 FILL_FIELD(wakeup_event, cpu, event, data); 1377 FILL_FIELD(wakeup_event, cpu, event, data);
1374 1378
1375 if (trace_handler->wakeup_event) 1379 if (trace_handler->wakeup_event)
1376 trace_handler->wakeup_event(&wakeup_event, session, event, 1380 trace_handler->wakeup_event(&wakeup_event, machine, event,
1377 cpu, timestamp, thread); 1381 sample->cpu, sample->time, thread);
1378} 1382}
1379 1383
1380/* 1384/*
@@ -1392,7 +1396,7 @@ static char next_shortname2 = '0';
1392 1396
1393static void 1397static void
1394map_switch_event(struct trace_switch_event *switch_event, 1398map_switch_event(struct trace_switch_event *switch_event,
1395 struct perf_session *session, 1399 struct machine *machine,
1396 struct event *event __used, 1400 struct event *event __used,
1397 int this_cpu, 1401 int this_cpu,
1398 u64 timestamp, 1402 u64 timestamp,
@@ -1420,8 +1424,8 @@ map_switch_event(struct trace_switch_event *switch_event,
1420 die("hm, delta: %" PRIu64 " < 0 ?\n", delta); 1424 die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
1421 1425
1422 1426
1423 sched_out = perf_session__findnew(session, switch_event->prev_pid); 1427 sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
1424 sched_in = perf_session__findnew(session, switch_event->next_pid); 1428 sched_in = machine__findnew_thread(machine, switch_event->next_pid);
1425 1429
1426 curr_thread[this_cpu] = sched_in; 1430 curr_thread[this_cpu] = sched_in;
1427 1431
@@ -1469,14 +1473,15 @@ map_switch_event(struct trace_switch_event *switch_event,
1469 } 1473 }
1470} 1474}
1471 1475
1472
1473static void 1476static void
1474process_sched_switch_event(void *data, struct perf_session *session, 1477process_sched_switch_event(struct perf_tool *tool __used,
1475 struct event *event, 1478 struct event *event,
1476 int this_cpu, 1479 struct perf_sample *sample,
1477 u64 timestamp __used, 1480 struct machine *machine,
1478 struct thread *thread __used) 1481 struct thread *thread)
1479{ 1482{
1483 int this_cpu = sample->cpu;
1484 void *data = sample->raw_data;
1480 struct trace_switch_event switch_event; 1485 struct trace_switch_event switch_event;
1481 1486
1482 FILL_COMMON_FIELDS(switch_event, event, data); 1487 FILL_COMMON_FIELDS(switch_event, event, data);
@@ -1498,19 +1503,20 @@ process_sched_switch_event(void *data, struct perf_session *session,
1498 nr_context_switch_bugs++; 1503 nr_context_switch_bugs++;
1499 } 1504 }
1500 if (trace_handler->switch_event) 1505 if (trace_handler->switch_event)
1501 trace_handler->switch_event(&switch_event, session, event, 1506 trace_handler->switch_event(&switch_event, machine, event,
1502 this_cpu, timestamp, thread); 1507 this_cpu, sample->time, thread);
1503 1508
1504 curr_pid[this_cpu] = switch_event.next_pid; 1509 curr_pid[this_cpu] = switch_event.next_pid;
1505} 1510}
1506 1511
1507static void 1512static void
1508process_sched_runtime_event(void *data, struct perf_session *session, 1513process_sched_runtime_event(struct perf_tool *tool __used,
1509 struct event *event, 1514 struct event *event,
1510 int cpu __used, 1515 struct perf_sample *sample,
1511 u64 timestamp __used, 1516 struct machine *machine,
1512 struct thread *thread __used) 1517 struct thread *thread)
1513{ 1518{
1519 void *data = sample->raw_data;
1514 struct trace_runtime_event runtime_event; 1520 struct trace_runtime_event runtime_event;
1515 1521
1516 FILL_ARRAY(runtime_event, comm, event, data); 1522 FILL_ARRAY(runtime_event, comm, event, data);
@@ -1519,16 +1525,18 @@ process_sched_runtime_event(void *data, struct perf_session *session,
1519 FILL_FIELD(runtime_event, vruntime, event, data); 1525 FILL_FIELD(runtime_event, vruntime, event, data);
1520 1526
1521 if (trace_handler->runtime_event) 1527 if (trace_handler->runtime_event)
1522 trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread); 1528 trace_handler->runtime_event(&runtime_event, machine, event,
1529 sample->cpu, sample->time, thread);
1523} 1530}
1524 1531
1525static void 1532static void
1526process_sched_fork_event(void *data, 1533process_sched_fork_event(struct perf_tool *tool __used,
1527 struct event *event, 1534 struct event *event,
1528 int cpu __used, 1535 struct perf_sample *sample,
1529 u64 timestamp __used, 1536 struct machine *machine __used,
1530 struct thread *thread __used) 1537 struct thread *thread)
1531{ 1538{
1539 void *data = sample->raw_data;
1532 struct trace_fork_event fork_event; 1540 struct trace_fork_event fork_event;
1533 1541
1534 FILL_COMMON_FIELDS(fork_event, event, data); 1542 FILL_COMMON_FIELDS(fork_event, event, data);
@@ -1540,13 +1548,14 @@ process_sched_fork_event(void *data,
1540 1548
1541 if (trace_handler->fork_event) 1549 if (trace_handler->fork_event)
1542 trace_handler->fork_event(&fork_event, event, 1550 trace_handler->fork_event(&fork_event, event,
1543 cpu, timestamp, thread); 1551 sample->cpu, sample->time, thread);
1544} 1552}
1545 1553
1546static void 1554static void
1547process_sched_exit_event(struct event *event, 1555process_sched_exit_event(struct perf_tool *tool __used,
1548 int cpu __used, 1556 struct event *event,
1549 u64 timestamp __used, 1557 struct perf_sample *sample __used,
1558 struct machine *machine __used,
1550 struct thread *thread __used) 1559 struct thread *thread __used)
1551{ 1560{
1552 if (verbose) 1561 if (verbose)
@@ -1554,12 +1563,13 @@ process_sched_exit_event(struct event *event,
1554} 1563}
1555 1564
1556static void 1565static void
1557process_sched_migrate_task_event(void *data, struct perf_session *session, 1566process_sched_migrate_task_event(struct perf_tool *tool __used,
1558 struct event *event, 1567 struct event *event,
1559 int cpu __used, 1568 struct perf_sample *sample,
1560 u64 timestamp __used, 1569 struct machine *machine,
1561 struct thread *thread __used) 1570 struct thread *thread)
1562{ 1571{
1572 void *data = sample->raw_data;
1563 struct trace_migrate_task_event migrate_task_event; 1573 struct trace_migrate_task_event migrate_task_event;
1564 1574
1565 FILL_COMMON_FIELDS(migrate_task_event, event, data); 1575 FILL_COMMON_FIELDS(migrate_task_event, event, data);
@@ -1570,67 +1580,47 @@ process_sched_migrate_task_event(void *data, struct perf_session *session,
1570 FILL_FIELD(migrate_task_event, cpu, event, data); 1580 FILL_FIELD(migrate_task_event, cpu, event, data);
1571 1581
1572 if (trace_handler->migrate_task_event) 1582 if (trace_handler->migrate_task_event)
1573 trace_handler->migrate_task_event(&migrate_task_event, session, 1583 trace_handler->migrate_task_event(&migrate_task_event, machine,
1574 event, cpu, timestamp, thread); 1584 event, sample->cpu,
1585 sample->time, thread);
1575} 1586}
1576 1587
1577static void process_raw_event(union perf_event *raw_event __used, 1588typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event *event,
1578 struct perf_session *session, void *data, int cpu, 1589 struct perf_sample *sample,
1579 u64 timestamp, struct thread *thread) 1590 struct machine *machine,
1580{ 1591 struct thread *thread);
1581 struct event *event;
1582 int type;
1583
1584
1585 type = trace_parse_common_type(data);
1586 event = trace_find_event(type);
1587
1588 if (!strcmp(event->name, "sched_switch"))
1589 process_sched_switch_event(data, session, event, cpu, timestamp, thread);
1590 if (!strcmp(event->name, "sched_stat_runtime"))
1591 process_sched_runtime_event(data, session, event, cpu, timestamp, thread);
1592 if (!strcmp(event->name, "sched_wakeup"))
1593 process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
1594 if (!strcmp(event->name, "sched_wakeup_new"))
1595 process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
1596 if (!strcmp(event->name, "sched_process_fork"))
1597 process_sched_fork_event(data, event, cpu, timestamp, thread);
1598 if (!strcmp(event->name, "sched_process_exit"))
1599 process_sched_exit_event(event, cpu, timestamp, thread);
1600 if (!strcmp(event->name, "sched_migrate_task"))
1601 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
1602}
1603 1592
1604static int process_sample_event(union perf_event *event, 1593static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
1605 struct perf_sample *sample, 1594 union perf_event *event __used,
1606 struct perf_evsel *evsel __used, 1595 struct perf_sample *sample,
1607 struct perf_session *session) 1596 struct perf_evsel *evsel,
1597 struct machine *machine)
1608{ 1598{
1609 struct thread *thread; 1599 struct thread *thread = machine__findnew_thread(machine, sample->pid);
1610
1611 if (!(session->sample_type & PERF_SAMPLE_RAW))
1612 return 0;
1613 1600
1614 thread = perf_session__findnew(session, sample->pid);
1615 if (thread == NULL) { 1601 if (thread == NULL) {
1616 pr_debug("problem processing %d event, skipping it.\n", 1602 pr_debug("problem processing %s event, skipping it.\n",
1617 event->header.type); 1603 evsel->name);
1618 return -1; 1604 return -1;
1619 } 1605 }
1620 1606
1621 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1607 evsel->hists.stats.total_period += sample->period;
1608 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
1622 1609
1623 if (profile_cpu != -1 && profile_cpu != (int)sample->cpu) 1610 if (evsel->handler.func != NULL) {
1624 return 0; 1611 tracepoint_handler f = evsel->handler.func;
1625 1612
1626 process_raw_event(event, session, sample->raw_data, sample->cpu, 1613 if (evsel->handler.data == NULL)
1627 sample->time, thread); 1614 evsel->handler.data = trace_find_event(evsel->attr.config);
1615
1616 f(tool, evsel->handler.data, sample, machine, thread);
1617 }
1628 1618
1629 return 0; 1619 return 0;
1630} 1620}
1631 1621
1632static struct perf_event_ops event_ops = { 1622static struct perf_tool perf_sched = {
1633 .sample = process_sample_event, 1623 .sample = perf_sched__process_tracepoint_sample,
1634 .comm = perf_event__process_comm, 1624 .comm = perf_event__process_comm,
1635 .lost = perf_event__process_lost, 1625 .lost = perf_event__process_lost,
1636 .fork = perf_event__process_task, 1626 .fork = perf_event__process_task,
@@ -1640,13 +1630,25 @@ static struct perf_event_ops event_ops = {
1640static void read_events(bool destroy, struct perf_session **psession) 1630static void read_events(bool destroy, struct perf_session **psession)
1641{ 1631{
1642 int err = -EINVAL; 1632 int err = -EINVAL;
1633 const struct perf_evsel_str_handler handlers[] = {
1634 { "sched:sched_switch", process_sched_switch_event, },
1635 { "sched:sched_stat_runtime", process_sched_runtime_event, },
1636 { "sched:sched_wakeup", process_sched_wakeup_event, },
1637 { "sched:sched_wakeup_new", process_sched_wakeup_event, },
1638 { "sched:sched_process_fork", process_sched_fork_event, },
1639 { "sched:sched_process_exit", process_sched_exit_event, },
1640 { "sched:sched_migrate_task", process_sched_migrate_task_event, },
1641 };
1643 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 1642 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
1644 0, false, &event_ops); 1643 0, false, &perf_sched);
1645 if (session == NULL) 1644 if (session == NULL)
1646 die("No Memory"); 1645 die("No Memory");
1647 1646
1647 err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
1648 assert(err == 0);
1649
1648 if (perf_session__has_traces(session, "record -R")) { 1650 if (perf_session__has_traces(session, "record -R")) {
1649 err = perf_session__process_events(session, &event_ops); 1651 err = perf_session__process_events(session, &perf_sched);
1650 if (err) 1652 if (err)
1651 die("Failed to process events, error %d", err); 1653 die("Failed to process events, error %d", err);
1652 1654
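
Taken together, the builtin-sched.c hunks replace the old strcmp() chain in process_raw_event() with a registration table: each tracepoint name is bound to its handler once (the handlers[] array in read_events()), and the per-sample callback just invokes evsel->handler.func. A self-contained sketch of that name-to-handler pattern; the types here are simplified stand-ins, not the perf API:

#include <stdio.h>
#include <string.h>

struct sample { int cpu; unsigned long long time; };
typedef void (*tp_handler)(struct sample *s);

static void handle_switch(struct sample *s) { printf("switch on cpu %d\n", s->cpu); }
static void handle_wakeup(struct sample *s) { printf("wakeup at %llu\n", s->time); }

/* One-time registration, mirroring the handlers[] array above. */
static const struct {
	const char *name;
	tp_handler func;
} handlers[] = {
	{ "sched:sched_switch", handle_switch },
	{ "sched:sched_wakeup", handle_wakeup },
};

static tp_handler lookup_handler(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++)
		if (!strcmp(handlers[i].name, name))
			return handlers[i].func;
	return NULL;
}

int main(void)
{
	struct sample s = { .cpu = 1, .time = 42 };
	tp_handler f = lookup_handler("sched:sched_switch");

	if (f)
		f(&s);	/* the dispatch step done once per sample */
	return 0;
}
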
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 2f62a2952269..fd1909afcfd6 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -7,6 +7,7 @@
7#include "util/header.h" 7#include "util/header.h"
8#include "util/parse-options.h" 8#include "util/parse-options.h"
9#include "util/session.h" 9#include "util/session.h"
10#include "util/tool.h"
10#include "util/symbol.h" 11#include "util/symbol.h"
11#include "util/thread.h" 12#include "util/thread.h"
12#include "util/trace-event.h" 13#include "util/trace-event.h"
@@ -23,6 +24,7 @@ static u64 nr_unordered;
23extern const struct option record_options[]; 24extern const struct option record_options[];
24static bool no_callchain; 25static bool no_callchain;
25static bool show_full_info; 26static bool show_full_info;
27static bool system_wide;
26static const char *cpu_list; 28static const char *cpu_list;
27static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 29static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
28 30
@@ -315,7 +317,7 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
315 317
316static void print_sample_addr(union perf_event *event, 318static void print_sample_addr(union perf_event *event,
317 struct perf_sample *sample, 319 struct perf_sample *sample,
318 struct perf_session *session, 320 struct machine *machine,
319 struct thread *thread, 321 struct thread *thread,
320 struct perf_event_attr *attr) 322 struct perf_event_attr *attr)
321{ 323{
@@ -328,11 +330,11 @@ static void print_sample_addr(union perf_event *event,
328 if (!sample_addr_correlates_sym(attr)) 330 if (!sample_addr_correlates_sym(attr))
329 return; 331 return;
330 332
331 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 333 thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
332 event->ip.pid, sample->addr, &al); 334 sample->addr, &al);
333 if (!al.map) 335 if (!al.map)
334 thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE, 336 thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
335 event->ip.pid, sample->addr, &al); 337 sample->addr, &al);
336 338
337 al.cpu = sample->cpu; 339 al.cpu = sample->cpu;
338 al.sym = NULL; 340 al.sym = NULL;
@@ -362,7 +364,7 @@ static void print_sample_addr(union perf_event *event,
362static void process_event(union perf_event *event __unused, 364static void process_event(union perf_event *event __unused,
363 struct perf_sample *sample, 365 struct perf_sample *sample,
364 struct perf_evsel *evsel, 366 struct perf_evsel *evsel,
365 struct perf_session *session, 367 struct machine *machine,
366 struct thread *thread) 368 struct thread *thread)
367{ 369{
368 struct perf_event_attr *attr = &evsel->attr; 370 struct perf_event_attr *attr = &evsel->attr;
@@ -377,15 +379,15 @@ static void process_event(union perf_event *event __unused,
377 sample->raw_size); 379 sample->raw_size);
378 380
379 if (PRINT_FIELD(ADDR)) 381 if (PRINT_FIELD(ADDR))
380 print_sample_addr(event, sample, session, thread, attr); 382 print_sample_addr(event, sample, machine, thread, attr);
381 383
382 if (PRINT_FIELD(IP)) { 384 if (PRINT_FIELD(IP)) {
383 if (!symbol_conf.use_callchain) 385 if (!symbol_conf.use_callchain)
384 printf(" "); 386 printf(" ");
385 else 387 else
386 printf("\n"); 388 printf("\n");
387 perf_session__print_ip(event, sample, session, 389 perf_event__print_ip(event, sample, machine, evsel,
388 PRINT_FIELD(SYM), PRINT_FIELD(DSO)); 390 PRINT_FIELD(SYM), PRINT_FIELD(DSO));
389 } 391 }
390 392
391 printf("\n"); 393 printf("\n");
@@ -432,14 +434,16 @@ static int cleanup_scripting(void)
432 return scripting_ops->stop_script(); 434 return scripting_ops->stop_script();
433} 435}
434 436
435static char const *input_name = "perf.data"; 437static const char *input_name;
436 438
437static int process_sample_event(union perf_event *event, 439static int process_sample_event(struct perf_tool *tool __used,
440 union perf_event *event,
438 struct perf_sample *sample, 441 struct perf_sample *sample,
439 struct perf_evsel *evsel, 442 struct perf_evsel *evsel,
440 struct perf_session *session) 443 struct machine *machine)
441{ 444{
442 struct thread *thread = perf_session__findnew(session, event->ip.pid); 445 struct addr_location al;
446 struct thread *thread = machine__findnew_thread(machine, event->ip.tid);
443 447
444 if (thread == NULL) { 448 if (thread == NULL) {
445 pr_debug("problem processing %d event, skipping it.\n", 449 pr_debug("problem processing %d event, skipping it.\n",
@@ -458,16 +462,25 @@ static int process_sample_event(union perf_event *event,
458 return 0; 462 return 0;
459 } 463 }
460 464
465 if (perf_event__preprocess_sample(event, machine, &al, sample, 0) < 0) {
466 pr_err("problem processing %d event, skipping it.\n",
467 event->header.type);
468 return -1;
469 }
470
471 if (al.filtered)
472 return 0;
473
461 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 474 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
462 return 0; 475 return 0;
463 476
464 scripting_ops->process_event(event, sample, evsel, session, thread); 477 scripting_ops->process_event(event, sample, evsel, machine, thread);
465 478
466 session->hists.stats.total_period += sample->period; 479 evsel->hists.stats.total_period += sample->period;
467 return 0; 480 return 0;
468} 481}
469 482
470static struct perf_event_ops event_ops = { 483static struct perf_tool perf_script = {
471 .sample = process_sample_event, 484 .sample = process_sample_event,
472 .mmap = perf_event__process_mmap, 485 .mmap = perf_event__process_mmap,
473 .comm = perf_event__process_comm, 486 .comm = perf_event__process_comm,
@@ -494,7 +507,7 @@ static int __cmd_script(struct perf_session *session)
494 507
495 signal(SIGINT, sig_handler); 508 signal(SIGINT, sig_handler);
496 509
497 ret = perf_session__process_events(session, &event_ops); 510 ret = perf_session__process_events(session, &perf_script);
498 511
499 if (debug_mode) 512 if (debug_mode)
500 pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); 513 pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
@@ -523,12 +536,6 @@ static struct script_spec *script_spec__new(const char *spec,
523 return s; 536 return s;
524} 537}
525 538
526static void script_spec__delete(struct script_spec *s)
527{
528 free(s->spec);
529 free(s);
530}
531
532static void script_spec__add(struct script_spec *s) 539static void script_spec__add(struct script_spec *s)
533{ 540{
534 list_add_tail(&s->node, &script_specs); 541 list_add_tail(&s->node, &script_specs);
@@ -554,16 +561,11 @@ static struct script_spec *script_spec__findnew(const char *spec,
554 561
555 s = script_spec__new(spec, ops); 562 s = script_spec__new(spec, ops);
556 if (!s) 563 if (!s)
557 goto out_delete_spec; 564 return NULL;
558 565
559 script_spec__add(s); 566 script_spec__add(s);
560 567
561 return s; 568 return s;
562
563out_delete_spec:
564 script_spec__delete(s);
565
566 return NULL;
567} 569}
568 570
569int script_spec_register(const char *spec, struct scripting_ops *ops) 571int script_spec_register(const char *spec, struct scripting_ops *ops)
@@ -681,7 +683,8 @@ static int parse_output_fields(const struct option *opt __used,
681 type = PERF_TYPE_RAW; 683 type = PERF_TYPE_RAW;
682 else { 684 else {
683 fprintf(stderr, "Invalid event type in field string.\n"); 685 fprintf(stderr, "Invalid event type in field string.\n");
684 return -EINVAL; 686 rc = -EINVAL;
687 goto out;
685 } 688 }
686 689
687 if (output[type].user_set) 690 if (output[type].user_set)
@@ -923,6 +926,24 @@ static int read_script_info(struct script_desc *desc, const char *filename)
923 return 0; 926 return 0;
924} 927}
925 928
929static char *get_script_root(struct dirent *script_dirent, const char *suffix)
930{
931 char *script_root, *str;
932
933 script_root = strdup(script_dirent->d_name);
934 if (!script_root)
935 return NULL;
936
937 str = (char *)ends_with(script_root, suffix);
938 if (!str) {
939 free(script_root);
940 return NULL;
941 }
942
943 *str = '\0';
944 return script_root;
945}
946
926static int list_available_scripts(const struct option *opt __used, 947static int list_available_scripts(const struct option *opt __used,
927 const char *s __used, int unset __used) 948 const char *s __used, int unset __used)
928{ 949{
@@ -934,7 +955,6 @@ static int list_available_scripts(const struct option *opt __used,
934 struct script_desc *desc; 955 struct script_desc *desc;
935 char first_half[BUFSIZ]; 956 char first_half[BUFSIZ];
936 char *script_root; 957 char *script_root;
937 char *str;
938 958
939 snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); 959 snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
940 960
@@ -950,16 +970,14 @@ static int list_available_scripts(const struct option *opt __used,
950 continue; 970 continue;
951 971
952 for_each_script(lang_path, lang_dir, script_dirent, script_next) { 972 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
953 script_root = strdup(script_dirent.d_name); 973 script_root = get_script_root(&script_dirent, REPORT_SUFFIX);
954 str = (char *)ends_with(script_root, REPORT_SUFFIX); 974 if (script_root) {
955 if (str) {
956 *str = '\0';
957 desc = script_desc__findnew(script_root); 975 desc = script_desc__findnew(script_root);
958 snprintf(script_path, MAXPATHLEN, "%s/%s", 976 snprintf(script_path, MAXPATHLEN, "%s/%s",
959 lang_path, script_dirent.d_name); 977 lang_path, script_dirent.d_name);
960 read_script_info(desc, script_path); 978 read_script_info(desc, script_path);
979 free(script_root);
961 } 980 }
962 free(script_root);
963 } 981 }
964 } 982 }
965 983
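
The new get_script_root() factors a strdup-then-truncate sequence out of both directory walkers (the second conversion, in get_script_path(), follows below) and adds the NULL check on strdup() that the open-coded version lacked. A standalone sketch of its contract, with a local stand-in for perf's ends_with() helper:

#include <stdlib.h>
#include <string.h>

/* Stand-in for perf's ends_with(): returns a pointer to the suffix
 * inside str, or NULL if str does not end with suffix. */
static const char *ends_with(const char *str, const char *suffix)
{
	size_t len = strlen(str), slen = strlen(suffix);

	if (len < slen)
		return NULL;
	return strcmp(str + len - slen, suffix) ? NULL : str + len - slen;
}

/* Duplicate the entry name, chop the suffix, hand ownership to the
 * caller; NULL means "no match" and nothing to free. */
static char *get_script_root(const char *d_name, const char *suffix)
{
	char *root = strdup(d_name);
	char *str;

	if (!root)
		return NULL;

	str = (char *)ends_with(root, suffix);
	if (!str) {
		free(root);
		return NULL;
	}

	*str = '\0';
	return root;
}
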
@@ -981,8 +999,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
981 char script_path[MAXPATHLEN]; 999 char script_path[MAXPATHLEN];
982 DIR *scripts_dir, *lang_dir; 1000 DIR *scripts_dir, *lang_dir;
983 char lang_path[MAXPATHLEN]; 1001 char lang_path[MAXPATHLEN];
984 char *str, *__script_root; 1002 char *__script_root;
985 char *path = NULL;
986 1003
987 snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); 1004 snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
988 1005
@@ -998,23 +1015,18 @@ static char *get_script_path(const char *script_root, const char *suffix)
998 continue; 1015 continue;
999 1016
1000 for_each_script(lang_path, lang_dir, script_dirent, script_next) { 1017 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
1001 __script_root = strdup(script_dirent.d_name); 1018 __script_root = get_script_root(&script_dirent, suffix);
1002 str = (char *)ends_with(__script_root, suffix); 1019 if (__script_root && !strcmp(script_root, __script_root)) {
1003 if (str) { 1020 free(__script_root);
1004 *str = '\0';
1005 if (strcmp(__script_root, script_root))
1006 continue;
1007 snprintf(script_path, MAXPATHLEN, "%s/%s", 1021 snprintf(script_path, MAXPATHLEN, "%s/%s",
1008 lang_path, script_dirent.d_name); 1022 lang_path, script_dirent.d_name);
1009 path = strdup(script_path); 1023 return strdup(script_path);
1010 free(__script_root);
1011 break;
1012 } 1024 }
1013 free(__script_root); 1025 free(__script_root);
1014 } 1026 }
1015 } 1027 }
1016 1028
1017 return path; 1029 return NULL;
1018} 1030}
1019 1031
1020static bool is_top_script(const char *script_path) 1032static bool is_top_script(const char *script_path)
@@ -1083,7 +1095,11 @@ static const struct option options[] = {
1083 OPT_CALLBACK('f', "fields", NULL, "str", 1095 OPT_CALLBACK('f', "fields", NULL, "str",
1084 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", 1096 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
1085 parse_output_fields), 1097 parse_output_fields),
1086 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"), 1098 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1099 "system-wide collection from all CPUs"),
1100 OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
1101 OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
1102 "only display events for these comms"),
1087 OPT_BOOLEAN('I', "show-info", &show_full_info, 1103 OPT_BOOLEAN('I', "show-info", &show_full_info,
1088 "display extended information from perf.data file"), 1104 "display extended information from perf.data file"),
1089 OPT_END() 1105 OPT_END()
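
With this change, perf script's CPU filter moves from -c to -C (matching the other perf commands) and -c now selects comms, so an invocation filtering on both might look like:

perf script -C 0,1 -c gcc,make
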
@@ -1110,7 +1126,6 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1110 struct perf_session *session; 1126 struct perf_session *session;
1111 char *script_path = NULL; 1127 char *script_path = NULL;
1112 const char **__argv; 1128 const char **__argv;
1113 bool system_wide;
1114 int i, j, err; 1129 int i, j, err;
1115 1130
1116 setup_scripting(); 1131 setup_scripting();
@@ -1178,15 +1193,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1178 } 1193 }
1179 1194
1180 if (!pid) { 1195 if (!pid) {
1181 system_wide = true;
1182 j = 0; 1196 j = 0;
1183 1197
1184 dup2(live_pipe[1], 1); 1198 dup2(live_pipe[1], 1);
1185 close(live_pipe[0]); 1199 close(live_pipe[0]);
1186 1200
1187 if (!is_top_script(argv[0])) 1201 if (is_top_script(argv[0])) {
1202 system_wide = true;
1203 } else if (!system_wide) {
1188 system_wide = !have_cmd(argc - rep_args, 1204 system_wide = !have_cmd(argc - rep_args,
1189 &argv[rep_args]); 1205 &argv[rep_args]);
1206 }
1190 1207
1191 __argv = malloc((argc + 6) * sizeof(const char *)); 1208 __argv = malloc((argc + 6) * sizeof(const char *));
1192 if (!__argv) 1209 if (!__argv)
@@ -1234,10 +1251,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1234 script_path = rep_script_path; 1251 script_path = rep_script_path;
1235 1252
1236 if (script_path) { 1253 if (script_path) {
1237 system_wide = false;
1238 j = 0; 1254 j = 0;
1239 1255
1240 if (rec_script_path) 1256 if (!rec_script_path)
1257 system_wide = false;
1258 else if (!system_wide)
1241 system_wide = !have_cmd(argc - 1, &argv[1]); 1259 system_wide = !have_cmd(argc - 1, &argv[1]);
1242 1260
1243 __argv = malloc((argc + 2) * sizeof(const char *)); 1261 __argv = malloc((argc + 2) * sizeof(const char *));
@@ -1261,7 +1279,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1261 if (!script_name) 1279 if (!script_name)
1262 setup_pager(); 1280 setup_pager();
1263 1281
1264 session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops); 1282 session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script);
1265 if (session == NULL) 1283 if (session == NULL)
1266 return -ENOMEM; 1284 return -ENOMEM;
1267 1285
@@ -1287,7 +1305,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1287 return -1; 1305 return -1;
1288 } 1306 }
1289 1307
1290 input = open(input_name, O_RDONLY); 1308 input = open(session->filename, O_RDONLY); /* input_name */
1291 if (input < 0) { 1309 if (input < 0) {
1292 perror("failed to open file"); 1310 perror("failed to open file");
1293 exit(-1); 1311 exit(-1);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 955930e0a5c3..f5d2a63eba66 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -578,6 +578,33 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
578 avg / avg_stats(&walltime_nsecs_stats)); 578 avg / avg_stats(&walltime_nsecs_stats));
579} 579}
580 580
581/* used for get_ratio_color() */
582enum grc_type {
583 GRC_STALLED_CYCLES_FE,
584 GRC_STALLED_CYCLES_BE,
585 GRC_CACHE_MISSES,
586 GRC_MAX_NR
587};
588
589static const char *get_ratio_color(enum grc_type type, double ratio)
590{
591 static const double grc_table[GRC_MAX_NR][3] = {
592 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
593 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
594 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
595 };
596 const char *color = PERF_COLOR_NORMAL;
597
598 if (ratio > grc_table[type][0])
599 color = PERF_COLOR_RED;
600 else if (ratio > grc_table[type][1])
601 color = PERF_COLOR_MAGENTA;
602 else if (ratio > grc_table[type][2])
603 color = PERF_COLOR_YELLOW;
604
605 return color;
606}
607
581static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg) 608static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
582{ 609{
583 double total, ratio = 0.0; 610 double total, ratio = 0.0;
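
The helper above collapses seven copies of the same red/magenta/yellow ladder into one table lookup; the hunks that follow reduce each print_*() function to a single call. A compilable sketch using the patch's thresholds, with strings standing in for the PERF_COLOR_* constants:

#include <stdio.h>

enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

/* Cut-offs from the patch: above [0] is red, [1] magenta, [2] yellow. */
static const double grc_table[GRC_MAX_NR][3] = {
	[GRC_STALLED_CYCLES_FE]	= { 50.0, 30.0, 10.0 },
	[GRC_STALLED_CYCLES_BE]	= { 75.0, 50.0, 20.0 },
	[GRC_CACHE_MISSES]	= { 20.0, 10.0,  5.0 },
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	if (ratio > grc_table[type][0])
		return "red";
	if (ratio > grc_table[type][1])
		return "magenta";
	if (ratio > grc_table[type][2])
		return "yellow";
	return "normal";
}

int main(void)
{
	printf("%s\n", get_ratio_color(GRC_CACHE_MISSES, 12.5)); /* magenta */
	return 0;
}
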
@@ -588,13 +615,7 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us
588 if (total) 615 if (total)
589 ratio = avg / total * 100.0; 616 ratio = avg / total * 100.0;
590 617
591 color = PERF_COLOR_NORMAL; 618 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
592 if (ratio > 50.0)
593 color = PERF_COLOR_RED;
594 else if (ratio > 30.0)
595 color = PERF_COLOR_MAGENTA;
596 else if (ratio > 10.0)
597 color = PERF_COLOR_YELLOW;
598 619
599 fprintf(output, " # "); 620 fprintf(output, " # ");
600 color_fprintf(output, color, "%6.2f%%", ratio); 621 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -611,13 +632,7 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use
611 if (total) 632 if (total)
612 ratio = avg / total * 100.0; 633 ratio = avg / total * 100.0;
613 634
614 color = PERF_COLOR_NORMAL; 635 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
615 if (ratio > 75.0)
616 color = PERF_COLOR_RED;
617 else if (ratio > 50.0)
618 color = PERF_COLOR_MAGENTA;
619 else if (ratio > 20.0)
620 color = PERF_COLOR_YELLOW;
621 636
622 fprintf(output, " # "); 637 fprintf(output, " # ");
623 color_fprintf(output, color, "%6.2f%%", ratio); 638 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -634,13 +649,7 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double
634 if (total) 649 if (total)
635 ratio = avg / total * 100.0; 650 ratio = avg / total * 100.0;
636 651
637 color = PERF_COLOR_NORMAL; 652 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
638 if (ratio > 20.0)
639 color = PERF_COLOR_RED;
640 else if (ratio > 10.0)
641 color = PERF_COLOR_MAGENTA;
642 else if (ratio > 5.0)
643 color = PERF_COLOR_YELLOW;
644 653
645 fprintf(output, " # "); 654 fprintf(output, " # ");
646 color_fprintf(output, color, "%6.2f%%", ratio); 655 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -657,13 +666,7 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
657 if (total) 666 if (total)
658 ratio = avg / total * 100.0; 667 ratio = avg / total * 100.0;
659 668
660 color = PERF_COLOR_NORMAL; 669 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
661 if (ratio > 20.0)
662 color = PERF_COLOR_RED;
663 else if (ratio > 10.0)
664 color = PERF_COLOR_MAGENTA;
665 else if (ratio > 5.0)
666 color = PERF_COLOR_YELLOW;
667 670
668 fprintf(output, " # "); 671 fprintf(output, " # ");
669 color_fprintf(output, color, "%6.2f%%", ratio); 672 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -680,13 +683,7 @@ static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, dou
680 if (total) 683 if (total)
681 ratio = avg / total * 100.0; 684 ratio = avg / total * 100.0;
682 685
683 color = PERF_COLOR_NORMAL; 686 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
684 if (ratio > 20.0)
685 color = PERF_COLOR_RED;
686 else if (ratio > 10.0)
687 color = PERF_COLOR_MAGENTA;
688 else if (ratio > 5.0)
689 color = PERF_COLOR_YELLOW;
690 687
691 fprintf(output, " # "); 688 fprintf(output, " # ");
692 color_fprintf(output, color, "%6.2f%%", ratio); 689 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -703,13 +700,7 @@ static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do
703 if (total) 700 if (total)
704 ratio = avg / total * 100.0; 701 ratio = avg / total * 100.0;
705 702
706 color = PERF_COLOR_NORMAL; 703 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
707 if (ratio > 20.0)
708 color = PERF_COLOR_RED;
709 else if (ratio > 10.0)
710 color = PERF_COLOR_MAGENTA;
711 else if (ratio > 5.0)
712 color = PERF_COLOR_YELLOW;
713 704
714 fprintf(output, " # "); 705 fprintf(output, " # ");
715 color_fprintf(output, color, "%6.2f%%", ratio); 706 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -726,13 +717,7 @@ static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do
726 if (total) 717 if (total)
727 ratio = avg / total * 100.0; 718 ratio = avg / total * 100.0;
728 719
729 color = PERF_COLOR_NORMAL; 720 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
730 if (ratio > 20.0)
731 color = PERF_COLOR_RED;
732 else if (ratio > 10.0)
733 color = PERF_COLOR_MAGENTA;
734 else if (ratio > 5.0)
735 color = PERF_COLOR_YELLOW;
736 721
737 fprintf(output, " # "); 722 fprintf(output, " # ");
738 color_fprintf(output, color, "%6.2f%%", ratio); 723 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -749,13 +734,7 @@ static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, doub
749 if (total) 734 if (total)
750 ratio = avg / total * 100.0; 735 ratio = avg / total * 100.0;
751 736
752 color = PERF_COLOR_NORMAL; 737 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
753 if (ratio > 20.0)
754 color = PERF_COLOR_RED;
755 else if (ratio > 10.0)
756 color = PERF_COLOR_MAGENTA;
757 else if (ratio > 5.0)
758 color = PERF_COLOR_YELLOW;
759 738
760 fprintf(output, " # "); 739 fprintf(output, " # ");
761 color_fprintf(output, color, "%6.2f%%", ratio); 740 color_fprintf(output, color, "%6.2f%%", ratio);
@@ -1108,22 +1087,13 @@ static const struct option options[] = {
1108 */ 1087 */
1109static int add_default_attributes(void) 1088static int add_default_attributes(void)
1110{ 1089{
1111 struct perf_evsel *pos;
1112 size_t attr_nr = 0;
1113 size_t c;
1114
1115 /* Set attrs if no event is selected and !null_run: */ 1090 /* Set attrs if no event is selected and !null_run: */
1116 if (null_run) 1091 if (null_run)
1117 return 0; 1092 return 0;
1118 1093
1119 if (!evsel_list->nr_entries) { 1094 if (!evsel_list->nr_entries) {
1120 for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { 1095 if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0)
1121 pos = perf_evsel__new(default_attrs + c, c + attr_nr); 1096 return -1;
1122 if (pos == NULL)
1123 return -1;
1124 perf_evlist__add(evsel_list, pos);
1125 }
1126 attr_nr += c;
1127 } 1097 }
1128 1098
1129 /* Detailed events get appended to the event list: */ 1099 /* Detailed events get appended to the event list: */
@@ -1132,38 +1102,21 @@ static int add_default_attributes(void)
1132 return 0; 1102 return 0;
1133 1103
1134 /* Append detailed run extra attributes: */ 1104 /* Append detailed run extra attributes: */
1135 for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { 1105 if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0)
1136 pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); 1106 return -1;
1137 if (pos == NULL)
1138 return -1;
1139 perf_evlist__add(evsel_list, pos);
1140 }
1141 attr_nr += c;
1142 1107
1143 if (detailed_run < 2) 1108 if (detailed_run < 2)
1144 return 0; 1109 return 0;
1145 1110
1146 /* Append very detailed run extra attributes: */ 1111 /* Append very detailed run extra attributes: */
1147 for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { 1112 if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0)
1148 pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); 1113 return -1;
1149 if (pos == NULL)
1150 return -1;
1151 perf_evlist__add(evsel_list, pos);
1152 }
1153 1114
1154 if (detailed_run < 3) 1115 if (detailed_run < 3)
1155 return 0; 1116 return 0;
1156 1117
1157 /* Append very, very detailed run extra attributes: */ 1118 /* Append very, very detailed run extra attributes: */
1158 for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { 1119 return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs);
1159 pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr);
1160 if (pos == NULL)
1161 return -1;
1162 perf_evlist__add(evsel_list, pos);
1163 }
1164
1165
1166 return 0;
1167} 1120}
1168 1121
1169int cmd_stat(int argc, const char **argv, const char *prefix __used) 1122int cmd_stat(int argc, const char **argv, const char *prefix __used)
@@ -1267,8 +1220,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
1267 1220
1268 list_for_each_entry(pos, &evsel_list->entries, node) { 1221 list_for_each_entry(pos, &evsel_list->entries, node) {
1269 if (perf_evsel__alloc_stat_priv(pos) < 0 || 1222 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
1270 perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 || 1223 perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
1271 perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
1272 goto out_free_fd; 1224 goto out_free_fd;
1273 } 1225 }
1274 1226
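
add_default_attributes() shrinks because the alloc-and-append loop now lives behind perf_evlist__add_attrs_array(), leaving one call plus an error check per detail level. A generic sketch of that shape; the container and names here are illustrative, not the perf API:

#include <stdlib.h>
#include <string.h>

struct attr { unsigned type, config; };

struct evlist {
	struct attr *entries;
	size_t nr, cap;
};

/* Append a whole static array in one call; on failure the caller
 * bails out once instead of unwinding a hand-rolled loop. */
static int evlist__add_attrs(struct evlist *el, const struct attr *attrs,
			     size_t nr)
{
	if (el->nr + nr > el->cap) {
		size_t cap = (el->nr + nr) * 2;
		struct attr *p = realloc(el->entries, cap * sizeof(*p));

		if (!p)
			return -1;
		el->entries = p;
		el->cap = cap;
	}
	memcpy(el->entries + el->nr, attrs, nr * sizeof(*attrs));
	el->nr += nr;
	return 0;
}
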
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 831d1baeac37..2b9a7f497a20 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,6 +7,7 @@
7 7
8#include "util/cache.h" 8#include "util/cache.h"
9#include "util/debug.h" 9#include "util/debug.h"
10#include "util/debugfs.h"
10#include "util/evlist.h" 11#include "util/evlist.h"
11#include "util/parse-options.h" 12#include "util/parse-options.h"
12#include "util/parse-events.h" 13#include "util/parse-events.h"
@@ -14,8 +15,6 @@
14#include "util/thread_map.h" 15#include "util/thread_map.h"
15#include "../../include/linux/hw_breakpoint.h" 16#include "../../include/linux/hw_breakpoint.h"
16 17
17static long page_size;
18
19static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) 18static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
20{ 19{
21 bool *visited = symbol__priv(sym); 20 bool *visited = symbol__priv(sym);
@@ -31,6 +30,7 @@ static int test__vmlinux_matches_kallsyms(void)
31 struct map *kallsyms_map, *vmlinux_map; 30 struct map *kallsyms_map, *vmlinux_map;
32 struct machine kallsyms, vmlinux; 31 struct machine kallsyms, vmlinux;
33 enum map_type type = MAP__FUNCTION; 32 enum map_type type = MAP__FUNCTION;
33 long page_size = sysconf(_SC_PAGE_SIZE);
34 struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; 34 struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", };
35 35
36 /* 36 /*
@@ -247,7 +247,7 @@ static int trace_event__id(const char *evname)
247 247
248 if (asprintf(&filename, 248 if (asprintf(&filename,
249 "%s/syscalls/%s/id", 249 "%s/syscalls/%s/id",
250 debugfs_path, evname) < 0) 250 tracing_events_path, evname) < 0)
251 return -1; 251 return -1;
252 252
253 fd = open(filename, O_RDONLY); 253 fd = open(filename, O_RDONLY);
@@ -603,7 +603,7 @@ out_free_threads:
603 603
604#define TEST_ASSERT_VAL(text, cond) \ 604#define TEST_ASSERT_VAL(text, cond) \
605do { \ 605do { \
606 if (!cond) { \ 606 if (!(cond)) { \
607 pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ 607 pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
608 return -1; \ 608 return -1; \
609 } \ 609 } \
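
The one-character fix above matters because TEST_ASSERT_VAL is handed compound expressions: without the parentheses, ! binds only to the first token of the expansion. A minimal demonstration:

#include <stdio.h>

#define BAD_ASSERT(cond)  do { if (!cond)   puts("failed"); } while (0)
#define GOOD_ASSERT(cond) do { if (!(cond)) puts("failed"); } while (0)

int main(void)
{
	int a = 0, b = 0;

	/* cond is false, so both lines should report a failure. */
	BAD_ASSERT(a == 0 && b);	/* expands to !a == 0 && b: stays silent */
	GOOD_ASSERT(a == 0 && b);	/* prints "failed", as intended */
	return 0;
}
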
@@ -759,6 +759,103 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
759 return 0; 759 return 0;
760} 760}
761 761
762static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist)
763{
764 struct perf_evsel *evsel = list_entry(evlist->entries.next,
765 struct perf_evsel, node);
766
767 TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
768 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
769 TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
770 TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
771
772 return test__checkevent_tracepoint(evlist);
773}
774
775static int
776test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist)
777{
778 struct perf_evsel *evsel;
779
780 TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
781
782 list_for_each_entry(evsel, &evlist->entries, node) {
783 TEST_ASSERT_VAL("wrong exclude_user",
784 !evsel->attr.exclude_user);
785 TEST_ASSERT_VAL("wrong exclude_kernel",
786 evsel->attr.exclude_kernel);
787 TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
788 TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
789 }
790
791 return test__checkevent_tracepoint_multi(evlist);
792}
793
794static int test__checkevent_raw_modifier(struct perf_evlist *evlist)
795{
796 struct perf_evsel *evsel = list_entry(evlist->entries.next,
797 struct perf_evsel, node);
798
799 TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
800 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
801 TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
802 TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
803
804 return test__checkevent_raw(evlist);
805}
806
807static int test__checkevent_numeric_modifier(struct perf_evlist *evlist)
808{
809 struct perf_evsel *evsel = list_entry(evlist->entries.next,
810 struct perf_evsel, node);
811
812 TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
813 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
814 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
815 TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
816
817 return test__checkevent_numeric(evlist);
818}
819
820static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist)
821{
822 struct perf_evsel *evsel = list_entry(evlist->entries.next,
823 struct perf_evsel, node);
824
825 TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
826 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
827 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
828 TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
829
830 return test__checkevent_symbolic_name(evlist);
831}
832
833static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist)
834{
835 struct perf_evsel *evsel = list_entry(evlist->entries.next,
836 struct perf_evsel, node);
837
838 TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
839 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
840 TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
841 TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
842
843 return test__checkevent_symbolic_alias(evlist);
844}
845
846static int test__checkevent_genhw_modifier(struct perf_evlist *evlist)
847{
848 struct perf_evsel *evsel = list_entry(evlist->entries.next,
849 struct perf_evsel, node);
850
851 TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
852 TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
853 TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
854 TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
855
856 return test__checkevent_genhw(evlist);
857}
858
762static struct test__event_st { 859static struct test__event_st {
763 const char *name; 860 const char *name;
764 __u32 type; 861 __u32 type;
@@ -808,6 +905,34 @@ static struct test__event_st {
808 .name = "mem:0:w", 905 .name = "mem:0:w",
809 .check = test__checkevent_breakpoint_w, 906 .check = test__checkevent_breakpoint_w,
810 }, 907 },
908 {
909 .name = "syscalls:sys_enter_open:k",
910 .check = test__checkevent_tracepoint_modifier,
911 },
912 {
913 .name = "syscalls:*:u",
914 .check = test__checkevent_tracepoint_multi_modifier,
915 },
916 {
917 .name = "r1:kp",
918 .check = test__checkevent_raw_modifier,
919 },
920 {
921 .name = "1:1:hp",
922 .check = test__checkevent_numeric_modifier,
923 },
924 {
925 .name = "instructions:h",
926 .check = test__checkevent_symbolic_name_modifier,
927 },
928 {
929 .name = "faults:u",
930 .check = test__checkevent_symbolic_alias_modifier,
931 },
932 {
933 .name = "L1-dcache-load-miss:kp",
934 .check = test__checkevent_genhw_modifier,
935 },
811}; 936};
812 937
813#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st)) 938#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st))
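
The new test cases pin down what each event-modifier letter does to the attr: u, k and h each whitelist one privilege level (clearing its exclude_* bit) and p bumps precise_ip. A simplified model consistent with the expectations above; the real parser lives in util/parse-events.c:

struct fake_attr {
	int exclude_user, exclude_kernel, exclude_hv;
	int precise_ip;
};

/* Start fully excluded, then re-enable whatever the modifier names. */
static void apply_modifier(struct fake_attr *a, const char *mod)
{
	a->exclude_user = a->exclude_kernel = a->exclude_hv = 1;
	a->precise_ip = 0;

	for (; *mod; mod++) {
		switch (*mod) {
		case 'u': a->exclude_user = 0;   break;
		case 'k': a->exclude_kernel = 0; break;
		case 'h': a->exclude_hv = 0;     break;
		case 'p': a->precise_ip++;       break;
		}
	}
}
/* e.g. "kp" (as in "r1:kp") leaves only the kernel level included and
 * sets precise_ip, matching test__checkevent_raw_modifier() above. */
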
@@ -841,6 +966,336 @@ static int test__parse_events(void)
841 966
842 return ret; 967 return ret;
843} 968}
969
970static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp,
971 size_t *sizep)
972{
973 cpu_set_t *mask;
974 size_t size;
975 int i, cpu = -1, nrcpus = 1024;
976realloc:
977 mask = CPU_ALLOC(nrcpus);
978 size = CPU_ALLOC_SIZE(nrcpus);
979 CPU_ZERO_S(size, mask);
980
981 if (sched_getaffinity(pid, size, mask) == -1) {
982 CPU_FREE(mask);
983 if (errno == EINVAL && nrcpus < (1024 << 8)) {
984 nrcpus = nrcpus << 2;
985 goto realloc;
986 }
987 perror("sched_getaffinity");
988 return -1;
989 }
990
991 for (i = 0; i < nrcpus; i++) {
992 if (CPU_ISSET_S(i, size, mask)) {
993 if (cpu == -1) {
994 cpu = i;
995 *maskp = mask;
996 *sizep = size;
997 } else
998 CPU_CLR_S(i, size, mask);
999 }
1000 }
1001
1002 if (cpu == -1)
1003 CPU_FREE(mask);
1004
1005 return cpu;
1006}
1007
1008static int test__PERF_RECORD(void)
1009{
1010 struct perf_record_opts opts = {
1011 .target_pid = -1,
1012 .target_tid = -1,
1013 .no_delay = true,
1014 .freq = 10,
1015 .mmap_pages = 256,
1016 .sample_id_all_avail = true,
1017 };
1018 cpu_set_t *cpu_mask = NULL;
1019 size_t cpu_mask_size = 0;
1020 struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
1021 struct perf_evsel *evsel;
1022 struct perf_sample sample;
1023 const char *cmd = "sleep";
1024 const char *argv[] = { cmd, "1", NULL, };
1025 char *bname;
1026 u64 sample_type, prev_time = 0;
1027 bool found_cmd_mmap = false,
1028 found_libc_mmap = false,
1029 found_vdso_mmap = false,
1030 found_ld_mmap = false;
1031 int err = -1, errs = 0, i, wakeups = 0, sample_size;
1032 u32 cpu;
1033 int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
1034
1035 if (evlist == NULL || argv == NULL) {
1036 pr_debug("Not enough memory to create evlist\n");
1037 goto out;
1038 }
1039
1040 /*
1041 * We need at least one evsel in the evlist, use the default
1042 * one: "cycles".
1043 */
1044 err = perf_evlist__add_default(evlist);
1045 if (err < 0) {
1046 pr_debug("Not enough memory to create evsel\n");
1047 goto out_delete_evlist;
1048 }
1049
1050 /*
1051 * Create maps of threads and cpus to monitor. In this case
1052 * we start with all threads and cpus (-1, -1) but then in
1053 * perf_evlist__prepare_workload we'll fill in the only thread
1054 * we're monitoring, the one forked there.
1055 */
1056 err = perf_evlist__create_maps(evlist, opts.target_pid,
1057 opts.target_tid, opts.cpu_list);
1058 if (err < 0) {
1059 pr_debug("Not enough memory to create thread/cpu maps\n");
1060 goto out_delete_evlist;
1061 }
1062
1063 /*
1064 * Prepare the workload in argv[] to run, it'll fork it, and then wait
1065 * for perf_evlist__start_workload() to exec it. This is done this way
1066 * so that we have time to open the evlist (calling sys_perf_event_open
1067 * on all the fds) and then mmap them.
1068 */
1069 err = perf_evlist__prepare_workload(evlist, &opts, argv);
1070 if (err < 0) {
1071 pr_debug("Couldn't run the workload!\n");
1072 goto out_delete_evlist;
1073 }
1074
1075 /*
1076 * Config the evsels, setting attr->comm on the first one, etc.
1077 */
1078 evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
1079 evsel->attr.sample_type |= PERF_SAMPLE_CPU;
1080 evsel->attr.sample_type |= PERF_SAMPLE_TID;
1081 evsel->attr.sample_type |= PERF_SAMPLE_TIME;
1082 perf_evlist__config_attrs(evlist, &opts);
1083
1084 err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask,
1085 &cpu_mask_size);
1086 if (err < 0) {
1087 pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
1088 goto out_delete_evlist;
1089 }
1090
1091 cpu = err;
1092
1093 /*
1094 * So that we can check perf_sample.cpu on all the samples.
1095 */
1096 if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
1097 pr_debug("sched_setaffinity: %s\n", strerror(errno));
1098 goto out_free_cpu_mask;
1099 }
1100
1101 /*
1102 * Call sys_perf_event_open on all the fds on all the evsels,
1103 * grouping them if asked to.
1104 */
1105 err = perf_evlist__open(evlist, opts.group);
1106 if (err < 0) {
1107 pr_debug("perf_evlist__open: %s\n", strerror(errno));
1108 goto out_delete_evlist;
1109 }
1110
1111 /*
1112 * mmap the first fd on a given CPU and ask for events for the other
1113 * fds in the same CPU to be injected in the same mmap ring buffer
1114 * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
1115 */
1116 err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
1117 if (err < 0) {
1118 pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
1119 goto out_delete_evlist;
1120 }
1121
1122 /*
1123 * We'll need these two to parse the PERF_SAMPLE_* fields in each
1124 * event.
1125 */
1126 sample_type = perf_evlist__sample_type(evlist);
1127 sample_size = __perf_evsel__sample_size(sample_type);
1128
1129 /*
1130 * Now that all is properly set up, enable the events, they will
1131 * count just on workload.pid, which will start...
1132 */
1133 perf_evlist__enable(evlist);
1134
1135 /*
1136 * Now!
1137 */
1138 perf_evlist__start_workload(evlist);
1139
1140 while (1) {
1141 int before = total_events;
1142
1143 for (i = 0; i < evlist->nr_mmaps; i++) {
1144 union perf_event *event;
1145
1146 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1147 const u32 type = event->header.type;
1148 const char *name = perf_event__name(type);
1149
1150 ++total_events;
1151 if (type < PERF_RECORD_MAX)
1152 nr_events[type]++;
1153
1154 err = perf_event__parse_sample(event, sample_type,
1155 sample_size, true,
1156 &sample, false);
1157 if (err < 0) {
1158 if (verbose)
1159 perf_event__fprintf(event, stderr);
1160 pr_debug("Couldn't parse sample\n");
1161 goto out_err;
1162 }
1163
1164 if (verbose) {
1165 pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
1166 perf_event__fprintf(event, stderr);
1167 }
1168
1169 if (prev_time > sample.time) {
1170 pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n",
1171 name, prev_time, sample.time);
1172 ++errs;
1173 }
1174
1175 prev_time = sample.time;
1176
1177 if (sample.cpu != cpu) {
1178 pr_debug("%s with unexpected cpu, expected %d, got %d\n",
1179 name, cpu, sample.cpu);
1180 ++errs;
1181 }
1182
1183 if ((pid_t)sample.pid != evlist->workload.pid) {
1184 pr_debug("%s with unexpected pid, expected %d, got %d\n",
1185 name, evlist->workload.pid, sample.pid);
1186 ++errs;
1187 }
1188
1189 if ((pid_t)sample.tid != evlist->workload.pid) {
1190 pr_debug("%s with unexpected tid, expected %d, got %d\n",
1191 name, evlist->workload.pid, sample.tid);
1192 ++errs;
1193 }
1194
1195 if ((type == PERF_RECORD_COMM ||
1196 type == PERF_RECORD_MMAP ||
1197 type == PERF_RECORD_FORK ||
1198 type == PERF_RECORD_EXIT) &&
1199 (pid_t)event->comm.pid != evlist->workload.pid) {
1200 pr_debug("%s with unexpected pid/tid\n", name);
1201 ++errs;
1202 }
1203
1204 if ((type == PERF_RECORD_COMM ||
1205 type == PERF_RECORD_MMAP) &&
1206 event->comm.pid != event->comm.tid) {
1207 pr_debug("%s with different pid/tid!\n", name);
1208 ++errs;
1209 }
1210
1211 switch (type) {
1212 case PERF_RECORD_COMM:
1213 if (strcmp(event->comm.comm, cmd)) {
1214 pr_debug("%s with unexpected comm!\n", name);
1215 ++errs;
1216 }
1217 break;
1218 case PERF_RECORD_EXIT:
1219 goto found_exit;
1220 case PERF_RECORD_MMAP:
1221 bname = strrchr(event->mmap.filename, '/');
1222 if (bname != NULL) {
1223 if (!found_cmd_mmap)
1224 found_cmd_mmap = !strcmp(bname + 1, cmd);
1225 if (!found_libc_mmap)
1226 found_libc_mmap = !strncmp(bname + 1, "libc", 4);
1227 if (!found_ld_mmap)
1228 found_ld_mmap = !strncmp(bname + 1, "ld", 2);
1229 } else if (!found_vdso_mmap)
1230 found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]");
1231 break;
1232
1233 case PERF_RECORD_SAMPLE:
1234 /* Just ignore samples for now */
1235 break;
1236 default:
1237 pr_debug("Unexpected perf_event->header.type %d!\n",
1238 type);
1239 ++errs;
1240 }
1241 }
1242 }
1243
1244 /*
1245 * We don't use poll here because at least at 3.1 times the
1246 * PERF_RECORD_{!SAMPLE} events don't honour
1247 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
1248 */
1249 if (total_events == before && false)
1250 poll(evlist->pollfd, evlist->nr_fds, -1);
1251
1252 sleep(1);
1253 if (++wakeups > 5) {
1254 pr_debug("No PERF_RECORD_EXIT event!\n");
1255 break;
1256 }
1257 }
1258
1259found_exit:
1260 if (nr_events[PERF_RECORD_COMM] > 1) {
1261 pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
1262 ++errs;
1263 }
1264
1265 if (nr_events[PERF_RECORD_COMM] == 0) {
1266 pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd);
1267 ++errs;
1268 }
1269
1270 if (!found_cmd_mmap) {
1271 pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
1272 ++errs;
1273 }
1274
1275 if (!found_libc_mmap) {
1276 pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc");
1277 ++errs;
1278 }
1279
1280 if (!found_ld_mmap) {
1281 pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld");
1282 ++errs;
1283 }
1284
1285 if (!found_vdso_mmap) {
1286 pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
1287 ++errs;
1288 }
1289out_err:
1290 perf_evlist__munmap(evlist);
1291out_free_cpu_mask:
1292 CPU_FREE(cpu_mask);
1293out_delete_evlist:
1294 perf_evlist__delete(evlist);
1295out:
1296 return (err < 0 || errs > 0) ? -1 : 0;
1297}
1298
844static struct test { 1299static struct test {
845 const char *desc; 1300 const char *desc;
846 int (*func)(void); 1301 int (*func)(void);
@@ -866,45 +1321,89 @@ static struct test {
866 .func = test__parse_events, 1321 .func = test__parse_events,
867 }, 1322 },
868 { 1323 {
1324 .desc = "Validate PERF_RECORD_* events & perf_sample fields",
1325 .func = test__PERF_RECORD,
1326 },
1327 {
869 .func = NULL, 1328 .func = NULL,
870 }, 1329 },
871}; 1330};
872 1331
873static int __cmd_test(void) 1332static bool perf_test__matches(int curr, int argc, const char *argv[])
874{ 1333{
875 int i = 0; 1334 int i;
1335
1336 if (argc == 0)
1337 return true;
876 1338
877 page_size = sysconf(_SC_PAGE_SIZE); 1339 for (i = 0; i < argc; ++i) {
1340 char *end;
1341 long nr = strtoul(argv[i], &end, 10);
1342
1343 if (*end == '\0') {
1344 if (nr == curr + 1)
1345 return true;
1346 continue;
1347 }
1348
1349 if (strstr(tests[curr].desc, argv[i]))
1350 return true;
1351 }
1352
1353 return false;
1354}
1355
1356static int __cmd_test(int argc, const char *argv[])
1357{
1358 int i = 0;
878 1359
879 while (tests[i].func) { 1360 while (tests[i].func) {
880 int err; 1361 int curr = i++, err;
881 pr_info("%2d: %s:", i + 1, tests[i].desc); 1362
1363 if (!perf_test__matches(curr, argc, argv))
1364 continue;
1365
1366 pr_info("%2d: %s:", i, tests[curr].desc);
882 pr_debug("\n--- start ---\n"); 1367 pr_debug("\n--- start ---\n");
883 err = tests[i].func(); 1368 err = tests[curr].func();
884 pr_debug("---- end ----\n%s:", tests[i].desc); 1369 pr_debug("---- end ----\n%s:", tests[curr].desc);
885 pr_info(" %s\n", err ? "FAILED!\n" : "Ok"); 1370 pr_info(" %s\n", err ? "FAILED!\n" : "Ok");
886 ++i;
887 } 1371 }
888 1372
889 return 0; 1373 return 0;
890} 1374}
891 1375
892static const char * const test_usage[] = { 1376static int perf_test__list(int argc, const char **argv)
893 "perf test [<options>]", 1377{
894 NULL, 1378 int i = 0;
895}; 1379
1380 while (tests[i].func) {
1381 int curr = i++;
896 1382
897static const struct option test_options[] = { 1383 if (argc > 1 && !strstr(tests[curr].desc, argv[1]))
1384 continue;
1385
1386 pr_info("%2d: %s\n", i, tests[curr].desc);
1387 }
1388
1389 return 0;
1390}
1391
1392int cmd_test(int argc, const char **argv, const char *prefix __used)
1393{
1394 const char * const test_usage[] = {
1395 "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
1396 NULL,
1397 };
1398 const struct option test_options[] = {
898 OPT_INTEGER('v', "verbose", &verbose, 1399 OPT_INTEGER('v', "verbose", &verbose,
899 "be more verbose (show symbol address, etc)"), 1400 "be more verbose (show symbol address, etc)"),
900 OPT_END() 1401 OPT_END()
901}; 1402 };
902 1403
903int cmd_test(int argc, const char **argv, const char *prefix __used)
904{
905 argc = parse_options(argc, argv, test_options, test_usage, 0); 1404 argc = parse_options(argc, argv, test_options, test_usage, 0);
906 if (argc) 1405 if (argc >= 1 && !strcmp(argv[0], "list"))
907 usage_with_options(test_usage, test_options); 1406 return perf_test__list(argc, argv);
908 1407
909 symbol_conf.priv_size = sizeof(int); 1408 symbol_conf.priv_size = sizeof(int);
910 symbol_conf.sort_by_name = true; 1409 symbol_conf.sort_by_name = true;
@@ -915,5 +1414,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
915 1414
916 setup_pager(); 1415 setup_pager();
917 1416
918 return __cmd_test(); 1417 return __cmd_test(argc, argv);
919} 1418}
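
The builtin-test.c changes above do two things: they register the new PERF_RECORD_* validation test in the tests[] table, and they teach `perf test` to run a subset of tests, selected either by 1-based test number or by a substring of the test description, with a `list` subcommand to enumerate what is available. The selection rule is small enough to show in isolation; the sketch below is a reduced standalone model of perf_test__matches(), not the actual perf source, with a trimmed-down description table standing in for tests[]:

	/* selection logic modeled on perf_test__matches() above */
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static const char *descs[] = {
		"vmlinux symtab matches kallsyms",
		"parse events tests",
		"Validate PERF_RECORD_* events & perf_sample fields",
	};

	static bool matches(int curr, int argc, char *argv[])
	{
		int i;

		if (argc == 0)
			return true;		/* no selector: run everything */

		for (i = 0; i < argc; ++i) {
			char *end;
			long nr = strtol(argv[i], &end, 10);

			if (*end == '\0') {	/* all digits: 1-based test number */
				if (nr == curr + 1)
					return true;
				continue;
			}
			/* otherwise: fragment of the description */
			if (strstr(descs[curr], argv[i]))
				return true;
		}
		return false;
	}

	int main(int argc, char *argv[])
	{
		int i;

		for (i = 0; i < 3; ++i)
			if (matches(i, argc - 1, argv + 1))
				printf("%2d: %s\n", i + 1, descs[i]);
		return 0;
	}

Run with no arguments it prints all three entries; `./a.out 3` or `./a.out PERF_RECORD` selects only the validation test. Note the patch itself calls strtoul() but stores the result in a signed long; strtol() expresses that intent more directly.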
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index aa26f4d66d10..3b75b2e21ea5 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -19,6 +19,7 @@
19#include "util/color.h" 19#include "util/color.h"
20#include <linux/list.h> 20#include <linux/list.h>
21#include "util/cache.h" 21#include "util/cache.h"
22#include "util/evsel.h"
22#include <linux/rbtree.h> 23#include <linux/rbtree.h>
23#include "util/symbol.h" 24#include "util/symbol.h"
24#include "util/callchain.h" 25#include "util/callchain.h"
@@ -31,13 +32,14 @@
31#include "util/event.h" 32#include "util/event.h"
32#include "util/session.h" 33#include "util/session.h"
33#include "util/svghelper.h" 34#include "util/svghelper.h"
35#include "util/tool.h"
34 36
35#define SUPPORT_OLD_POWER_EVENTS 1 37#define SUPPORT_OLD_POWER_EVENTS 1
36#define PWR_EVENT_EXIT -1 38#define PWR_EVENT_EXIT -1
37 39
38 40
39static char const *input_name = "perf.data"; 41static const char *input_name;
40static char const *output_name = "output.svg"; 42static const char *output_name = "output.svg";
41 43
42static unsigned int numcpus; 44static unsigned int numcpus;
43static u64 min_freq; /* Lowest CPU frequency seen */ 45static u64 min_freq; /* Lowest CPU frequency seen */
@@ -273,25 +275,28 @@ static int cpus_cstate_state[MAX_CPUS];
273static u64 cpus_pstate_start_times[MAX_CPUS]; 275static u64 cpus_pstate_start_times[MAX_CPUS];
274static u64 cpus_pstate_state[MAX_CPUS]; 276static u64 cpus_pstate_state[MAX_CPUS];
275 277
276static int process_comm_event(union perf_event *event, 278static int process_comm_event(struct perf_tool *tool __used,
279 union perf_event *event,
277 struct perf_sample *sample __used, 280 struct perf_sample *sample __used,
278 struct perf_session *session __used) 281 struct machine *machine __used)
279{ 282{
280 pid_set_comm(event->comm.tid, event->comm.comm); 283 pid_set_comm(event->comm.tid, event->comm.comm);
281 return 0; 284 return 0;
282} 285}
283 286
284static int process_fork_event(union perf_event *event, 287static int process_fork_event(struct perf_tool *tool __used,
288 union perf_event *event,
285 struct perf_sample *sample __used, 289 struct perf_sample *sample __used,
286 struct perf_session *session __used) 290 struct machine *machine __used)
287{ 291{
288 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 292 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
289 return 0; 293 return 0;
290} 294}
291 295
292static int process_exit_event(union perf_event *event, 296static int process_exit_event(struct perf_tool *tool __used,
297 union perf_event *event,
293 struct perf_sample *sample __used, 298 struct perf_sample *sample __used,
294 struct perf_session *session __used) 299 struct machine *machine __used)
295{ 300{
296 pid_exit(event->fork.pid, event->fork.time); 301 pid_exit(event->fork.pid, event->fork.time);
297 return 0; 302 return 0;
@@ -486,14 +491,15 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
486} 491}
487 492
488 493
489static int process_sample_event(union perf_event *event __used, 494static int process_sample_event(struct perf_tool *tool __used,
495 union perf_event *event __used,
490 struct perf_sample *sample, 496 struct perf_sample *sample,
491 struct perf_evsel *evsel __used, 497 struct perf_evsel *evsel,
492 struct perf_session *session) 498 struct machine *machine __used)
493{ 499{
494 struct trace_entry *te; 500 struct trace_entry *te;
495 501
496 if (session->sample_type & PERF_SAMPLE_TIME) { 502 if (evsel->attr.sample_type & PERF_SAMPLE_TIME) {
497 if (!first_time || first_time > sample->time) 503 if (!first_time || first_time > sample->time)
498 first_time = sample->time; 504 first_time = sample->time;
499 if (last_time < sample->time) 505 if (last_time < sample->time)
@@ -501,7 +507,7 @@ static int process_sample_event(union perf_event *event __used,
501 } 507 }
502 508
503 te = (void *)sample->raw_data; 509 te = (void *)sample->raw_data;
504 if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) { 510 if ((evsel->attr.sample_type & PERF_SAMPLE_RAW) && sample->raw_size > 0) {
505 char *event_str; 511 char *event_str;
506#ifdef SUPPORT_OLD_POWER_EVENTS 512#ifdef SUPPORT_OLD_POWER_EVENTS
507 struct power_entry_old *peo; 513 struct power_entry_old *peo;
@@ -974,7 +980,7 @@ static void write_svg_file(const char *filename)
974 svg_close(); 980 svg_close();
975} 981}
976 982
977static struct perf_event_ops event_ops = { 983static struct perf_tool perf_timechart = {
978 .comm = process_comm_event, 984 .comm = process_comm_event,
979 .fork = process_fork_event, 985 .fork = process_fork_event,
980 .exit = process_exit_event, 986 .exit = process_exit_event,
@@ -985,7 +991,7 @@ static struct perf_event_ops event_ops = {
985static int __cmd_timechart(void) 991static int __cmd_timechart(void)
986{ 992{
987 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 993 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
988 0, false, &event_ops); 994 0, false, &perf_timechart);
989 int ret = -EINVAL; 995 int ret = -EINVAL;
990 996
991 if (session == NULL) 997 if (session == NULL)
@@ -994,7 +1000,7 @@ static int __cmd_timechart(void)
994 if (!perf_session__has_traces(session, "timechart record")) 1000 if (!perf_session__has_traces(session, "timechart record"))
995 goto out_delete; 1001 goto out_delete;
996 1002
997 ret = perf_session__process_events(session, &event_ops); 1003 ret = perf_session__process_events(session, &perf_timechart);
998 if (ret) 1004 if (ret)
999 goto out_delete; 1005 goto out_delete;
1000 1006
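
The timechart conversion above is the mechanical half of the tools/perf perf_event_ops -> perf_tool rename: every callback gains a leading struct perf_tool * argument, the struct perf_session * parameter becomes a struct machine *, and session-wide state such as session->sample_type is replaced by the per-event evsel->attr.sample_type, since one event list may mix events with different sample layouts. The per-event check is just a bitmask test against the perf_event_attr bits; a minimal sketch, using the real PERF_SAMPLE_* flags from linux/perf_event.h but a stand-in struct for perf's evsel:

	#include <linux/perf_event.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct evsel_stub {
		struct perf_event_attr attr;	/* what perf_event_open(2) was given */
	};

	static bool samples_have_time(const struct evsel_stub *evsel)
	{
		/* each event carries its own sample layout bits */
		return evsel->attr.sample_type & PERF_SAMPLE_TIME;
	}

	int main(void)
	{
		struct evsel_stub evsel = {
			.attr = { .sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW },
		};

		printf("time: %d raw: %d\n", samples_have_time(&evsel),
		       !!(evsel.attr.sample_type & PERF_SAMPLE_RAW));
		return 0;
	}

With per-event bits, process_sample_event() can honour PERF_SAMPLE_TIME for one counter and skip it for another in the same session, which the old session-wide field could not express.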
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c9cdedb58134..4f81eeb99875 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -64,44 +64,6 @@
64#include <linux/unistd.h> 64#include <linux/unistd.h>
65#include <linux/types.h> 65#include <linux/types.h>
66 66
67static struct perf_top top = {
68 .count_filter = 5,
69 .delay_secs = 2,
70 .target_pid = -1,
71 .target_tid = -1,
72 .freq = 1000, /* 1 KHz */
73};
74
75static bool system_wide = false;
76
77static bool use_tui, use_stdio;
78
79static bool sort_has_symbols;
80
81static bool dont_use_callchains;
82static char callchain_default_opt[] = "fractal,0.5,callee";
83
84
85static int default_interval = 0;
86
87static bool kptr_restrict_warned;
88static bool vmlinux_warned;
89static bool inherit = false;
90static int realtime_prio = 0;
91static bool group = false;
92static bool sample_id_all_avail = true;
93static unsigned int mmap_pages = 128;
94
95static bool dump_symtab = false;
96
97static struct winsize winsize;
98
99static const char *sym_filter = NULL;
100static int sym_pcnt_filter = 5;
101
102/*
103 * Source functions
104 */
105 67
106void get_term_dimensions(struct winsize *ws) 68void get_term_dimensions(struct winsize *ws)
107{ 69{
@@ -125,21 +87,23 @@ void get_term_dimensions(struct winsize *ws)
125 ws->ws_col = 80; 87 ws->ws_col = 80;
126} 88}
127 89
128static void update_print_entries(struct winsize *ws) 90static void perf_top__update_print_entries(struct perf_top *top)
129{ 91{
130 top.print_entries = ws->ws_row; 92 top->print_entries = top->winsize.ws_row;
131 93
132 if (top.print_entries > 9) 94 if (top->print_entries > 9)
133 top.print_entries -= 9; 95 top->print_entries -= 9;
134} 96}
135 97
136static void sig_winch_handler(int sig __used) 98static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
137{ 99{
138 get_term_dimensions(&winsize); 100 struct perf_top *top = arg;
139 update_print_entries(&winsize); 101
102 get_term_dimensions(&top->winsize);
103 perf_top__update_print_entries(top);
140} 104}
141 105
142static int parse_source(struct hist_entry *he) 106static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
143{ 107{
144 struct symbol *sym; 108 struct symbol *sym;
145 struct annotation *notes; 109 struct annotation *notes;
@@ -170,7 +134,7 @@ static int parse_source(struct hist_entry *he)
170 134
171 pthread_mutex_lock(&notes->lock); 135 pthread_mutex_lock(&notes->lock);
172 136
173 if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { 137 if (symbol__alloc_hist(sym) < 0) {
174 pthread_mutex_unlock(&notes->lock); 138 pthread_mutex_unlock(&notes->lock);
175 pr_err("Not enough memory for annotating '%s' symbol!\n", 139 pr_err("Not enough memory for annotating '%s' symbol!\n",
176 sym->name); 140 sym->name);
@@ -181,7 +145,7 @@ static int parse_source(struct hist_entry *he)
181 err = symbol__annotate(sym, map, 0); 145 err = symbol__annotate(sym, map, 0);
182 if (err == 0) { 146 if (err == 0) {
183out_assign: 147out_assign:
184 top.sym_filter_entry = he; 148 top->sym_filter_entry = he;
185 } 149 }
186 150
187 pthread_mutex_unlock(&notes->lock); 151 pthread_mutex_unlock(&notes->lock);
@@ -194,14 +158,16 @@ static void __zero_source_counters(struct hist_entry *he)
194 symbol__annotate_zero_histograms(sym); 158 symbol__annotate_zero_histograms(sym);
195} 159}
196 160
197static void record_precise_ip(struct hist_entry *he, int counter, u64 ip) 161static void perf_top__record_precise_ip(struct perf_top *top,
162 struct hist_entry *he,
163 int counter, u64 ip)
198{ 164{
199 struct annotation *notes; 165 struct annotation *notes;
200 struct symbol *sym; 166 struct symbol *sym;
201 167
202 if (he == NULL || he->ms.sym == NULL || 168 if (he == NULL || he->ms.sym == NULL ||
203 ((top.sym_filter_entry == NULL || 169 ((top->sym_filter_entry == NULL ||
204 top.sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1)) 170 top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
205 return; 171 return;
206 172
207 sym = he->ms.sym; 173 sym = he->ms.sym;
@@ -210,8 +176,7 @@ static void record_precise_ip(struct hist_entry *he, int counter, u64 ip)
210 if (pthread_mutex_trylock(&notes->lock)) 176 if (pthread_mutex_trylock(&notes->lock))
211 return; 177 return;
212 178
213 if (notes->src == NULL && 179 if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
214 symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
215 pthread_mutex_unlock(&notes->lock); 180 pthread_mutex_unlock(&notes->lock);
216 pr_err("Not enough memory for annotating '%s' symbol!\n", 181 pr_err("Not enough memory for annotating '%s' symbol!\n",
217 sym->name); 182 sym->name);
@@ -225,8 +190,9 @@ static void record_precise_ip(struct hist_entry *he, int counter, u64 ip)
225 pthread_mutex_unlock(&notes->lock); 190 pthread_mutex_unlock(&notes->lock);
226} 191}
227 192
228static void show_details(struct hist_entry *he) 193static void perf_top__show_details(struct perf_top *top)
229{ 194{
195 struct hist_entry *he = top->sym_filter_entry;
230 struct annotation *notes; 196 struct annotation *notes;
231 struct symbol *symbol; 197 struct symbol *symbol;
232 int more; 198 int more;
@@ -242,15 +208,15 @@ static void show_details(struct hist_entry *he)
242 if (notes->src == NULL) 208 if (notes->src == NULL)
243 goto out_unlock; 209 goto out_unlock;
244 210
245 printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); 211 printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
246 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 212 printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
247 213
248 more = symbol__annotate_printf(symbol, he->ms.map, top.sym_evsel->idx, 214 more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
249 0, sym_pcnt_filter, top.print_entries, 4); 215 0, top->sym_pcnt_filter, top->print_entries, 4);
250 if (top.zero) 216 if (top->zero)
251 symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); 217 symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
252 else 218 else
253 symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx); 219 symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
254 if (more != 0) 220 if (more != 0)
255 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 221 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
256out_unlock: 222out_unlock:
@@ -259,11 +225,9 @@ out_unlock:
259 225
260static const char CONSOLE_CLEAR[] = ""; 226static const char CONSOLE_CLEAR[] = "";
261 227
262static struct hist_entry * 228static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
263 perf_session__add_hist_entry(struct perf_session *session, 229 struct addr_location *al,
264 struct addr_location *al, 230 struct perf_sample *sample)
265 struct perf_sample *sample,
266 struct perf_evsel *evsel)
267{ 231{
268 struct hist_entry *he; 232 struct hist_entry *he;
269 233
@@ -271,50 +235,51 @@ static struct hist_entry *
271 if (he == NULL) 235 if (he == NULL)
272 return NULL; 236 return NULL;
273 237
274 session->hists.stats.total_period += sample->period; 238 evsel->hists.stats.total_period += sample->period;
275 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 239 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
276 return he; 240 return he;
277} 241}
278 242
279static void print_sym_table(void) 243static void perf_top__print_sym_table(struct perf_top *top)
280{ 244{
281 char bf[160]; 245 char bf[160];
282 int printed = 0; 246 int printed = 0;
283 const int win_width = winsize.ws_col - 1; 247 const int win_width = top->winsize.ws_col - 1;
284 248
285 puts(CONSOLE_CLEAR); 249 puts(CONSOLE_CLEAR);
286 250
287 perf_top__header_snprintf(&top, bf, sizeof(bf)); 251 perf_top__header_snprintf(top, bf, sizeof(bf));
288 printf("%s\n", bf); 252 printf("%s\n", bf);
289 253
290 perf_top__reset_sample_counters(&top); 254 perf_top__reset_sample_counters(top);
291 255
292 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 256 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
293 257
294 if (top.sym_evsel->hists.stats.nr_lost_warned != 258 if (top->sym_evsel->hists.stats.nr_lost_warned !=
295 top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) { 259 top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
296 top.sym_evsel->hists.stats.nr_lost_warned = 260 top->sym_evsel->hists.stats.nr_lost_warned =
297 top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]; 261 top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
298 color_fprintf(stdout, PERF_COLOR_RED, 262 color_fprintf(stdout, PERF_COLOR_RED,
299 "WARNING: LOST %d chunks, Check IO/CPU overload", 263 "WARNING: LOST %d chunks, Check IO/CPU overload",
300 top.sym_evsel->hists.stats.nr_lost_warned); 264 top->sym_evsel->hists.stats.nr_lost_warned);
301 ++printed; 265 ++printed;
302 } 266 }
303 267
304 if (top.sym_filter_entry) { 268 if (top->sym_filter_entry) {
305 show_details(top.sym_filter_entry); 269 perf_top__show_details(top);
306 return; 270 return;
307 } 271 }
308 272
309 hists__collapse_resort_threaded(&top.sym_evsel->hists); 273 hists__collapse_resort_threaded(&top->sym_evsel->hists);
310 hists__output_resort_threaded(&top.sym_evsel->hists); 274 hists__output_resort_threaded(&top->sym_evsel->hists);
311 hists__decay_entries_threaded(&top.sym_evsel->hists, 275 hists__decay_entries_threaded(&top->sym_evsel->hists,
312 top.hide_user_symbols, 276 top->hide_user_symbols,
313 top.hide_kernel_symbols); 277 top->hide_kernel_symbols);
314 hists__output_recalc_col_len(&top.sym_evsel->hists, winsize.ws_row - 3); 278 hists__output_recalc_col_len(&top->sym_evsel->hists,
279 top->winsize.ws_row - 3);
315 putchar('\n'); 280 putchar('\n');
316 hists__fprintf(&top.sym_evsel->hists, NULL, false, false, 281 hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
317 winsize.ws_row - 4 - printed, win_width, stdout); 282 top->winsize.ws_row - 4 - printed, win_width, stdout);
318} 283}
319 284
320static void prompt_integer(int *target, const char *msg) 285static void prompt_integer(int *target, const char *msg)
@@ -352,17 +317,17 @@ static void prompt_percent(int *target, const char *msg)
352 *target = tmp; 317 *target = tmp;
353} 318}
354 319
355static void prompt_symbol(struct hist_entry **target, const char *msg) 320static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
356{ 321{
357 char *buf = malloc(0), *p; 322 char *buf = malloc(0), *p;
358 struct hist_entry *syme = *target, *n, *found = NULL; 323 struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
359 struct rb_node *next; 324 struct rb_node *next;
360 size_t dummy = 0; 325 size_t dummy = 0;
361 326
362 /* zero counters of active symbol */ 327 /* zero counters of active symbol */
363 if (syme) { 328 if (syme) {
364 __zero_source_counters(syme); 329 __zero_source_counters(syme);
365 *target = NULL; 330 top->sym_filter_entry = NULL;
366 } 331 }
367 332
368 fprintf(stdout, "\n%s: ", msg); 333 fprintf(stdout, "\n%s: ", msg);
@@ -373,7 +338,7 @@ static void prompt_symbol(struct hist_entry **target, const char *msg)
373 if (p) 338 if (p)
374 *p = 0; 339 *p = 0;
375 340
376 next = rb_first(&top.sym_evsel->hists.entries); 341 next = rb_first(&top->sym_evsel->hists.entries);
377 while (next) { 342 while (next) {
378 n = rb_entry(next, struct hist_entry, rb_node); 343 n = rb_entry(next, struct hist_entry, rb_node);
379 if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { 344 if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
@@ -386,47 +351,46 @@ static void prompt_symbol(struct hist_entry **target, const char *msg)
386 if (!found) { 351 if (!found) {
387 fprintf(stderr, "Sorry, %s is not active.\n", buf); 352 fprintf(stderr, "Sorry, %s is not active.\n", buf);
388 sleep(1); 353 sleep(1);
389 return;
390 } else 354 } else
391 parse_source(found); 355 perf_top__parse_source(top, found);
392 356
393out_free: 357out_free:
394 free(buf); 358 free(buf);
395} 359}
396 360
397static void print_mapped_keys(void) 361static void perf_top__print_mapped_keys(struct perf_top *top)
398{ 362{
399 char *name = NULL; 363 char *name = NULL;
400 364
401 if (top.sym_filter_entry) { 365 if (top->sym_filter_entry) {
402 struct symbol *sym = top.sym_filter_entry->ms.sym; 366 struct symbol *sym = top->sym_filter_entry->ms.sym;
403 name = sym->name; 367 name = sym->name;
404 } 368 }
405 369
406 fprintf(stdout, "\nMapped keys:\n"); 370 fprintf(stdout, "\nMapped keys:\n");
407 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs); 371 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top->delay_secs);
408 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries); 372 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
409 373
410 if (top.evlist->nr_entries > 1) 374 if (top->evlist->nr_entries > 1)
411 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel)); 375 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top->sym_evsel));
412 376
413 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter); 377 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
414 378
415 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 379 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
416 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 380 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
417 fprintf(stdout, "\t[S] stop annotation.\n"); 381 fprintf(stdout, "\t[S] stop annotation.\n");
418 382
419 fprintf(stdout, 383 fprintf(stdout,
420 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 384 "\t[K] hide kernel_symbols symbols. \t(%s)\n",
421 top.hide_kernel_symbols ? "yes" : "no"); 385 top->hide_kernel_symbols ? "yes" : "no");
422 fprintf(stdout, 386 fprintf(stdout,
423 "\t[U] hide user symbols. \t(%s)\n", 387 "\t[U] hide user symbols. \t(%s)\n",
424 top.hide_user_symbols ? "yes" : "no"); 388 top->hide_user_symbols ? "yes" : "no");
425 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0); 389 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top->zero ? 1 : 0);
426 fprintf(stdout, "\t[qQ] quit.\n"); 390 fprintf(stdout, "\t[qQ] quit.\n");
427} 391}
428 392
429static int key_mapped(int c) 393static int perf_top__key_mapped(struct perf_top *top, int c)
430{ 394{
431 switch (c) { 395 switch (c) {
432 case 'd': 396 case 'd':
@@ -442,7 +406,7 @@ static int key_mapped(int c)
442 case 'S': 406 case 'S':
443 return 1; 407 return 1;
444 case 'E': 408 case 'E':
445 return top.evlist->nr_entries > 1 ? 1 : 0; 409 return top->evlist->nr_entries > 1 ? 1 : 0;
446 default: 410 default:
447 break; 411 break;
448 } 412 }
@@ -450,13 +414,13 @@ static int key_mapped(int c)
450 return 0; 414 return 0;
451} 415}
452 416
453static void handle_keypress(int c) 417static void perf_top__handle_keypress(struct perf_top *top, int c)
454{ 418{
455 if (!key_mapped(c)) { 419 if (!perf_top__key_mapped(top, c)) {
456 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 420 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
457 struct termios tc, save; 421 struct termios tc, save;
458 422
459 print_mapped_keys(); 423 perf_top__print_mapped_keys(top);
460 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 424 fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
461 fflush(stdout); 425 fflush(stdout);
462 426
@@ -471,81 +435,86 @@ static void handle_keypress(int c)
471 c = getc(stdin); 435 c = getc(stdin);
472 436
473 tcsetattr(0, TCSAFLUSH, &save); 437 tcsetattr(0, TCSAFLUSH, &save);
474 if (!key_mapped(c)) 438 if (!perf_top__key_mapped(top, c))
475 return; 439 return;
476 } 440 }
477 441
478 switch (c) { 442 switch (c) {
479 case 'd': 443 case 'd':
480 prompt_integer(&top.delay_secs, "Enter display delay"); 444 prompt_integer(&top->delay_secs, "Enter display delay");
481 if (top.delay_secs < 1) 445 if (top->delay_secs < 1)
482 top.delay_secs = 1; 446 top->delay_secs = 1;
483 break; 447 break;
484 case 'e': 448 case 'e':
485 prompt_integer(&top.print_entries, "Enter display entries (lines)"); 449 prompt_integer(&top->print_entries, "Enter display entries (lines)");
486 if (top.print_entries == 0) { 450 if (top->print_entries == 0) {
487 sig_winch_handler(SIGWINCH); 451 struct sigaction act = {
488 signal(SIGWINCH, sig_winch_handler); 452 .sa_sigaction = perf_top__sig_winch,
453 .sa_flags = SA_SIGINFO,
454 };
455 perf_top__sig_winch(SIGWINCH, NULL, top);
456 sigaction(SIGWINCH, &act, NULL);
489 } else 457 } else
490 signal(SIGWINCH, SIG_DFL); 458 signal(SIGWINCH, SIG_DFL);
491 break; 459 break;
492 case 'E': 460 case 'E':
493 if (top.evlist->nr_entries > 1) { 461 if (top->evlist->nr_entries > 1) {
494 /* Select 0 as the default event: */ 462 /* Select 0 as the default event: */
495 int counter = 0; 463 int counter = 0;
496 464
497 fprintf(stderr, "\nAvailable events:"); 465 fprintf(stderr, "\nAvailable events:");
498 466
499 list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) 467 list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
500 fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel)); 468 fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
501 469
502 prompt_integer(&counter, "Enter details event counter"); 470 prompt_integer(&counter, "Enter details event counter");
503 471
504 if (counter >= top.evlist->nr_entries) { 472 if (counter >= top->evlist->nr_entries) {
505 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); 473 top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
506 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel)); 474 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
507 sleep(1); 475 sleep(1);
508 break; 476 break;
509 } 477 }
510 list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) 478 list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
511 if (top.sym_evsel->idx == counter) 479 if (top->sym_evsel->idx == counter)
512 break; 480 break;
513 } else 481 } else
514 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); 482 top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
515 break; 483 break;
516 case 'f': 484 case 'f':
517 prompt_integer(&top.count_filter, "Enter display event count filter"); 485 prompt_integer(&top->count_filter, "Enter display event count filter");
518 break; 486 break;
519 case 'F': 487 case 'F':
520 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 488 prompt_percent(&top->sym_pcnt_filter,
489 "Enter details display event filter (percent)");
521 break; 490 break;
522 case 'K': 491 case 'K':
523 top.hide_kernel_symbols = !top.hide_kernel_symbols; 492 top->hide_kernel_symbols = !top->hide_kernel_symbols;
524 break; 493 break;
525 case 'q': 494 case 'q':
526 case 'Q': 495 case 'Q':
527 printf("exiting.\n"); 496 printf("exiting.\n");
528 if (dump_symtab) 497 if (top->dump_symtab)
529 perf_session__fprintf_dsos(top.session, stderr); 498 perf_session__fprintf_dsos(top->session, stderr);
530 exit(0); 499 exit(0);
531 case 's': 500 case 's':
532 prompt_symbol(&top.sym_filter_entry, "Enter details symbol"); 501 perf_top__prompt_symbol(top, "Enter details symbol");
533 break; 502 break;
534 case 'S': 503 case 'S':
535 if (!top.sym_filter_entry) 504 if (!top->sym_filter_entry)
536 break; 505 break;
537 else { 506 else {
538 struct hist_entry *syme = top.sym_filter_entry; 507 struct hist_entry *syme = top->sym_filter_entry;
539 508
540 top.sym_filter_entry = NULL; 509 top->sym_filter_entry = NULL;
541 __zero_source_counters(syme); 510 __zero_source_counters(syme);
542 } 511 }
543 break; 512 break;
544 case 'U': 513 case 'U':
545 top.hide_user_symbols = !top.hide_user_symbols; 514 top->hide_user_symbols = !top->hide_user_symbols;
546 break; 515 break;
547 case 'z': 516 case 'z':
548 top.zero = !top.zero; 517 top->zero = !top->zero;
549 break; 518 break;
550 default: 519 default:
551 break; 520 break;
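
The keypress handling above depends on reading exactly one byte from the terminal without line buffering, which is what the tcgetattr()/tcsetattr() dance with ICANON and ECHO cleared achieves. A standalone sketch of that single-keypress read, using the real termios API with error handling trimmed:

	#include <stdio.h>
	#include <termios.h>
	#include <unistd.h>

	int main(void)
	{
		struct termios save, tc;
		int c;

		tcgetattr(0, &save);
		tc = save;
		tc.c_lflag &= ~(ICANON | ECHO);	/* no line buffering, no echo */
		tc.c_cc[VMIN] = 0;
		tc.c_cc[VTIME] = 10;		/* wait up to 1s for a byte */
		tcsetattr(0, TCSANOW, &tc);

		c = getc(stdin);
		tcsetattr(0, TCSAFLUSH, &save);	/* always restore the terminal */

		if (c != EOF)
			printf("\ngot '%c'\n", c);
		return 0;
	}

Restoring the saved termios on every exit path matters; leaving the terminal in non-canonical mode is the classic way a curses-like tool wrecks the user's shell.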
@@ -563,28 +532,30 @@ static void perf_top__sort_new_samples(void *arg)
563 hists__collapse_resort_threaded(&t->sym_evsel->hists); 532 hists__collapse_resort_threaded(&t->sym_evsel->hists);
564 hists__output_resort_threaded(&t->sym_evsel->hists); 533 hists__output_resort_threaded(&t->sym_evsel->hists);
565 hists__decay_entries_threaded(&t->sym_evsel->hists, 534 hists__decay_entries_threaded(&t->sym_evsel->hists,
566 top.hide_user_symbols, 535 t->hide_user_symbols,
567 top.hide_kernel_symbols); 536 t->hide_kernel_symbols);
568} 537}
569 538
570static void *display_thread_tui(void *arg __used) 539static void *display_thread_tui(void *arg)
571{ 540{
541 struct perf_top *top = arg;
572 const char *help = "For a higher level overview, try: perf top --sort comm,dso"; 542 const char *help = "For a higher level overview, try: perf top --sort comm,dso";
573 543
574 perf_top__sort_new_samples(&top); 544 perf_top__sort_new_samples(top);
575 perf_evlist__tui_browse_hists(top.evlist, help, 545 perf_evlist__tui_browse_hists(top->evlist, help,
576 perf_top__sort_new_samples, 546 perf_top__sort_new_samples,
577 &top, top.delay_secs); 547 top, top->delay_secs);
578 548
579 exit_browser(0); 549 exit_browser(0);
580 exit(0); 550 exit(0);
581 return NULL; 551 return NULL;
582} 552}
583 553
584static void *display_thread(void *arg __used) 554static void *display_thread(void *arg)
585{ 555{
586 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 556 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
587 struct termios tc, save; 557 struct termios tc, save;
558 struct perf_top *top = arg;
588 int delay_msecs, c; 559 int delay_msecs, c;
589 560
590 tcgetattr(0, &save); 561 tcgetattr(0, &save);
@@ -595,13 +566,13 @@ static void *display_thread(void *arg __used)
595 566
596 pthread__unblock_sigwinch(); 567 pthread__unblock_sigwinch();
597repeat: 568repeat:
598 delay_msecs = top.delay_secs * 1000; 569 delay_msecs = top->delay_secs * 1000;
599 tcsetattr(0, TCSANOW, &tc); 570 tcsetattr(0, TCSANOW, &tc);
600 /* trash return */ 571 /* trash return */
601 getc(stdin); 572 getc(stdin);
602 573
603 while (1) { 574 while (1) {
604 print_sym_table(); 575 perf_top__print_sym_table(top);
605 /* 576 /*
606 * Either timeout expired or we got an EINTR due to SIGWINCH, 577 * Either timeout expired or we got an EINTR due to SIGWINCH,
607 * refresh screen in both cases. 578 * refresh screen in both cases.
@@ -621,7 +592,7 @@ process_hotkey:
621 c = getc(stdin); 592 c = getc(stdin);
622 tcsetattr(0, TCSAFLUSH, &save); 593 tcsetattr(0, TCSAFLUSH, &save);
623 594
624 handle_keypress(c); 595 perf_top__handle_keypress(top, c);
625 goto repeat; 596 goto repeat;
626 597
627 return NULL; 598 return NULL;
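
Both display threads above stop relying on the file-scope top and instead receive the struct perf_top * through pthread_create()'s argument, the standard way to hand per-command state to a thread. A minimal sketch of the pattern, with a reduced stand-in struct rather than the real perf_top:

	#include <pthread.h>
	#include <stdio.h>

	struct top_stub {
		int delay_secs;
	};

	static void *display_thread(void *arg)
	{
		struct top_stub *top = arg;	/* recover the state handed to us */

		printf("refresh every %d s\n", top->delay_secs);
		return NULL;
	}

	int main(void)
	{
		struct top_stub top = { .delay_secs = 2 };
		pthread_t thread;

		if (pthread_create(&thread, NULL, display_thread, &top))
			return 1;
		pthread_join(thread, NULL);
		return 0;
	}

(Link with -pthread.) The same object later carries the terminal dimensions in top->winsize, so the SIGWINCH handler and the display loop agree on one copy of the state.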
@@ -673,47 +644,17 @@ static int symbol_filter(struct map *map __used, struct symbol *sym)
673 return 0; 644 return 0;
674} 645}
675 646
676static void perf_event__process_sample(const union perf_event *event, 647static void perf_event__process_sample(struct perf_tool *tool,
648 const union perf_event *event,
677 struct perf_evsel *evsel, 649 struct perf_evsel *evsel,
678 struct perf_sample *sample, 650 struct perf_sample *sample,
679 struct perf_session *session) 651 struct machine *machine)
680{ 652{
653 struct perf_top *top = container_of(tool, struct perf_top, tool);
681 struct symbol *parent = NULL; 654 struct symbol *parent = NULL;
682 u64 ip = event->ip.ip; 655 u64 ip = event->ip.ip;
683 struct addr_location al; 656 struct addr_location al;
684 struct machine *machine;
685 int err; 657 int err;
686 u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
687
688 ++top.samples;
689
690 switch (origin) {
691 case PERF_RECORD_MISC_USER:
692 ++top.us_samples;
693 if (top.hide_user_symbols)
694 return;
695 machine = perf_session__find_host_machine(session);
696 break;
697 case PERF_RECORD_MISC_KERNEL:
698 ++top.kernel_samples;
699 if (top.hide_kernel_symbols)
700 return;
701 machine = perf_session__find_host_machine(session);
702 break;
703 case PERF_RECORD_MISC_GUEST_KERNEL:
704 ++top.guest_kernel_samples;
705 machine = perf_session__find_machine(session, event->ip.pid);
706 break;
707 case PERF_RECORD_MISC_GUEST_USER:
708 ++top.guest_us_samples;
709 /*
710 * TODO: we don't process guest user from host side
711 * except simple counting.
712 */
713 return;
714 default:
715 return;
716 }
717 658
718 if (!machine && perf_guest) { 659 if (!machine && perf_guest) {
719 pr_err("Can't find guest [%d]'s kernel information\n", 660 pr_err("Can't find guest [%d]'s kernel information\n",
@@ -722,14 +663,14 @@ static void perf_event__process_sample(const union perf_event *event,
722 } 663 }
723 664
724 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) 665 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
725 top.exact_samples++; 666 top->exact_samples++;
726 667
727 if (perf_event__preprocess_sample(event, session, &al, sample, 668 if (perf_event__preprocess_sample(event, machine, &al, sample,
728 symbol_filter) < 0 || 669 symbol_filter) < 0 ||
729 al.filtered) 670 al.filtered)
730 return; 671 return;
731 672
732 if (!kptr_restrict_warned && 673 if (!top->kptr_restrict_warned &&
733 symbol_conf.kptr_restrict && 674 symbol_conf.kptr_restrict &&
734 al.cpumode == PERF_RECORD_MISC_KERNEL) { 675 al.cpumode == PERF_RECORD_MISC_KERNEL) {
735 ui__warning( 676 ui__warning(
@@ -740,7 +681,7 @@ static void perf_event__process_sample(const union perf_event *event,
740 " modules" : ""); 681 " modules" : "");
741 if (use_browser <= 0) 682 if (use_browser <= 0)
742 sleep(5); 683 sleep(5);
743 kptr_restrict_warned = true; 684 top->kptr_restrict_warned = true;
744 } 685 }
745 686
746 if (al.sym == NULL) { 687 if (al.sym == NULL) {
@@ -756,7 +697,7 @@ static void perf_event__process_sample(const union perf_event *event,
756 * --hide-kernel-symbols, even if the user specifies an 697 * --hide-kernel-symbols, even if the user specifies an
757 * invalid --vmlinux ;-) 698 * invalid --vmlinux ;-)
758 */ 699 */
759 if (!kptr_restrict_warned && !vmlinux_warned && 700 if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
760 al.map == machine->vmlinux_maps[MAP__FUNCTION] && 701 al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
761 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 702 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
762 if (symbol_conf.vmlinux_name) { 703 if (symbol_conf.vmlinux_name) {
@@ -769,7 +710,7 @@ static void perf_event__process_sample(const union perf_event *event,
769 710
770 if (use_browser <= 0) 711 if (use_browser <= 0)
771 sleep(5); 712 sleep(5);
772 vmlinux_warned = true; 713 top->vmlinux_warned = true;
773 } 714 }
774 } 715 }
775 716
@@ -778,70 +719,109 @@ static void perf_event__process_sample(const union perf_event *event,
778 719
779 if ((sort__has_parent || symbol_conf.use_callchain) && 720 if ((sort__has_parent || symbol_conf.use_callchain) &&
780 sample->callchain) { 721 sample->callchain) {
781 err = perf_session__resolve_callchain(session, al.thread, 722 err = machine__resolve_callchain(machine, evsel, al.thread,
782 sample->callchain, &parent); 723 sample->callchain, &parent);
783 if (err) 724 if (err)
784 return; 725 return;
785 } 726 }
786 727
787 he = perf_session__add_hist_entry(session, &al, sample, evsel); 728 he = perf_evsel__add_hist_entry(evsel, &al, sample);
788 if (he == NULL) { 729 if (he == NULL) {
789 pr_err("Problem incrementing symbol period, skipping event\n"); 730 pr_err("Problem incrementing symbol period, skipping event\n");
790 return; 731 return;
791 } 732 }
792 733
793 if (symbol_conf.use_callchain) { 734 if (symbol_conf.use_callchain) {
794 err = callchain_append(he->callchain, &session->callchain_cursor, 735 err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
795 sample->period); 736 sample->period);
796 if (err) 737 if (err)
797 return; 738 return;
798 } 739 }
799 740
800 if (sort_has_symbols) 741 if (top->sort_has_symbols)
801 record_precise_ip(he, evsel->idx, ip); 742 perf_top__record_precise_ip(top, he, evsel->idx, ip);
802 } 743 }
803 744
804 return; 745 return;
805} 746}
806 747
807static void perf_session__mmap_read_idx(struct perf_session *self, int idx) 748static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
808{ 749{
809 struct perf_sample sample; 750 struct perf_sample sample;
810 struct perf_evsel *evsel; 751 struct perf_evsel *evsel;
752 struct perf_session *session = top->session;
811 union perf_event *event; 753 union perf_event *event;
754 struct machine *machine;
755 u8 origin;
812 int ret; 756 int ret;
813 757
814 while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) { 758 while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
815 ret = perf_session__parse_sample(self, event, &sample); 759 ret = perf_session__parse_sample(session, event, &sample);
816 if (ret) { 760 if (ret) {
817 pr_err("Can't parse sample, err = %d\n", ret); 761 pr_err("Can't parse sample, err = %d\n", ret);
818 continue; 762 continue;
819 } 763 }
820 764
821 evsel = perf_evlist__id2evsel(self->evlist, sample.id); 765 evsel = perf_evlist__id2evsel(session->evlist, sample.id);
822 assert(evsel != NULL); 766 assert(evsel != NULL);
823 767
768 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
769
824 if (event->header.type == PERF_RECORD_SAMPLE) 770 if (event->header.type == PERF_RECORD_SAMPLE)
825 perf_event__process_sample(event, evsel, &sample, self); 771 ++top->samples;
826 else if (event->header.type < PERF_RECORD_MAX) { 772
773 switch (origin) {
774 case PERF_RECORD_MISC_USER:
775 ++top->us_samples;
776 if (top->hide_user_symbols)
777 continue;
778 machine = perf_session__find_host_machine(session);
779 break;
780 case PERF_RECORD_MISC_KERNEL:
781 ++top->kernel_samples;
782 if (top->hide_kernel_symbols)
783 continue;
784 machine = perf_session__find_host_machine(session);
785 break;
786 case PERF_RECORD_MISC_GUEST_KERNEL:
787 ++top->guest_kernel_samples;
788 machine = perf_session__find_machine(session, event->ip.pid);
789 break;
790 case PERF_RECORD_MISC_GUEST_USER:
791 ++top->guest_us_samples;
792 /*
793 * TODO: we don't process guest user from host side
794 * except simple counting.
795 */
796 /* Fall thru */
797 default:
798 continue;
799 }
800
801
802 if (event->header.type == PERF_RECORD_SAMPLE) {
803 perf_event__process_sample(&top->tool, event, evsel,
804 &sample, machine);
805 } else if (event->header.type < PERF_RECORD_MAX) {
827 hists__inc_nr_events(&evsel->hists, event->header.type); 806 hists__inc_nr_events(&evsel->hists, event->header.type);
828 perf_event__process(event, &sample, self); 807 perf_event__process(&top->tool, event, &sample, machine);
829 } else 808 } else
830 ++self->hists.stats.nr_unknown_events; 809 ++session->hists.stats.nr_unknown_events;
831 } 810 }
832} 811}
833 812
834static void perf_session__mmap_read(struct perf_session *self) 813static void perf_top__mmap_read(struct perf_top *top)
835{ 814{
836 int i; 815 int i;
837 816
838 for (i = 0; i < top.evlist->nr_mmaps; i++) 817 for (i = 0; i < top->evlist->nr_mmaps; i++)
839 perf_session__mmap_read_idx(self, i); 818 perf_top__mmap_read_idx(top, i);
840} 819}
841 820
842static void start_counters(struct perf_evlist *evlist) 821static void perf_top__start_counters(struct perf_top *top)
843{ 822{
844 struct perf_evsel *counter, *first; 823 struct perf_evsel *counter, *first;
824 struct perf_evlist *evlist = top->evlist;
845 825
846 first = list_entry(evlist->entries.next, struct perf_evsel, node); 826 first = list_entry(evlist->entries.next, struct perf_evsel, node);
847 827
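
A notable non-mechanical change in the hunk above: the PERF_RECORD_MISC_* cpumode switch moves out of perf_event__process_sample() and into perf_top__mmap_read_idx(), so the machine is resolved once per event, before deciding whether to process it at all. The classification itself is a mask-and-switch on the event header; a standalone sketch using the real bits from linux/perf_event.h, with the machine lookup reduced to an enum:

	#include <linux/perf_event.h>
	#include <stdio.h>

	enum target { RESOLVE_HOST, RESOLVE_GUEST, COUNT_ONLY };

	static enum target classify(unsigned int misc)
	{
		switch (misc & PERF_RECORD_MISC_CPUMODE_MASK) {
		case PERF_RECORD_MISC_USER:
		case PERF_RECORD_MISC_KERNEL:
			return RESOLVE_HOST;	/* host machine's symbol tables */
		case PERF_RECORD_MISC_GUEST_KERNEL:
			return RESOLVE_GUEST;	/* look up the guest by pid */
		case PERF_RECORD_MISC_GUEST_USER:	/* counted, not resolved */
		default:
			return COUNT_ONLY;
		}
	}

	int main(void)
	{
		printf("user=%d guest_kernel=%d guest_user=%d\n",
		       classify(PERF_RECORD_MISC_USER),
		       classify(PERF_RECORD_MISC_GUEST_KERNEL),
		       classify(PERF_RECORD_MISC_GUEST_USER));
		return 0;
	}

The continue statements in the real loop are what make hide_user_symbols and hide_kernel_symbols cheap: hidden samples still bump the counters but are never resolved against symbol tables.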
@@ -849,15 +829,15 @@ static void start_counters(struct perf_evlist *evlist)
849 struct perf_event_attr *attr = &counter->attr; 829 struct perf_event_attr *attr = &counter->attr;
850 struct xyarray *group_fd = NULL; 830 struct xyarray *group_fd = NULL;
851 831
852 if (group && counter != first) 832 if (top->group && counter != first)
853 group_fd = first->fd; 833 group_fd = first->fd;
854 834
855 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 835 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
856 836
857 if (top.freq) { 837 if (top->freq) {
858 attr->sample_type |= PERF_SAMPLE_PERIOD; 838 attr->sample_type |= PERF_SAMPLE_PERIOD;
859 attr->freq = 1; 839 attr->freq = 1;
860 attr->sample_freq = top.freq; 840 attr->sample_freq = top->freq;
861 } 841 }
862 842
863 if (evlist->nr_entries > 1) { 843 if (evlist->nr_entries > 1) {
@@ -870,23 +850,23 @@ static void start_counters(struct perf_evlist *evlist)
870 850
871 attr->mmap = 1; 851 attr->mmap = 1;
872 attr->comm = 1; 852 attr->comm = 1;
873 attr->inherit = inherit; 853 attr->inherit = top->inherit;
874retry_sample_id: 854retry_sample_id:
875 attr->sample_id_all = sample_id_all_avail ? 1 : 0; 855 attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
876try_again: 856try_again:
877 if (perf_evsel__open(counter, top.evlist->cpus, 857 if (perf_evsel__open(counter, top->evlist->cpus,
878 top.evlist->threads, group, 858 top->evlist->threads, top->group,
879 group_fd) < 0) { 859 group_fd) < 0) {
880 int err = errno; 860 int err = errno;
881 861
882 if (err == EPERM || err == EACCES) { 862 if (err == EPERM || err == EACCES) {
883 ui__error_paranoid(); 863 ui__error_paranoid();
884 goto out_err; 864 goto out_err;
885 } else if (err == EINVAL && sample_id_all_avail) { 865 } else if (err == EINVAL && top->sample_id_all_avail) {
886 /* 866 /*
887 * Old kernel, no attr->sample_id_type_all field 867 * Old kernel, no attr->sample_id_type_all field
888 */ 868 */
889 sample_id_all_avail = false; 869 top->sample_id_all_avail = false;
890 goto retry_sample_id; 870 goto retry_sample_id;
891 } 871 }
892 /* 872 /*
@@ -920,7 +900,7 @@ try_again:
920 } 900 }
921 } 901 }
922 902
923 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) { 903 if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
924 ui__warning("Failed to mmap with %d (%s)\n", 904 ui__warning("Failed to mmap with %d (%s)\n",
925 errno, strerror(errno)); 905 errno, strerror(errno));
926 goto out_err; 906 goto out_err;
@@ -933,14 +913,14 @@ out_err:
933 exit(0); 913 exit(0);
934} 914}
935 915
936static int setup_sample_type(void) 916static int perf_top__setup_sample_type(struct perf_top *top)
937{ 917{
938 if (!sort_has_symbols) { 918 if (!top->sort_has_symbols) {
939 if (symbol_conf.use_callchain) { 919 if (symbol_conf.use_callchain) {
940 ui__warning("Selected -g but \"sym\" not present in --sort/-s."); 920 ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
941 return -EINVAL; 921 return -EINVAL;
942 } 922 }
943 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE) { 923 } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
944 if (callchain_register_param(&callchain_param) < 0) { 924 if (callchain_register_param(&callchain_param) < 0) {
945 ui__warning("Can't register callchain params.\n"); 925 ui__warning("Can't register callchain params.\n");
946 return -EINVAL; 926 return -EINVAL;
@@ -950,7 +930,7 @@ static int setup_sample_type(void)
950 return 0; 930 return 0;
951} 931}
952 932
953static int __cmd_top(void) 933static int __cmd_top(struct perf_top *top)
954{ 934{
955 pthread_t thread; 935 pthread_t thread;
956 int ret; 936 int ret;
@@ -958,39 +938,40 @@ static int __cmd_top(void)
958 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 938 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
959 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 939 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
960 */ 940 */
961 top.session = perf_session__new(NULL, O_WRONLY, false, false, NULL); 941 top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
962 if (top.session == NULL) 942 if (top->session == NULL)
963 return -ENOMEM; 943 return -ENOMEM;
964 944
965 ret = setup_sample_type(); 945 ret = perf_top__setup_sample_type(top);
966 if (ret) 946 if (ret)
967 goto out_delete; 947 goto out_delete;
968 948
969 if (top.target_tid != -1) 949 if (top->target_tid != -1)
970 perf_event__synthesize_thread_map(top.evlist->threads, 950 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
971 perf_event__process, top.session); 951 perf_event__process,
952 &top->session->host_machine);
972 else 953 else
973 perf_event__synthesize_threads(perf_event__process, top.session); 954 perf_event__synthesize_threads(&top->tool, perf_event__process,
974 955 &top->session->host_machine);
975 start_counters(top.evlist); 956 perf_top__start_counters(top);
976 top.session->evlist = top.evlist; 957 top->session->evlist = top->evlist;
977 perf_session__update_sample_type(top.session); 958 perf_session__update_sample_type(top->session);
978 959
979 /* Wait for a minimal set of events before starting the snapshot */ 960 /* Wait for a minimal set of events before starting the snapshot */
980 poll(top.evlist->pollfd, top.evlist->nr_fds, 100); 961 poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
981 962
982 perf_session__mmap_read(top.session); 963 perf_top__mmap_read(top);
983 964
984 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : 965 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
985 display_thread), NULL)) { 966 display_thread), top)) {
986 printf("Could not create display thread.\n"); 967 printf("Could not create display thread.\n");
987 exit(-1); 968 exit(-1);
988 } 969 }
989 970
990 if (realtime_prio) { 971 if (top->realtime_prio) {
991 struct sched_param param; 972 struct sched_param param;
992 973
993 param.sched_priority = realtime_prio; 974 param.sched_priority = top->realtime_prio;
994 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 975 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
995 printf("Could not set realtime priority.\n"); 976 printf("Could not set realtime priority.\n");
996 exit(-1); 977 exit(-1);
@@ -998,25 +979,25 @@ static int __cmd_top(void)
998 } 979 }
999 980
1000 while (1) { 981 while (1) {
1001 u64 hits = top.samples; 982 u64 hits = top->samples;
1002 983
1003 perf_session__mmap_read(top.session); 984 perf_top__mmap_read(top);
1004 985
1005 if (hits == top.samples) 986 if (hits == top->samples)
1006 ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100); 987 ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1007 } 988 }
1008 989
1009out_delete: 990out_delete:
1010 perf_session__delete(top.session); 991 perf_session__delete(top->session);
1011 top.session = NULL; 992 top->session = NULL;
1012 993
1013 return 0; 994 return 0;
1014} 995}
1015 996
1016static int 997static int
1017parse_callchain_opt(const struct option *opt __used, const char *arg, 998parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1018 int unset)
1019{ 999{
1000 struct perf_top *top = (struct perf_top *)opt->value;
1020 char *tok, *tok2; 1001 char *tok, *tok2;
1021 char *endptr; 1002 char *endptr;
1022 1003
@@ -1024,7 +1005,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
1024 * --no-call-graph 1005 * --no-call-graph
1025 */ 1006 */
1026 if (unset) { 1007 if (unset) {
1027 dont_use_callchains = true; 1008 top->dont_use_callchains = true;
1028 return 0; 1009 return 0;
1029 } 1010 }
1030 1011
@@ -1052,9 +1033,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
1052 symbol_conf.use_callchain = false; 1033 symbol_conf.use_callchain = false;
1053 1034
1054 return 0; 1035 return 0;
1055 } 1036 } else
1056
1057 else
1058 return -1; 1037 return -1;
1059 1038
1060 /* get the min percentage */ 1039 /* get the min percentage */
@@ -1098,17 +1077,32 @@ static const char * const top_usage[] = {
1098 NULL 1077 NULL
1099}; 1078};
1100 1079
1101static const struct option options[] = { 1080int cmd_top(int argc, const char **argv, const char *prefix __used)
1081{
1082 struct perf_evsel *pos;
1083 int status = -ENOMEM;
1084 struct perf_top top = {
1085 .count_filter = 5,
1086 .delay_secs = 2,
1087 .target_pid = -1,
1088 .target_tid = -1,
1089 .freq = 1000, /* 1 KHz */
1090 .sample_id_all_avail = true,
1091 .mmap_pages = 128,
1092 .sym_pcnt_filter = 5,
1093 };
1094 char callchain_default_opt[] = "fractal,0.5,callee";
1095 const struct option options[] = {
1102 OPT_CALLBACK('e', "event", &top.evlist, "event", 1096 OPT_CALLBACK('e', "event", &top.evlist, "event",
1103 "event selector. use 'perf list' to list available events", 1097 "event selector. use 'perf list' to list available events",
1104 parse_events_option), 1098 parse_events_option),
1105 OPT_INTEGER('c', "count", &default_interval, 1099 OPT_INTEGER('c', "count", &top.default_interval,
1106 "event period to sample"), 1100 "event period to sample"),
1107 OPT_INTEGER('p', "pid", &top.target_pid, 1101 OPT_INTEGER('p', "pid", &top.target_pid,
1108 "profile events on existing process id"), 1102 "profile events on existing process id"),
1109 OPT_INTEGER('t', "tid", &top.target_tid, 1103 OPT_INTEGER('t', "tid", &top.target_tid,
1110 "profile events on existing thread id"), 1104 "profile events on existing thread id"),
1111 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1105 OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
1112 "system-wide collection from all CPUs"), 1106 "system-wide collection from all CPUs"),
1113 OPT_STRING('C', "cpu", &top.cpu_list, "cpu", 1107 OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
1114 "list of cpus to monitor"), 1108 "list of cpus to monitor"),
@@ -1116,20 +1110,20 @@ static const struct option options[] = {
1116 "file", "vmlinux pathname"), 1110 "file", "vmlinux pathname"),
1117 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, 1111 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1118 "hide kernel symbols"), 1112 "hide kernel symbols"),
1119 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 1113 OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
1120 OPT_INTEGER('r', "realtime", &realtime_prio, 1114 OPT_INTEGER('r', "realtime", &top.realtime_prio,
1121 "collect data with this RT SCHED_FIFO priority"), 1115 "collect data with this RT SCHED_FIFO priority"),
1122 OPT_INTEGER('d', "delay", &top.delay_secs, 1116 OPT_INTEGER('d', "delay", &top.delay_secs,
1123 "number of seconds to delay between refreshes"), 1117 "number of seconds to delay between refreshes"),
1124 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1118 OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
1125 "dump the symbol table used for profiling"), 1119 "dump the symbol table used for profiling"),
1126 OPT_INTEGER('f', "count-filter", &top.count_filter, 1120 OPT_INTEGER('f', "count-filter", &top.count_filter,
1127 "only display functions with more events than this"), 1121 "only display functions with more events than this"),
1128 OPT_BOOLEAN('g', "group", &group, 1122 OPT_BOOLEAN('g', "group", &top.group,
1129 "put the counters into a counter group"), 1123 "put the counters into a counter group"),
1130 OPT_BOOLEAN('i', "inherit", &inherit, 1124 OPT_BOOLEAN('i', "inherit", &top.inherit,
1131 "child tasks inherit counters"), 1125 "child tasks inherit counters"),
1132 OPT_STRING(0, "sym-annotate", &sym_filter, "symbol name", 1126 OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
1133 "symbol to annotate"), 1127 "symbol to annotate"),
1134 OPT_BOOLEAN('z', "zero", &top.zero, 1128 OPT_BOOLEAN('z', "zero", &top.zero,
1135 "zero history across updates"), 1129 "zero history across updates"),
@@ -1139,15 +1133,15 @@ static const struct option options[] = {
1139 "display this many functions"), 1133 "display this many functions"),
1140 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, 1134 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1141 "hide user symbols"), 1135 "hide user symbols"),
1142 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), 1136 OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
1143 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), 1137 OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
1144 OPT_INCR('v', "verbose", &verbose, 1138 OPT_INCR('v', "verbose", &verbose,
1145 "be more verbose (show counter open errors, etc)"), 1139 "be more verbose (show counter open errors, etc)"),
1146 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1140 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1147 "sort by key(s): pid, comm, dso, symbol, parent"), 1141 "sort by key(s): pid, comm, dso, symbol, parent"),
1148 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 1142 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
1149 "Show a column with the number of samples"), 1143 "Show a column with the number of samples"),
1150 OPT_CALLBACK_DEFAULT('G', "call-graph", NULL, "output_type,min_percent, call_order", 1144 OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
1151 "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. " 1145 "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
1152 "Default: fractal,0.5,callee", &parse_callchain_opt, 1146 "Default: fractal,0.5,callee", &parse_callchain_opt,
1153 callchain_default_opt), 1147 callchain_default_opt),
@@ -1166,12 +1160,7 @@ static const struct option options[] = {
1166 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1160 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1167 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1161 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1168 OPT_END() 1162 OPT_END()
1169}; 1163 };
1170
1171int cmd_top(int argc, const char **argv, const char *prefix __used)
1172{
1173 struct perf_evsel *pos;
1174 int status = -ENOMEM;
1175 1164
1176 top.evlist = perf_evlist__new(NULL, NULL); 1165 top.evlist = perf_evlist__new(NULL, NULL);
1177 if (top.evlist == NULL) 1166 if (top.evlist == NULL)
@@ -1188,9 +1177,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1188 1177
1189 setup_sorting(top_usage, options); 1178 setup_sorting(top_usage, options);
1190 1179
1191 if (use_stdio) 1180 if (top.use_stdio)
1192 use_browser = 0; 1181 use_browser = 0;
1193 else if (use_tui) 1182 else if (top.use_tui)
1194 use_browser = 1; 1183 use_browser = 1;
1195 1184
1196 setup_browser(false); 1185 setup_browser(false);
@@ -1215,38 +1204,31 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1215 return -ENOMEM; 1204 return -ENOMEM;
1216 } 1205 }
1217 1206
1207 symbol_conf.nr_events = top.evlist->nr_entries;
1208
1218 if (top.delay_secs < 1) 1209 if (top.delay_secs < 1)
1219 top.delay_secs = 1; 1210 top.delay_secs = 1;
1220 1211
1221 /* 1212 /*
1222 * User specified count overrides default frequency. 1213 * User specified count overrides default frequency.
1223 */ 1214 */
1224 if (default_interval) 1215 if (top.default_interval)
1225 top.freq = 0; 1216 top.freq = 0;
1226 else if (top.freq) { 1217 else if (top.freq) {
1227 default_interval = top.freq; 1218 top.default_interval = top.freq;
1228 } else { 1219 } else {
1229 fprintf(stderr, "frequency and count are zero, aborting\n"); 1220 fprintf(stderr, "frequency and count are zero, aborting\n");
1230 exit(EXIT_FAILURE); 1221 exit(EXIT_FAILURE);
1231 } 1222 }
1232 1223
1233 list_for_each_entry(pos, &top.evlist->entries, node) { 1224 list_for_each_entry(pos, &top.evlist->entries, node) {
1234 if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
1235 top.evlist->threads->nr) < 0)
1236 goto out_free_fd;
1237 /* 1225 /*
1238 * Fill in the ones not specifically initialized via -c: 1226 * Fill in the ones not specifically initialized via -c:
1239 */ 1227 */
1240 if (pos->attr.sample_period) 1228 if (!pos->attr.sample_period)
1241 continue; 1229 pos->attr.sample_period = top.default_interval;
1242
1243 pos->attr.sample_period = default_interval;
1244 } 1230 }
1245 1231
1246 if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
1247 perf_evlist__alloc_mmap(top.evlist) < 0)
1248 goto out_free_fd;
1249
1250 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); 1232 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
1251 1233
1252 symbol_conf.priv_size = sizeof(struct annotation); 1234 symbol_conf.priv_size = sizeof(struct annotation);
@@ -1263,16 +1245,20 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1263 * Avoid annotation data structures overhead when symbols aren't on the 1245 * Avoid annotation data structures overhead when symbols aren't on the
1264 * sort list. 1246 * sort list.
1265 */ 1247 */
1266 sort_has_symbols = sort_sym.list.next != NULL; 1248 top.sort_has_symbols = sort_sym.list.next != NULL;
1267 1249
1268 get_term_dimensions(&winsize); 1250 get_term_dimensions(&top.winsize);
1269 if (top.print_entries == 0) { 1251 if (top.print_entries == 0) {
1270 update_print_entries(&winsize); 1252 struct sigaction act = {
1271 signal(SIGWINCH, sig_winch_handler); 1253 .sa_sigaction = perf_top__sig_winch,
1254 .sa_flags = SA_SIGINFO,
1255 };
1256 perf_top__update_print_entries(&top);
1257 sigaction(SIGWINCH, &act, NULL);
1272 } 1258 }
1273 1259
1274 status = __cmd_top(); 1260 status = __cmd_top(&top);
1275out_free_fd: 1261
1276 perf_evlist__delete(top.evlist); 1262 perf_evlist__delete(top.evlist);
1277 1263
1278 return status; 1264 return status;
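
The builtin-top.c hunks above fold the old file-scope state (use_tui, use_stdio, default_interval, winsize) into struct perf_top and replace the signal(2) registration for SIGWINCH with sigaction(2) plus SA_SIGINFO. A minimal, self-contained sketch of that resize-handling pattern, with illustrative names (resized, sig_winch) rather than perf's:

#include <signal.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

static volatile sig_atomic_t resized;

static void sig_winch(int sig, siginfo_t *info, void *uctx)
{
	(void)sig; (void)info; (void)uctx;
	resized = 1;		/* defer the real work to the main loop */
}

int main(void)
{
	struct winsize ws;
	struct sigaction act = {
		.sa_sigaction = sig_winch,
		.sa_flags     = SA_SIGINFO,
	};

	sigaction(SIGWINCH, &act, NULL);
	pause();		/* resize the terminal to continue */

	if (resized && ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) == 0)
		printf("terminal is now %ux%u\n", ws.ws_col, ws.ws_row);
	return 0;
}

Keeping the handler down to a single sig_atomic_t store and doing the ioctl() afterwards keeps the handler trivially async-signal-safe.
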
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 73d0cac8b67e..2b2e225a4d4c 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -29,8 +29,6 @@ struct pager_config {
29 int val; 29 int val;
30}; 30};
31 31
32static char debugfs_mntpt[MAXPATHLEN];
33
34static int pager_command_config(const char *var, const char *value, void *data) 32static int pager_command_config(const char *var, const char *value, void *data)
35{ 33{
36 struct pager_config *c = data; 34 struct pager_config *c = data;
@@ -81,15 +79,6 @@ static void commit_pager_choice(void)
81 } 79 }
82} 80}
83 81
84static void set_debugfs_path(void)
85{
86 char *path;
87
88 path = getenv(PERF_DEBUGFS_ENVIRONMENT);
89 snprintf(debugfs_path, MAXPATHLEN, "%s/%s", path ?: debugfs_mntpt,
90 "tracing/events");
91}
92
93static int handle_options(const char ***argv, int *argc, int *envchanged) 82static int handle_options(const char ***argv, int *argc, int *envchanged)
94{ 83{
95 int handled = 0; 84 int handled = 0;
@@ -161,15 +150,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
161 fprintf(stderr, "No directory given for --debugfs-dir.\n"); 150 fprintf(stderr, "No directory given for --debugfs-dir.\n");
162 usage(perf_usage_string); 151 usage(perf_usage_string);
163 } 152 }
164 strncpy(debugfs_mntpt, (*argv)[1], MAXPATHLEN); 153 debugfs_set_path((*argv)[1]);
165 debugfs_mntpt[MAXPATHLEN - 1] = '\0';
166 if (envchanged) 154 if (envchanged)
167 *envchanged = 1; 155 *envchanged = 1;
168 (*argv)++; 156 (*argv)++;
169 (*argc)--; 157 (*argc)--;
170 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { 158 } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
171 strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN); 159 debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
172 debugfs_mntpt[MAXPATHLEN - 1] = '\0'; 160 fprintf(stderr, "dir: %s\n", debugfs_mountpoint);
173 if (envchanged) 161 if (envchanged)
174 *envchanged = 1; 162 *envchanged = 1;
175 } else { 163 } else {
@@ -281,7 +269,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
281 if (use_pager == -1 && p->option & USE_PAGER) 269 if (use_pager == -1 && p->option & USE_PAGER)
282 use_pager = 1; 270 use_pager = 1;
283 commit_pager_choice(); 271 commit_pager_choice();
284 set_debugfs_path();
285 272
286 status = p->fn(argc, argv, prefix); 273 status = p->fn(argc, argv, prefix);
287 exit_browser(status); 274 exit_browser(status);
@@ -416,17 +403,6 @@ static int run_argv(int *argcp, const char ***argv)
416 return done_alias; 403 return done_alias;
417} 404}
418 405
419/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
420static void get_debugfs_mntpt(void)
421{
422 const char *path = debugfs_mount(NULL);
423
424 if (path)
425 strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
426 else
427 debugfs_mntpt[0] = '\0';
428}
429
430static void pthread__block_sigwinch(void) 406static void pthread__block_sigwinch(void)
431{ 407{
432 sigset_t set; 408 sigset_t set;
@@ -453,7 +429,7 @@ int main(int argc, const char **argv)
453 if (!cmd) 429 if (!cmd)
454 cmd = "perf-help"; 430 cmd = "perf-help";
455 /* get debugfs mount point from /proc/mounts */ 431 /* get debugfs mount point from /proc/mounts */
456 get_debugfs_mntpt(); 432 debugfs_mount(NULL);
457 /* 433 /*
458 * "perf-xxxx" is the same as "perf xxxx", but we obviously: 434 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
459 * 435 *
@@ -476,7 +452,6 @@ int main(int argc, const char **argv)
476 argc--; 452 argc--;
477 handle_options(&argv, &argc, NULL); 453 handle_options(&argv, &argc, NULL);
478 commit_pager_choice(); 454 commit_pager_choice();
479 set_debugfs_path();
480 set_buildid_dir(); 455 set_buildid_dir();
481 456
482 if (argc > 0) { 457 if (argc > 0) {
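
With set_debugfs_path() and get_debugfs_mntpt() gone, perf.c defers all mount-point bookkeeping to the shared debugfs helpers. A hedged sketch of the resulting call pattern, assuming the tools/perf/util/debugfs.h declarations introduced later in this series:

#include <stdio.h>
#include "debugfs.h"

int main(int argc, char **argv)
{
	if (argc > 1)
		debugfs_set_path(argv[1]);	/* --debugfs-dir style override */
	else if (debugfs_mount(NULL) == NULL)	/* autodetect, or mount it */
		fprintf(stderr, "debugfs not available\n");

	/* kept in sync by both helpers above */
	printf("tracepoints under: %s\n", tracing_events_path);
	return 0;
}

Either entry point updates tracing_events_path as a side effect, which is why the two snprintf/strncpy call sites in the old perf.c could be deleted.
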
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 914c895510f7..64f8bee31ced 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -185,4 +185,28 @@ extern const char perf_version_string[];
185 185
186void pthread__unblock_sigwinch(void); 186void pthread__unblock_sigwinch(void);
187 187
188struct perf_record_opts {
189 pid_t target_pid;
190 pid_t target_tid;
191 bool call_graph;
192 bool group;
193 bool inherit_stat;
194 bool no_delay;
195 bool no_inherit;
196 bool no_samples;
197 bool pipe_output;
198 bool raw_samples;
199 bool sample_address;
200 bool sample_time;
201 bool sample_id_all_avail;
202 bool system_wide;
203 bool period;
204 unsigned int freq;
205 unsigned int mmap_pages;
206 unsigned int user_freq;
207 u64 default_interval;
208 u64 user_interval;
209 const char *cpu_list;
210};
211
188#endif 212#endif
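
struct perf_record_opts gathers what were previously scattered globals into a single options block that can be handed to the evlist code. A sketch of initializing it with plausible defaults; the particular values are illustrative, only the field names come from the hunk above:

#include <limits.h>
#include <stdbool.h>
#include "perf.h"	/* struct perf_record_opts, u64 */

static struct perf_record_opts record_opts = {
	.target_pid	= -1,		/* no pid/tid target yet */
	.target_tid	= -1,
	.freq		= 1000,		/* sampling frequency, Hz */
	.mmap_pages	= 128,		/* ring-buffer size, in pages */
	.user_interval	= ULLONG_MAX,	/* sentinel: not set by the user */
	.sample_id_all_avail = true,
};
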
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 119e996035c8..011ed2676604 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -25,17 +25,17 @@ int symbol__annotate_init(struct map *map __used, struct symbol *sym)
25 return 0; 25 return 0;
26} 26}
27 27
28int symbol__alloc_hist(struct symbol *sym, int nevents) 28int symbol__alloc_hist(struct symbol *sym)
29{ 29{
30 struct annotation *notes = symbol__annotation(sym); 30 struct annotation *notes = symbol__annotation(sym);
31 size_t sizeof_sym_hist = (sizeof(struct sym_hist) + 31 size_t sizeof_sym_hist = (sizeof(struct sym_hist) +
32 (sym->end - sym->start) * sizeof(u64)); 32 (sym->end - sym->start) * sizeof(u64));
33 33
34 notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist); 34 notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist);
35 if (notes->src == NULL) 35 if (notes->src == NULL)
36 return -1; 36 return -1;
37 notes->src->sizeof_sym_hist = sizeof_sym_hist; 37 notes->src->sizeof_sym_hist = sizeof_sym_hist;
38 notes->src->nr_histograms = nevents; 38 notes->src->nr_histograms = symbol_conf.nr_events;
39 INIT_LIST_HEAD(&notes->src->source); 39 INIT_LIST_HEAD(&notes->src->source);
40 return 0; 40 return 0;
41} 41}
@@ -334,7 +334,7 @@ fallback:
334 disassembler_style ? "-M " : "", 334 disassembler_style ? "-M " : "",
335 disassembler_style ? disassembler_style : "", 335 disassembler_style ? disassembler_style : "",
336 map__rip_2objdump(map, sym->start), 336 map__rip_2objdump(map, sym->start),
337 map__rip_2objdump(map, sym->end), 337 map__rip_2objdump(map, sym->end+1),
338 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", 338 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
339 symbol_conf.annotate_src ? "-S" : "", 339 symbol_conf.annotate_src ? "-S" : "",
340 symfs_filename, filename); 340 symfs_filename, filename);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index d9072523d342..efa5dc82bfae 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -72,7 +72,7 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
72 72
73int symbol__inc_addr_samples(struct symbol *sym, struct map *map, 73int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
74 int evidx, u64 addr); 74 int evidx, u64 addr);
75int symbol__alloc_hist(struct symbol *sym, int nevents); 75int symbol__alloc_hist(struct symbol *sym);
76void symbol__annotate_zero_histograms(struct symbol *sym); 76void symbol__annotate_zero_histograms(struct symbol *sym);
77 77
78int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); 78int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
@@ -99,8 +99,7 @@ static inline int symbol__tui_annotate(struct symbol *sym __used,
99} 99}
100#else 100#else
101int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, 101int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
102 int nr_events, void(*timer)(void *arg), void *arg, 102 void(*timer)(void *arg), void *arg, int delay_secs);
103 int delay_secs);
104#endif 103#endif
105 104
106extern const char *disassembler_style; 105extern const char *disassembler_style;
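
symbol__alloc_hist() now sizes its histogram array from symbol_conf.nr_events instead of taking an nevents argument, so every caller sees the same event count. The arithmetic, reproduced with simplified stand-in types (the real struct sym_hist differs):

#include <stdint.h>
#include <stdio.h>

struct sym_hist { uint64_t nr_samples; uint64_t addr[]; };

int main(void)
{
	uint64_t start = 0x1000, end = 0x1040;	/* illustrative symbol span */
	int nr_events = 2;			/* i.e. symbol_conf.nr_events */

	/* one u64 counter per byte of the symbol, per event */
	size_t sizeof_sym_hist = sizeof(struct sym_hist) +
				 (end - start) * sizeof(uint64_t);

	printf("per-event histogram: %zu bytes, total: %zu bytes\n",
	       sizeof_sym_hist, (size_t)nr_events * sizeof_sym_hist);
	return 0;
}
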
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index a91cd99f26ea..dff9c7a725f4 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -13,15 +13,18 @@
13#include "symbol.h" 13#include "symbol.h"
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h" 15#include "debug.h"
16#include "session.h"
17#include "tool.h"
16 18
17static int build_id__mark_dso_hit(union perf_event *event, 19static int build_id__mark_dso_hit(struct perf_tool *tool __used,
20 union perf_event *event,
18 struct perf_sample *sample __used, 21 struct perf_sample *sample __used,
19 struct perf_evsel *evsel __used, 22 struct perf_evsel *evsel __used,
20 struct perf_session *session) 23 struct machine *machine)
21{ 24{
22 struct addr_location al; 25 struct addr_location al;
23 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 26 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
24 struct thread *thread = perf_session__findnew(session, event->ip.pid); 27 struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
25 28
26 if (thread == NULL) { 29 if (thread == NULL) {
27 pr_err("problem processing %d event, skipping it.\n", 30 pr_err("problem processing %d event, skipping it.\n",
@@ -29,8 +32,8 @@ static int build_id__mark_dso_hit(union perf_event *event,
29 return -1; 32 return -1;
30 } 33 }
31 34
32 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 35 thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
33 event->ip.pid, event->ip.ip, &al); 36 event->ip.ip, &al);
34 37
35 if (al.map != NULL) 38 if (al.map != NULL)
36 al.map->dso->hit = 1; 39 al.map->dso->hit = 1;
@@ -38,25 +41,26 @@ static int build_id__mark_dso_hit(union perf_event *event,
38 return 0; 41 return 0;
39} 42}
40 43
41static int perf_event__exit_del_thread(union perf_event *event, 44static int perf_event__exit_del_thread(struct perf_tool *tool __used,
45 union perf_event *event,
42 struct perf_sample *sample __used, 46 struct perf_sample *sample __used,
43 struct perf_session *session) 47 struct machine *machine)
44{ 48{
45 struct thread *thread = perf_session__findnew(session, event->fork.tid); 49 struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
46 50
47 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, 51 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
48 event->fork.ppid, event->fork.ptid); 52 event->fork.ppid, event->fork.ptid);
49 53
50 if (thread) { 54 if (thread) {
51 rb_erase(&thread->rb_node, &session->threads); 55 rb_erase(&thread->rb_node, &machine->threads);
52 session->last_match = NULL; 56 machine->last_match = NULL;
53 thread__delete(thread); 57 thread__delete(thread);
54 } 58 }
55 59
56 return 0; 60 return 0;
57} 61}
58 62
59struct perf_event_ops build_id__mark_dso_hit_ops = { 63struct perf_tool build_id__mark_dso_hit_ops = {
60 .sample = build_id__mark_dso_hit, 64 .sample = build_id__mark_dso_hit,
61 .mmap = perf_event__process_mmap, 65 .mmap = perf_event__process_mmap,
62 .fork = perf_event__process_task, 66 .fork = perf_event__process_task,
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 5dafb00eaa06..a993ba87d996 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -3,7 +3,7 @@
3 3
4#include "session.h" 4#include "session.h"
5 5
6extern struct perf_event_ops build_id__mark_dso_hit_ops; 6extern struct perf_tool build_id__mark_dso_hit_ops;
7 7
8char *dso__build_id_filename(struct dso *self, char *bf, size_t size); 8char *dso__build_id_filename(struct dso *self, char *bf, size_t size);
9 9
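
The build-id code illustrates the series-wide rename of struct perf_event_ops to struct perf_tool: every handler now receives the tool it is registered on plus a struct machine, rather than a perf_session. A sketch of a custom tool under that contract; it assumes struct perf_tool exposes per-record callbacks (.comm, .mmap, .fork) the way build_id__mark_dso_hit_ops above does:

#include <stdio.h>
#include "tool.h"
#include "event.h"

static int count_comms(struct perf_tool *tool __used,
		       union perf_event *event,
		       struct perf_sample *sample __used,
		       struct machine *machine __used)
{
	static int n;

	fprintf(stderr, "comm #%d: %s\n", ++n, event->comm.comm);
	return 0;
}

static struct perf_tool comm_counter = {
	.comm = count_comms,
	.mmap = perf_event__process_mmap,
	.fork = perf_event__process_task,
};

Such a tool would then be passed wherever a perf_event_ops pointer used to go, e.g. to the session event loop.
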
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9b4ff16cac96..7f9c0f1ae3a9 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -101,6 +101,9 @@ int callchain_append(struct callchain_root *root,
101int callchain_merge(struct callchain_cursor *cursor, 101int callchain_merge(struct callchain_cursor *cursor,
102 struct callchain_root *dst, struct callchain_root *src); 102 struct callchain_root *dst, struct callchain_root *src);
103 103
104struct ip_callchain;
105union perf_event;
106
104bool ip_callchain__valid(struct ip_callchain *chain, 107bool ip_callchain__valid(struct ip_callchain *chain,
105 const union perf_event *event); 108 const union perf_event *event);
106/* 109/*
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 96bee5c46008..dbe2f16b1a1a 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,7 +3,6 @@
3#include "parse-options.h" 3#include "parse-options.h"
4#include "evsel.h" 4#include "evsel.h"
5#include "cgroup.h" 5#include "cgroup.h"
6#include "debugfs.h" /* MAX_PATH, STR() */
7#include "evlist.h" 6#include "evlist.h"
8 7
9int nr_cgroups; 8int nr_cgroups;
@@ -12,7 +11,7 @@ static int
12cgroupfs_find_mountpoint(char *buf, size_t maxlen) 11cgroupfs_find_mountpoint(char *buf, size_t maxlen)
13{ 12{
14 FILE *fp; 13 FILE *fp;
15 char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1]; 14 char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
16 char *token, *saved_ptr = NULL; 15 char *token, *saved_ptr = NULL;
17 int found = 0; 16 int found = 0;
18 17
@@ -25,8 +24,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
25 * and inspect every cgroupfs mount point to find one that has 24 * and inspect every cgroupfs mount point to find one that has
26 * perf_event subsystem 25 * perf_event subsystem
27 */ 26 */
28 while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %" 27 while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %"
29 STR(MAX_PATH)"s %*d %*d\n", 28 STR(PATH_MAX)"s %*d %*d\n",
30 mountpoint, type, tokens) == 3) { 29 mountpoint, type, tokens) == 3) {
31 30
32 if (!strcmp(type, "cgroup")) { 31 if (!strcmp(type, "cgroup")) {
@@ -57,15 +56,15 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
57 56
58static int open_cgroup(char *name) 57static int open_cgroup(char *name)
59{ 58{
60 char path[MAX_PATH+1]; 59 char path[PATH_MAX + 1];
61 char mnt[MAX_PATH+1]; 60 char mnt[PATH_MAX + 1];
62 int fd; 61 int fd;
63 62
64 63
65 if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1)) 64 if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
66 return -1; 65 return -1;
67 66
68 snprintf(path, MAX_PATH, "%s/%s", mnt, name); 67 snprintf(path, PATH_MAX, "%s/%s", mnt, name);
69 68
70 fd = open(path, O_RDONLY); 69 fd = open(path, O_RDONLY);
71 if (fd == -1) 70 if (fd == -1)
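
The cgroup code keeps its bounded-fscanf idiom while switching from the private MAX_PATH to the standard PATH_MAX: the limit is stringified into the conversion width so %s can never overflow the buffer. A self-contained version of the trick (the _STR/STR pair mirrors the helper macros the perf tree provides):

#include <limits.h>
#include <stdio.h>

#define _STR(x) #x
#define STR(x)  _STR(x)

int main(void)
{
	char mountpoint[PATH_MAX + 1];
	FILE *fp = fopen("/proc/mounts", "r");

	if (fp == NULL)
		return 1;
	/* width expands to e.g. "%4096s": a read that cannot overflow */
	while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %*s %*s %*d %*d\n",
		      mountpoint) == 1)
		puts(mountpoint);
	fclose(fp);
	return 0;
}
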
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 80d9598db31a..0deac6a14b65 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -1,5 +1,8 @@
1/* 1/*
2 * GIT - The information manager from hell 2 * config.c
3 *
4 * Helper functions for parsing config items.
5 * Originally copied from GIT source.
3 * 6 *
4 * Copyright (C) Linus Torvalds, 2005 7 * Copyright (C) Linus Torvalds, 2005
5 * Copyright (C) Johannes Schindelin, 2005 8 * Copyright (C) Johannes Schindelin, 2005
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index a88fefc0cc0a..ffc35e748e89 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -2,8 +2,12 @@
2#include "debugfs.h" 2#include "debugfs.h"
3#include "cache.h" 3#include "cache.h"
4 4
5#include <linux/kernel.h>
6#include <sys/mount.h>
7
5static int debugfs_premounted; 8static int debugfs_premounted;
6static char debugfs_mountpoint[MAX_PATH+1]; 9char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug";
10char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events";
7 11
8static const char *debugfs_known_mountpoints[] = { 12static const char *debugfs_known_mountpoints[] = {
9 "/sys/kernel/debug/", 13 "/sys/kernel/debug/",
@@ -62,11 +66,9 @@ const char *debugfs_find_mountpoint(void)
62 /* give up and parse /proc/mounts */ 66 /* give up and parse /proc/mounts */
63 fp = fopen("/proc/mounts", "r"); 67 fp = fopen("/proc/mounts", "r");
64 if (fp == NULL) 68 if (fp == NULL)
65 die("Can't open /proc/mounts for read"); 69 return NULL;
66 70
67 while (fscanf(fp, "%*s %" 71 while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
68 STR(MAX_PATH)
69 "s %99s %*s %*d %*d\n",
70 debugfs_mountpoint, type) == 2) { 72 debugfs_mountpoint, type) == 2) {
71 if (strcmp(type, "debugfs") == 0) 73 if (strcmp(type, "debugfs") == 0)
72 break; 74 break;
@@ -106,6 +108,12 @@ int debugfs_valid_entry(const char *path)
106 return 0; 108 return 0;
107} 109}
108 110
111static void debugfs_set_tracing_events_path(const char *mountpoint)
112{
113 snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
114 mountpoint, "tracing/events");
115}
116
109/* mount the debugfs somewhere if it's not mounted */ 117/* mount the debugfs somewhere if it's not mounted */
110 118
111char *debugfs_mount(const char *mountpoint) 119char *debugfs_mount(const char *mountpoint)
@@ -113,7 +121,7 @@ char *debugfs_mount(const char *mountpoint)
113 /* see if it's already mounted */ 121 /* see if it's already mounted */
114 if (debugfs_find_mountpoint()) { 122 if (debugfs_find_mountpoint()) {
115 debugfs_premounted = 1; 123 debugfs_premounted = 1;
116 return debugfs_mountpoint; 124 goto out;
117 } 125 }
118 126
119 /* if not mounted and no argument */ 127 /* if not mounted and no argument */
@@ -129,12 +137,19 @@ char *debugfs_mount(const char *mountpoint)
129 return NULL; 137 return NULL;
130 138
131 /* save the mountpoint */ 139 /* save the mountpoint */
132 strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
133 debugfs_found = 1; 140 debugfs_found = 1;
134 141 strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
142out:
143 debugfs_set_tracing_events_path(debugfs_mountpoint);
135 return debugfs_mountpoint; 144 return debugfs_mountpoint;
136} 145}
137 146
147void debugfs_set_path(const char *mountpoint)
148{
149 snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
150 debugfs_set_tracing_events_path(mountpoint);
151}
152
138/* umount the debugfs */ 153/* umount the debugfs */
139 154
140int debugfs_umount(void) 155int debugfs_umount(void)
@@ -158,7 +173,7 @@ int debugfs_umount(void)
158 173
159int debugfs_write(const char *entry, const char *value) 174int debugfs_write(const char *entry, const char *value)
160{ 175{
161 char path[MAX_PATH+1]; 176 char path[PATH_MAX + 1];
162 int ret, count; 177 int ret, count;
163 int fd; 178 int fd;
164 179
@@ -203,7 +218,7 @@ int debugfs_write(const char *entry, const char *value)
203 */ 218 */
204int debugfs_read(const char *entry, char *buffer, size_t size) 219int debugfs_read(const char *entry, char *buffer, size_t size)
205{ 220{
206 char path[MAX_PATH+1]; 221 char path[PATH_MAX + 1];
207 int ret; 222 int ret;
208 int fd; 223 int fd;
209 224
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 83a02879745f..4a878f735eb0 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -1,25 +1,18 @@
1#ifndef __DEBUGFS_H__ 1#ifndef __DEBUGFS_H__
2#define __DEBUGFS_H__ 2#define __DEBUGFS_H__
3 3
4#include <sys/mount.h> 4const char *debugfs_find_mountpoint(void);
5int debugfs_valid_mountpoint(const char *debugfs);
6int debugfs_valid_entry(const char *path);
7char *debugfs_mount(const char *mountpoint);
8int debugfs_umount(void);
9void debugfs_set_path(const char *mountpoint);
10int debugfs_write(const char *entry, const char *value);
11int debugfs_read(const char *entry, char *buffer, size_t size);
12void debugfs_force_cleanup(void);
13int debugfs_make_path(const char *element, char *buffer, int size);
5 14
6#ifndef MAX_PATH 15extern char debugfs_mountpoint[];
7# define MAX_PATH 256 16extern char tracing_events_path[];
8#endif
9
10#ifndef STR
11# define _STR(x) #x
12# define STR(x) _STR(x)
13#endif
14
15extern const char *debugfs_find_mountpoint(void);
16extern int debugfs_valid_mountpoint(const char *debugfs);
17extern int debugfs_valid_entry(const char *path);
18extern char *debugfs_mount(const char *mountpoint);
19extern int debugfs_umount(void);
20extern int debugfs_write(const char *entry, const char *value);
21extern int debugfs_read(const char *entry, char *buffer, size_t size);
22extern void debugfs_force_cleanup(void);
23extern int debugfs_make_path(const char *element, char *buffer, int size);
24 17
25#endif /* __DEBUGFS_H__ */ 18#endif /* __DEBUGFS_H__ */
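
Exporting debugfs_mountpoint and tracing_events_path with compiled-in defaults means callers get a usable path even before any probing has run; debugfs_mount() and debugfs_set_path() merely refine it. A small sketch, with an illustrative tracepoint name:

#include <limits.h>
#include <stdio.h>
#include "debugfs.h"

int main(void)
{
	char path[PATH_MAX];

	/* "/sys/kernel/debug/tracing/events" until something remounts it */
	snprintf(path, sizeof(path), "%s/sched/sched_switch/format",
		 tracing_events_path);
	printf("would read tracepoint format from %s\n", path);
	return 0;
}
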
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 437f8ca679a0..73ddaf06b8e7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,7 +1,6 @@
1#include <linux/types.h> 1#include <linux/types.h>
2#include "event.h" 2#include "event.h"
3#include "debug.h" 3#include "debug.h"
4#include "session.h"
5#include "sort.h" 4#include "sort.h"
6#include "string.h" 5#include "string.h"
7#include "strlist.h" 6#include "strlist.h"
@@ -44,36 +43,27 @@ static struct perf_sample synth_sample = {
44 .period = 1, 43 .period = 1,
45}; 44};
46 45
47static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid, 46static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
48 int full, perf_event__handler_t process,
49 struct perf_session *session)
50{ 47{
51 char filename[PATH_MAX]; 48 char filename[PATH_MAX];
52 char bf[BUFSIZ]; 49 char bf[BUFSIZ];
53 FILE *fp; 50 FILE *fp;
54 size_t size = 0; 51 size_t size = 0;
55 DIR *tasks; 52 pid_t tgid = -1;
56 struct dirent dirent, *next;
57 pid_t tgid = 0;
58 53
59 snprintf(filename, sizeof(filename), "/proc/%d/status", pid); 54 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
60 55
61 fp = fopen(filename, "r"); 56 fp = fopen(filename, "r");
62 if (fp == NULL) { 57 if (fp == NULL) {
63out_race:
64 /*
65 * We raced with a task exiting - just return:
66 */
67 pr_debug("couldn't open %s\n", filename); 58 pr_debug("couldn't open %s\n", filename);
68 return 0; 59 return 0;
69 } 60 }
70 61
71 memset(&event->comm, 0, sizeof(event->comm)); 62 while (!comm[0] || (tgid < 0)) {
72
73 while (!event->comm.comm[0] || !event->comm.pid) {
74 if (fgets(bf, sizeof(bf), fp) == NULL) { 63 if (fgets(bf, sizeof(bf), fp) == NULL) {
75 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); 64 pr_warning("couldn't get COMM and pgid, malformed %s\n",
76 goto out; 65 filename);
66 break;
77 } 67 }
78 68
79 if (memcmp(bf, "Name:", 5) == 0) { 69 if (memcmp(bf, "Name:", 5) == 0) {
@@ -81,33 +71,65 @@ out_race:
81 while (*name && isspace(*name)) 71 while (*name && isspace(*name))
82 ++name; 72 ++name;
83 size = strlen(name) - 1; 73 size = strlen(name) - 1;
84 memcpy(event->comm.comm, name, size++); 74 if (size >= len)
75 size = len - 1;
76 memcpy(comm, name, size);
77
85 } else if (memcmp(bf, "Tgid:", 5) == 0) { 78 } else if (memcmp(bf, "Tgid:", 5) == 0) {
86 char *tgids = bf + 5; 79 char *tgids = bf + 5;
87 while (*tgids && isspace(*tgids)) 80 while (*tgids && isspace(*tgids))
88 ++tgids; 81 ++tgids;
89 tgid = event->comm.pid = atoi(tgids); 82 tgid = atoi(tgids);
90 } 83 }
91 } 84 }
92 85
86 fclose(fp);
87
88 return tgid;
89}
90
91static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
92 union perf_event *event, pid_t pid,
93 int full,
94 perf_event__handler_t process,
95 struct machine *machine)
96{
97 char filename[PATH_MAX];
98 size_t size;
99 DIR *tasks;
100 struct dirent dirent, *next;
101 pid_t tgid;
102
103 memset(&event->comm, 0, sizeof(event->comm));
104
105 tgid = perf_event__get_comm_tgid(pid, event->comm.comm,
106 sizeof(event->comm.comm));
107 if (tgid < 0)
108 goto out;
109
110 event->comm.pid = tgid;
93 event->comm.header.type = PERF_RECORD_COMM; 111 event->comm.header.type = PERF_RECORD_COMM;
112
113 size = strlen(event->comm.comm) + 1;
94 size = ALIGN(size, sizeof(u64)); 114 size = ALIGN(size, sizeof(u64));
95 memset(event->comm.comm + size, 0, session->id_hdr_size); 115 memset(event->comm.comm + size, 0, machine->id_hdr_size);
96 event->comm.header.size = (sizeof(event->comm) - 116 event->comm.header.size = (sizeof(event->comm) -
97 (sizeof(event->comm.comm) - size) + 117 (sizeof(event->comm.comm) - size) +
98 session->id_hdr_size); 118 machine->id_hdr_size);
99 if (!full) { 119 if (!full) {
100 event->comm.tid = pid; 120 event->comm.tid = pid;
101 121
102 process(event, &synth_sample, session); 122 process(tool, event, &synth_sample, machine);
103 goto out; 123 goto out;
104 } 124 }
105 125
106 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 126 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
107 127
108 tasks = opendir(filename); 128 tasks = opendir(filename);
109 if (tasks == NULL) 129 if (tasks == NULL) {
110 goto out_race; 130 pr_debug("couldn't open %s\n", filename);
131 return 0;
132 }
111 133
112 while (!readdir_r(tasks, &dirent, &next) && next) { 134 while (!readdir_r(tasks, &dirent, &next) && next) {
113 char *end; 135 char *end;
@@ -115,22 +137,32 @@ out_race:
115 if (*end) 137 if (*end)
116 continue; 138 continue;
117 139
                                                           140                 /* already have tgid; just want to update the comm */
141 (void) perf_event__get_comm_tgid(pid, event->comm.comm,
142 sizeof(event->comm.comm));
143
144 size = strlen(event->comm.comm) + 1;
145 size = ALIGN(size, sizeof(u64));
146 memset(event->comm.comm + size, 0, machine->id_hdr_size);
147 event->comm.header.size = (sizeof(event->comm) -
148 (sizeof(event->comm.comm) - size) +
149 machine->id_hdr_size);
150
118 event->comm.tid = pid; 151 event->comm.tid = pid;
119 152
120 process(event, &synth_sample, session); 153 process(tool, event, &synth_sample, machine);
121 } 154 }
122 155
123 closedir(tasks); 156 closedir(tasks);
124out: 157out:
125 fclose(fp);
126
127 return tgid; 158 return tgid;
128} 159}
129 160
130static int perf_event__synthesize_mmap_events(union perf_event *event, 161static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
162 union perf_event *event,
131 pid_t pid, pid_t tgid, 163 pid_t pid, pid_t tgid,
132 perf_event__handler_t process, 164 perf_event__handler_t process,
133 struct perf_session *session) 165 struct machine *machine)
134{ 166{
135 char filename[PATH_MAX]; 167 char filename[PATH_MAX];
136 FILE *fp; 168 FILE *fp;
@@ -193,12 +225,12 @@ static int perf_event__synthesize_mmap_events(union perf_event *event,
193 event->mmap.len -= event->mmap.start; 225 event->mmap.len -= event->mmap.start;
194 event->mmap.header.size = (sizeof(event->mmap) - 226 event->mmap.header.size = (sizeof(event->mmap) -
195 (sizeof(event->mmap.filename) - size)); 227 (sizeof(event->mmap.filename) - size));
196 memset(event->mmap.filename + size, 0, session->id_hdr_size); 228 memset(event->mmap.filename + size, 0, machine->id_hdr_size);
197 event->mmap.header.size += session->id_hdr_size; 229 event->mmap.header.size += machine->id_hdr_size;
198 event->mmap.pid = tgid; 230 event->mmap.pid = tgid;
199 event->mmap.tid = pid; 231 event->mmap.tid = pid;
200 232
201 process(event, &synth_sample, session); 233 process(tool, event, &synth_sample, machine);
202 } 234 }
203 } 235 }
204 236
@@ -206,14 +238,14 @@ static int perf_event__synthesize_mmap_events(union perf_event *event,
206 return 0; 238 return 0;
207} 239}
208 240
209int perf_event__synthesize_modules(perf_event__handler_t process, 241int perf_event__synthesize_modules(struct perf_tool *tool,
210 struct perf_session *session, 242 perf_event__handler_t process,
211 struct machine *machine) 243 struct machine *machine)
212{ 244{
213 struct rb_node *nd; 245 struct rb_node *nd;
214 struct map_groups *kmaps = &machine->kmaps; 246 struct map_groups *kmaps = &machine->kmaps;
215 union perf_event *event = zalloc((sizeof(event->mmap) + 247 union perf_event *event = zalloc((sizeof(event->mmap) +
216 session->id_hdr_size)); 248 machine->id_hdr_size));
217 if (event == NULL) { 249 if (event == NULL) {
218 pr_debug("Not enough memory synthesizing mmap event " 250 pr_debug("Not enough memory synthesizing mmap event "
219 "for kernel modules\n"); 251 "for kernel modules\n");
@@ -243,15 +275,15 @@ int perf_event__synthesize_modules(perf_event__handler_t process,
243 event->mmap.header.type = PERF_RECORD_MMAP; 275 event->mmap.header.type = PERF_RECORD_MMAP;
244 event->mmap.header.size = (sizeof(event->mmap) - 276 event->mmap.header.size = (sizeof(event->mmap) -
245 (sizeof(event->mmap.filename) - size)); 277 (sizeof(event->mmap.filename) - size));
246 memset(event->mmap.filename + size, 0, session->id_hdr_size); 278 memset(event->mmap.filename + size, 0, machine->id_hdr_size);
247 event->mmap.header.size += session->id_hdr_size; 279 event->mmap.header.size += machine->id_hdr_size;
248 event->mmap.start = pos->start; 280 event->mmap.start = pos->start;
249 event->mmap.len = pos->end - pos->start; 281 event->mmap.len = pos->end - pos->start;
250 event->mmap.pid = machine->pid; 282 event->mmap.pid = machine->pid;
251 283
252 memcpy(event->mmap.filename, pos->dso->long_name, 284 memcpy(event->mmap.filename, pos->dso->long_name,
253 pos->dso->long_name_len + 1); 285 pos->dso->long_name_len + 1);
254 process(event, &synth_sample, session); 286 process(tool, event, &synth_sample, machine);
255 } 287 }
256 288
257 free(event); 289 free(event);
@@ -260,40 +292,69 @@ int perf_event__synthesize_modules(perf_event__handler_t process,
260 292
261static int __event__synthesize_thread(union perf_event *comm_event, 293static int __event__synthesize_thread(union perf_event *comm_event,
262 union perf_event *mmap_event, 294 union perf_event *mmap_event,
263 pid_t pid, perf_event__handler_t process, 295 pid_t pid, int full,
264 struct perf_session *session) 296 perf_event__handler_t process,
297 struct perf_tool *tool,
298 struct machine *machine)
265{ 299{
266 pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process, 300 pid_t tgid = perf_event__synthesize_comm(tool, comm_event, pid, full,
267 session); 301 process, machine);
268 if (tgid == -1) 302 if (tgid == -1)
269 return -1; 303 return -1;
270 return perf_event__synthesize_mmap_events(mmap_event, pid, tgid, 304 return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
271 process, session); 305 process, machine);
272} 306}
273 307
274int perf_event__synthesize_thread_map(struct thread_map *threads, 308int perf_event__synthesize_thread_map(struct perf_tool *tool,
309 struct thread_map *threads,
275 perf_event__handler_t process, 310 perf_event__handler_t process,
276 struct perf_session *session) 311 struct machine *machine)
277{ 312{
278 union perf_event *comm_event, *mmap_event; 313 union perf_event *comm_event, *mmap_event;
279 int err = -1, thread; 314 int err = -1, thread, j;
280 315
281 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 316 comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
282 if (comm_event == NULL) 317 if (comm_event == NULL)
283 goto out; 318 goto out;
284 319
285 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); 320 mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size);
286 if (mmap_event == NULL) 321 if (mmap_event == NULL)
287 goto out_free_comm; 322 goto out_free_comm;
288 323
289 err = 0; 324 err = 0;
290 for (thread = 0; thread < threads->nr; ++thread) { 325 for (thread = 0; thread < threads->nr; ++thread) {
291 if (__event__synthesize_thread(comm_event, mmap_event, 326 if (__event__synthesize_thread(comm_event, mmap_event,
292 threads->map[thread], 327 threads->map[thread], 0,
293 process, session)) { 328 process, tool, machine)) {
294 err = -1; 329 err = -1;
295 break; 330 break;
296 } 331 }
332
333 /*
334 * comm.pid is set to thread group id by
335 * perf_event__synthesize_comm
336 */
337 if ((int) comm_event->comm.pid != threads->map[thread]) {
338 bool need_leader = true;
339
340 /* is thread group leader in thread_map? */
341 for (j = 0; j < threads->nr; ++j) {
342 if ((int) comm_event->comm.pid == threads->map[j]) {
343 need_leader = false;
344 break;
345 }
346 }
347
348 /* if not, generate events for it */
349 if (need_leader &&
350 __event__synthesize_thread(comm_event,
351 mmap_event,
352 comm_event->comm.pid, 0,
353 process, tool, machine)) {
354 err = -1;
355 break;
356 }
357 }
297 } 358 }
298 free(mmap_event); 359 free(mmap_event);
299out_free_comm: 360out_free_comm:
@@ -302,19 +363,20 @@ out:
302 return err; 363 return err;
303} 364}
304 365
305int perf_event__synthesize_threads(perf_event__handler_t process, 366int perf_event__synthesize_threads(struct perf_tool *tool,
306 struct perf_session *session) 367 perf_event__handler_t process,
368 struct machine *machine)
307{ 369{
308 DIR *proc; 370 DIR *proc;
309 struct dirent dirent, *next; 371 struct dirent dirent, *next;
310 union perf_event *comm_event, *mmap_event; 372 union perf_event *comm_event, *mmap_event;
311 int err = -1; 373 int err = -1;
312 374
313 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 375 comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
314 if (comm_event == NULL) 376 if (comm_event == NULL)
315 goto out; 377 goto out;
316 378
317 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size); 379 mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size);
318 if (mmap_event == NULL) 380 if (mmap_event == NULL)
319 goto out_free_comm; 381 goto out_free_comm;
320 382
@@ -329,8 +391,8 @@ int perf_event__synthesize_threads(perf_event__handler_t process,
329 if (*end) /* only interested in proper numerical dirents */ 391 if (*end) /* only interested in proper numerical dirents */
330 continue; 392 continue;
331 393
332 __event__synthesize_thread(comm_event, mmap_event, pid, 394 __event__synthesize_thread(comm_event, mmap_event, pid, 1,
333 process, session); 395 process, tool, machine);
334 } 396 }
335 397
336 closedir(proc); 398 closedir(proc);
@@ -365,8 +427,8 @@ static int find_symbol_cb(void *arg, const char *name, char type,
365 return 1; 427 return 1;
366} 428}
367 429
368int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, 430int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
369 struct perf_session *session, 431 perf_event__handler_t process,
370 struct machine *machine, 432 struct machine *machine,
371 const char *symbol_name) 433 const char *symbol_name)
372{ 434{
@@ -383,7 +445,7 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
383 */ 445 */
384 struct process_symbol_args args = { .name = symbol_name, }; 446 struct process_symbol_args args = { .name = symbol_name, };
385 union perf_event *event = zalloc((sizeof(event->mmap) + 447 union perf_event *event = zalloc((sizeof(event->mmap) +
386 session->id_hdr_size)); 448 machine->id_hdr_size));
387 if (event == NULL) { 449 if (event == NULL) {
388 pr_debug("Not enough memory synthesizing mmap event " 450 pr_debug("Not enough memory synthesizing mmap event "
389 "for kernel modules\n"); 451 "for kernel modules\n");
@@ -417,25 +479,32 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
417 size = ALIGN(size, sizeof(u64)); 479 size = ALIGN(size, sizeof(u64));
418 event->mmap.header.type = PERF_RECORD_MMAP; 480 event->mmap.header.type = PERF_RECORD_MMAP;
419 event->mmap.header.size = (sizeof(event->mmap) - 481 event->mmap.header.size = (sizeof(event->mmap) -
420 (sizeof(event->mmap.filename) - size) + session->id_hdr_size); 482 (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
421 event->mmap.pgoff = args.start; 483 event->mmap.pgoff = args.start;
422 event->mmap.start = map->start; 484 event->mmap.start = map->start;
423 event->mmap.len = map->end - event->mmap.start; 485 event->mmap.len = map->end - event->mmap.start;
424 event->mmap.pid = machine->pid; 486 event->mmap.pid = machine->pid;
425 487
426 err = process(event, &synth_sample, session); 488 err = process(tool, event, &synth_sample, machine);
427 free(event); 489 free(event);
428 490
429 return err; 491 return err;
430} 492}
431 493
432int perf_event__process_comm(union perf_event *event, 494size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
495{
496 return fprintf(fp, ": %s:%d\n", event->comm.comm, event->comm.tid);
497}
498
499int perf_event__process_comm(struct perf_tool *tool __used,
500 union perf_event *event,
433 struct perf_sample *sample __used, 501 struct perf_sample *sample __used,
434 struct perf_session *session) 502 struct machine *machine)
435{ 503{
436 struct thread *thread = perf_session__findnew(session, event->comm.tid); 504 struct thread *thread = machine__findnew_thread(machine, event->comm.tid);
437 505
438 dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid); 506 if (dump_trace)
507 perf_event__fprintf_comm(event, stdout);
439 508
440 if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { 509 if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
441 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 510 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
@@ -445,13 +514,13 @@ int perf_event__process_comm(union perf_event *event,
445 return 0; 514 return 0;
446} 515}
447 516
448int perf_event__process_lost(union perf_event *event, 517int perf_event__process_lost(struct perf_tool *tool __used,
518 union perf_event *event,
449 struct perf_sample *sample __used, 519 struct perf_sample *sample __used,
450 struct perf_session *session) 520 struct machine *machine __used)
451{ 521{
452 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", 522 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
453 event->lost.id, event->lost.lost); 523 event->lost.id, event->lost.lost);
454 session->hists.stats.total_lost += event->lost.lost;
455 return 0; 524 return 0;
456} 525}
457 526
@@ -468,21 +537,15 @@ static void perf_event__set_kernel_mmap_len(union perf_event *event,
468 maps[MAP__FUNCTION]->end = ~0ULL; 537 maps[MAP__FUNCTION]->end = ~0ULL;
469} 538}
470 539
471static int perf_event__process_kernel_mmap(union perf_event *event, 540static int perf_event__process_kernel_mmap(struct perf_tool *tool __used,
472 struct perf_session *session) 541 union perf_event *event,
542 struct machine *machine)
473{ 543{
474 struct map *map; 544 struct map *map;
475 char kmmap_prefix[PATH_MAX]; 545 char kmmap_prefix[PATH_MAX];
476 struct machine *machine;
477 enum dso_kernel_type kernel_type; 546 enum dso_kernel_type kernel_type;
478 bool is_kernel_mmap; 547 bool is_kernel_mmap;
479 548
480 machine = perf_session__findnew_machine(session, event->mmap.pid);
481 if (!machine) {
482 pr_err("Can't find id %d's machine\n", event->mmap.pid);
483 goto out_problem;
484 }
485
486 machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix)); 549 machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
487 if (machine__is_host(machine)) 550 if (machine__is_host(machine))
488 kernel_type = DSO_TYPE_KERNEL; 551 kernel_type = DSO_TYPE_KERNEL;
@@ -549,9 +612,9 @@ static int perf_event__process_kernel_mmap(union perf_event *event,
549 * time /proc/sys/kernel/kptr_restrict was non zero. 612 * time /proc/sys/kernel/kptr_restrict was non zero.
550 */ 613 */
551 if (event->mmap.pgoff != 0) { 614 if (event->mmap.pgoff != 0) {
552 perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, 615 maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
553 symbol_name, 616 symbol_name,
554 event->mmap.pgoff); 617 event->mmap.pgoff);
555 } 618 }
556 619
557 if (machine__is_default_guest(machine)) { 620 if (machine__is_default_guest(machine)) {
@@ -567,32 +630,35 @@ out_problem:
567 return -1; 630 return -1;
568} 631}
569 632
570int perf_event__process_mmap(union perf_event *event, 633size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
634{
635 return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
636 event->mmap.pid, event->mmap.tid, event->mmap.start,
637 event->mmap.len, event->mmap.pgoff, event->mmap.filename);
638}
639
640int perf_event__process_mmap(struct perf_tool *tool,
641 union perf_event *event,
571 struct perf_sample *sample __used, 642 struct perf_sample *sample __used,
572 struct perf_session *session) 643 struct machine *machine)
573{ 644{
574 struct machine *machine;
575 struct thread *thread; 645 struct thread *thread;
576 struct map *map; 646 struct map *map;
577 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 647 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
578 int ret = 0; 648 int ret = 0;
579 649
580 dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", 650 if (dump_trace)
581 event->mmap.pid, event->mmap.tid, event->mmap.start, 651 perf_event__fprintf_mmap(event, stdout);
582 event->mmap.len, event->mmap.pgoff, event->mmap.filename);
583 652
584 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || 653 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
585 cpumode == PERF_RECORD_MISC_KERNEL) { 654 cpumode == PERF_RECORD_MISC_KERNEL) {
586 ret = perf_event__process_kernel_mmap(event, session); 655 ret = perf_event__process_kernel_mmap(tool, event, machine);
587 if (ret < 0) 656 if (ret < 0)
588 goto out_problem; 657 goto out_problem;
589 return 0; 658 return 0;
590 } 659 }
591 660
592 machine = perf_session__find_host_machine(session); 661 thread = machine__findnew_thread(machine, event->mmap.pid);
593 if (machine == NULL)
594 goto out_problem;
595 thread = perf_session__findnew(session, event->mmap.pid);
596 if (thread == NULL) 662 if (thread == NULL)
597 goto out_problem; 663 goto out_problem;
598 map = map__new(&machine->user_dsos, event->mmap.start, 664 map = map__new(&machine->user_dsos, event->mmap.start,
@@ -610,18 +676,26 @@ out_problem:
610 return 0; 676 return 0;
611} 677}
612 678
613int perf_event__process_task(union perf_event *event, 679size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
680{
681 return fprintf(fp, "(%d:%d):(%d:%d)\n",
682 event->fork.pid, event->fork.tid,
683 event->fork.ppid, event->fork.ptid);
684}
685
686int perf_event__process_task(struct perf_tool *tool __used,
687 union perf_event *event,
614 struct perf_sample *sample __used, 688 struct perf_sample *sample __used,
615 struct perf_session *session) 689 struct machine *machine)
616{ 690{
617 struct thread *thread = perf_session__findnew(session, event->fork.tid); 691 struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
618 struct thread *parent = perf_session__findnew(session, event->fork.ptid); 692 struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
619 693
620 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, 694 if (dump_trace)
621 event->fork.ppid, event->fork.ptid); 695 perf_event__fprintf_task(event, stdout);
622 696
623 if (event->header.type == PERF_RECORD_EXIT) { 697 if (event->header.type == PERF_RECORD_EXIT) {
624 perf_session__remove_thread(session, thread); 698 machine__remove_thread(machine, thread);
625 return 0; 699 return 0;
626 } 700 }
627 701
@@ -634,22 +708,45 @@ int perf_event__process_task(union perf_event *event,
634 return 0; 708 return 0;
635} 709}
636 710
637int perf_event__process(union perf_event *event, struct perf_sample *sample, 711size_t perf_event__fprintf(union perf_event *event, FILE *fp)
638 struct perf_session *session) 712{
713 size_t ret = fprintf(fp, "PERF_RECORD_%s",
714 perf_event__name(event->header.type));
715
716 switch (event->header.type) {
717 case PERF_RECORD_COMM:
718 ret += perf_event__fprintf_comm(event, fp);
719 break;
720 case PERF_RECORD_FORK:
721 case PERF_RECORD_EXIT:
722 ret += perf_event__fprintf_task(event, fp);
723 break;
724 case PERF_RECORD_MMAP:
725 ret += perf_event__fprintf_mmap(event, fp);
726 break;
727 default:
728 ret += fprintf(fp, "\n");
729 }
730
731 return ret;
732}
733
734int perf_event__process(struct perf_tool *tool, union perf_event *event,
735 struct perf_sample *sample, struct machine *machine)
639{ 736{
640 switch (event->header.type) { 737 switch (event->header.type) {
641 case PERF_RECORD_COMM: 738 case PERF_RECORD_COMM:
642 perf_event__process_comm(event, sample, session); 739 perf_event__process_comm(tool, event, sample, machine);
643 break; 740 break;
644 case PERF_RECORD_MMAP: 741 case PERF_RECORD_MMAP:
645 perf_event__process_mmap(event, sample, session); 742 perf_event__process_mmap(tool, event, sample, machine);
646 break; 743 break;
647 case PERF_RECORD_FORK: 744 case PERF_RECORD_FORK:
648 case PERF_RECORD_EXIT: 745 case PERF_RECORD_EXIT:
649 perf_event__process_task(event, sample, session); 746 perf_event__process_task(tool, event, sample, machine);
650 break; 747 break;
651 case PERF_RECORD_LOST: 748 case PERF_RECORD_LOST:
652 perf_event__process_lost(event, sample, session); 749 perf_event__process_lost(tool, event, sample, machine);
653 default: 750 default:
654 break; 751 break;
655 } 752 }
@@ -658,36 +755,29 @@ int perf_event__process(union perf_event *event, struct perf_sample *sample,
658} 755}
659 756
660void thread__find_addr_map(struct thread *self, 757void thread__find_addr_map(struct thread *self,
661 struct perf_session *session, u8 cpumode, 758 struct machine *machine, u8 cpumode,
662 enum map_type type, pid_t pid, u64 addr, 759 enum map_type type, u64 addr,
663 struct addr_location *al) 760 struct addr_location *al)
664{ 761{
665 struct map_groups *mg = &self->mg; 762 struct map_groups *mg = &self->mg;
666 struct machine *machine = NULL;
667 763
668 al->thread = self; 764 al->thread = self;
669 al->addr = addr; 765 al->addr = addr;
670 al->cpumode = cpumode; 766 al->cpumode = cpumode;
671 al->filtered = false; 767 al->filtered = false;
672 768
769 if (machine == NULL) {
770 al->map = NULL;
771 return;
772 }
773
673 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { 774 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
674 al->level = 'k'; 775 al->level = 'k';
675 machine = perf_session__find_host_machine(session);
676 if (machine == NULL) {
677 al->map = NULL;
678 return;
679 }
680 mg = &machine->kmaps; 776 mg = &machine->kmaps;
681 } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { 777 } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
682 al->level = '.'; 778 al->level = '.';
683 machine = perf_session__find_host_machine(session);
684 } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { 779 } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
685 al->level = 'g'; 780 al->level = 'g';
686 machine = perf_session__find_machine(session, pid);
687 if (machine == NULL) {
688 al->map = NULL;
689 return;
690 }
691 mg = &machine->kmaps; 781 mg = &machine->kmaps;
692 } else { 782 } else {
693 /* 783 /*
@@ -733,13 +823,12 @@ try_again:
733 al->addr = al->map->map_ip(al->map, al->addr); 823 al->addr = al->map->map_ip(al->map, al->addr);
734} 824}
735 825
736void thread__find_addr_location(struct thread *self, 826void thread__find_addr_location(struct thread *thread, struct machine *machine,
737 struct perf_session *session, u8 cpumode, 827 u8 cpumode, enum map_type type, u64 addr,
738 enum map_type type, pid_t pid, u64 addr,
739 struct addr_location *al, 828 struct addr_location *al,
740 symbol_filter_t filter) 829 symbol_filter_t filter)
741{ 830{
742 thread__find_addr_map(self, session, cpumode, type, pid, addr, al); 831 thread__find_addr_map(thread, machine, cpumode, type, addr, al);
743 if (al->map != NULL) 832 if (al->map != NULL)
744 al->sym = map__find_symbol(al->map, al->addr, filter); 833 al->sym = map__find_symbol(al->map, al->addr, filter);
745 else 834 else
@@ -747,13 +836,13 @@ void thread__find_addr_location(struct thread *self,
747} 836}
748 837
749int perf_event__preprocess_sample(const union perf_event *event, 838int perf_event__preprocess_sample(const union perf_event *event,
750 struct perf_session *session, 839 struct machine *machine,
751 struct addr_location *al, 840 struct addr_location *al,
752 struct perf_sample *sample, 841 struct perf_sample *sample,
753 symbol_filter_t filter) 842 symbol_filter_t filter)
754{ 843{
755 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 844 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
756 struct thread *thread = perf_session__findnew(session, event->ip.pid); 845 struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
757 846
758 if (thread == NULL) 847 if (thread == NULL)
759 return -1; 848 return -1;
@@ -764,18 +853,18 @@ int perf_event__preprocess_sample(const union perf_event *event,
764 853
765 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 854 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
766 /* 855 /*
767 * Have we already created the kernel maps for the host machine? 856 * Have we already created the kernel maps for this machine?
768 * 857 *
769 * This should have happened earlier, when we processed the kernel MMAP 858 * This should have happened earlier, when we processed the kernel MMAP
770 * events, but for older perf.data files there was no such thing, so do 859 * events, but for older perf.data files there was no such thing, so do
771 * it now. 860 * it now.
772 */ 861 */
773 if (cpumode == PERF_RECORD_MISC_KERNEL && 862 if (cpumode == PERF_RECORD_MISC_KERNEL &&
774 session->host_machine.vmlinux_maps[MAP__FUNCTION] == NULL) 863 machine->vmlinux_maps[MAP__FUNCTION] == NULL)
775 machine__create_kernel_maps(&session->host_machine); 864 machine__create_kernel_maps(machine);
776 865
777 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 866 thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
778 event->ip.pid, event->ip.ip, al); 867 event->ip.ip, al);
779 dump_printf(" ...... dso: %s\n", 868 dump_printf(" ...... dso: %s\n",
780 al->map ? al->map->dso->long_name : 869 al->map ? al->map->dso->long_name :
781 al->level == 'H' ? "[hypervisor]" : "<not found>"); 870 al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -783,13 +872,14 @@ int perf_event__preprocess_sample(const union perf_event *event,
783 al->cpu = sample->cpu; 872 al->cpu = sample->cpu;
784 873
785 if (al->map) { 874 if (al->map) {
875 struct dso *dso = al->map->dso;
876
786 if (symbol_conf.dso_list && 877 if (symbol_conf.dso_list &&
787 (!al->map || !al->map->dso || 878 (!dso || !(strlist__has_entry(symbol_conf.dso_list,
788 !(strlist__has_entry(symbol_conf.dso_list, 879 dso->short_name) ||
789 al->map->dso->short_name) || 880 (dso->short_name != dso->long_name &&
790 (al->map->dso->short_name != al->map->dso->long_name && 881 strlist__has_entry(symbol_conf.dso_list,
791 strlist__has_entry(symbol_conf.dso_list, 882 dso->long_name)))))
792 al->map->dso->long_name)))))
793 goto out_filtered; 883 goto out_filtered;
794 884
795 al->sym = map__find_symbol(al->map, al->addr, filter); 885 al->sym = map__find_symbol(al->map, al->addr, filter);
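
perf_event__get_comm_tgid() factors the /proc/<pid>/status scan out of the comm synthesis so it can be reused per task. The same parse, reduced to a standalone program (error handling trimmed relative to the real function):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char comm[64] = "", bf[BUFSIZ];
	int tgid = -1;
	FILE *fp = fopen("/proc/self/status", "r");

	if (fp == NULL)
		return 1;
	/* stop as soon as both Name: and Tgid: have been seen */
	while ((!comm[0] || tgid < 0) && fgets(bf, sizeof(bf), fp)) {
		if (!memcmp(bf, "Name:", 5)) {
			char *name = bf + 5;

			while (*name && isspace((unsigned char)*name))
				++name;
			strncpy(comm, name, sizeof(comm) - 1);
			comm[strcspn(comm, "\n")] = '\0';
		} else if (!memcmp(bf, "Tgid:", 5)) {
			tgid = atoi(bf + 5);	/* atoi skips the tab */
		}
	}
	fclose(fp);
	printf("comm=%s tgid=%d\n", comm, tgid);
	return 0;
}
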
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 357a85b85248..cbdeaad9c5e5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -2,6 +2,7 @@
2#define __PERF_RECORD_H 2#define __PERF_RECORD_H
3 3
4#include <limits.h> 4#include <limits.h>
5#include <stdio.h>
5 6
6#include "../perf.h" 7#include "../perf.h"
7#include "map.h" 8#include "map.h"
@@ -141,43 +142,54 @@ union perf_event {
141 142
142void perf_event__print_totals(void); 143void perf_event__print_totals(void);
143 144
144struct perf_session; 145struct perf_tool;
145struct thread_map; 146struct thread_map;
146 147
147typedef int (*perf_event__handler_synth_t)(union perf_event *event, 148typedef int (*perf_event__handler_t)(struct perf_tool *tool,
148 struct perf_session *session); 149 union perf_event *event,
149typedef int (*perf_event__handler_t)(union perf_event *event,
150 struct perf_sample *sample, 150 struct perf_sample *sample,
151 struct perf_session *session); 151 struct machine *machine);
152 152
153int perf_event__synthesize_thread_map(struct thread_map *threads, 153int perf_event__synthesize_thread_map(struct perf_tool *tool,
154 struct thread_map *threads,
154 perf_event__handler_t process, 155 perf_event__handler_t process,
155 struct perf_session *session); 156 struct machine *machine);
156int perf_event__synthesize_threads(perf_event__handler_t process, 157int perf_event__synthesize_threads(struct perf_tool *tool,
157 struct perf_session *session); 158 perf_event__handler_t process,
158int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, 159 struct machine *machine);
159 struct perf_session *session, 160int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
161 perf_event__handler_t process,
160 struct machine *machine, 162 struct machine *machine,
161 const char *symbol_name); 163 const char *symbol_name);
162 164
163int perf_event__synthesize_modules(perf_event__handler_t process, 165int perf_event__synthesize_modules(struct perf_tool *tool,
164 struct perf_session *session, 166 perf_event__handler_t process,
165 struct machine *machine); 167 struct machine *machine);
166 168
167int perf_event__process_comm(union perf_event *event, struct perf_sample *sample, 169int perf_event__process_comm(struct perf_tool *tool,
168 struct perf_session *session); 170 union perf_event *event,
169int perf_event__process_lost(union perf_event *event, struct perf_sample *sample, 171 struct perf_sample *sample,
170 struct perf_session *session); 172 struct machine *machine);
171int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample, 173int perf_event__process_lost(struct perf_tool *tool,
172 struct perf_session *session); 174 union perf_event *event,
173int perf_event__process_task(union perf_event *event, struct perf_sample *sample, 175 struct perf_sample *sample,
174 struct perf_session *session); 176 struct machine *machine);
175int perf_event__process(union perf_event *event, struct perf_sample *sample, 177int perf_event__process_mmap(struct perf_tool *tool,
176 struct perf_session *session); 178 union perf_event *event,
179 struct perf_sample *sample,
180 struct machine *machine);
181int perf_event__process_task(struct perf_tool *tool,
182 union perf_event *event,
183 struct perf_sample *sample,
184 struct machine *machine);
185int perf_event__process(struct perf_tool *tool,
186 union perf_event *event,
187 struct perf_sample *sample,
188 struct machine *machine);
177 189
178struct addr_location; 190struct addr_location;
179int perf_event__preprocess_sample(const union perf_event *self, 191int perf_event__preprocess_sample(const union perf_event *self,
180 struct perf_session *session, 192 struct machine *machine,
181 struct addr_location *al, 193 struct addr_location *al,
182 struct perf_sample *sample, 194 struct perf_sample *sample,
183 symbol_filter_t filter); 195 symbol_filter_t filter);
@@ -187,5 +199,13 @@ const char *perf_event__name(unsigned int id);
187int perf_event__parse_sample(const union perf_event *event, u64 type, 199int perf_event__parse_sample(const union perf_event *event, u64 type,
188 int sample_size, bool sample_id_all, 200 int sample_size, bool sample_id_all,
189 struct perf_sample *sample, bool swapped); 201 struct perf_sample *sample, bool swapped);
202int perf_event__synthesize_sample(union perf_event *event, u64 type,
203 const struct perf_sample *sample,
204 bool swapped);
205
206size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
207size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
208size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
209size_t perf_event__fprintf(union perf_event *event, FILE *fp);
190 210
191#endif /* __PERF_RECORD_H */ 211#endif /* __PERF_RECORD_H */
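The perf_event__handler_t change above threads a struct perf_tool through every callback and replaces the perf_session argument with the machine the event belongs to. A minimal stub conforming to the new signature might look like this (the name below is illustrative, not part of the patch):

        static int drop_event(struct perf_tool *tool __used,
                              union perf_event *event __used,
                              struct perf_sample *sample __used,
                              struct machine *machine __used)
        {
                return 0;       /* consume and ignore the event */
        }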
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index fbb4b4ab9cc6..fa1837088ca8 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -6,12 +6,16 @@
6 * 6 *
7 * Released under the GPL v2. (and only v2, not any later version) 7 * Released under the GPL v2. (and only v2, not any later version)
8 */ 8 */
9#include "util.h"
10#include "debugfs.h"
9#include <poll.h> 11#include <poll.h>
10#include "cpumap.h" 12#include "cpumap.h"
11#include "thread_map.h" 13#include "thread_map.h"
12#include "evlist.h" 14#include "evlist.h"
13#include "evsel.h" 15#include "evsel.h"
14#include "util.h" 16#include <unistd.h>
17
18#include "parse-events.h"
15 19
16#include <sys/mman.h> 20#include <sys/mman.h>
17 21
@@ -30,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
30 INIT_HLIST_HEAD(&evlist->heads[i]); 34 INIT_HLIST_HEAD(&evlist->heads[i]);
31 INIT_LIST_HEAD(&evlist->entries); 35 INIT_LIST_HEAD(&evlist->entries);
32 perf_evlist__set_maps(evlist, cpus, threads); 36 perf_evlist__set_maps(evlist, cpus, threads);
37 evlist->workload.pid = -1;
33} 38}
34 39
35struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, 40struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
@@ -43,6 +48,22 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
43 return evlist; 48 return evlist;
44} 49}
45 50
51void perf_evlist__config_attrs(struct perf_evlist *evlist,
52 struct perf_record_opts *opts)
53{
54 struct perf_evsel *evsel;
55
56 if (evlist->cpus->map[0] < 0)
57 opts->no_inherit = true;
58
59 list_for_each_entry(evsel, &evlist->entries, node) {
60 perf_evsel__config(evsel, opts);
61
62 if (evlist->nr_entries > 1)
63 evsel->attr.sample_type |= PERF_SAMPLE_ID;
64 }
65}
66
46static void perf_evlist__purge(struct perf_evlist *evlist) 67static void perf_evlist__purge(struct perf_evlist *evlist)
47{ 68{
48 struct perf_evsel *pos, *n; 69 struct perf_evsel *pos, *n;
@@ -76,6 +97,14 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
76 ++evlist->nr_entries; 97 ++evlist->nr_entries;
77} 98}
78 99
100static void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
101 struct list_head *list,
102 int nr_entries)
103{
104 list_splice_tail(list, &evlist->entries);
105 evlist->nr_entries += nr_entries;
106}
107
79int perf_evlist__add_default(struct perf_evlist *evlist) 108int perf_evlist__add_default(struct perf_evlist *evlist)
80{ 109{
81 struct perf_event_attr attr = { 110 struct perf_event_attr attr = {
@@ -100,6 +129,126 @@ error:
100 return -ENOMEM; 129 return -ENOMEM;
101} 130}
102 131
132int perf_evlist__add_attrs(struct perf_evlist *evlist,
133 struct perf_event_attr *attrs, size_t nr_attrs)
134{
135 struct perf_evsel *evsel, *n;
136 LIST_HEAD(head);
137 size_t i;
138
139 for (i = 0; i < nr_attrs; i++) {
140 evsel = perf_evsel__new(attrs + i, evlist->nr_entries + i);
141 if (evsel == NULL)
142 goto out_delete_partial_list;
143 list_add_tail(&evsel->node, &head);
144 }
145
146 perf_evlist__splice_list_tail(evlist, &head, nr_attrs);
147
148 return 0;
149
150out_delete_partial_list:
151 list_for_each_entry_safe(evsel, n, &head, node)
152 perf_evsel__delete(evsel);
153 return -1;
154}
155
156static int trace_event__id(const char *evname)
157{
158 char *filename, *colon;
159 int err = -1, fd;
160
161 if (asprintf(&filename, "%s/%s/id", tracing_events_path, evname) < 0)
162 return -1;
163
164 colon = strrchr(filename, ':');
165 if (colon != NULL)
166 *colon = '/';
167
168 fd = open(filename, O_RDONLY);
169 if (fd >= 0) {
170 char id[16];
171 if (read(fd, id, sizeof(id)) > 0)
172 err = atoi(id);
173 close(fd);
174 }
175
176 free(filename);
177 return err;
178}
179
180int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
181 const char *tracepoints[],
182 size_t nr_tracepoints)
183{
184 int err;
185 size_t i;
186 struct perf_event_attr *attrs = zalloc(nr_tracepoints * sizeof(*attrs));
187
188 if (attrs == NULL)
189 return -1;
190
191 for (i = 0; i < nr_tracepoints; i++) {
192 err = trace_event__id(tracepoints[i]);
193
194 if (err < 0)
195 goto out_free_attrs;
196
197 attrs[i].type = PERF_TYPE_TRACEPOINT;
198 attrs[i].config = err;
199 attrs[i].sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
200 PERF_SAMPLE_CPU);
201 attrs[i].sample_period = 1;
202 }
203
204 err = perf_evlist__add_attrs(evlist, attrs, nr_tracepoints);
205out_free_attrs:
206 free(attrs);
207 return err;
208}
209
210static struct perf_evsel *
211 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
212{
213 struct perf_evsel *evsel;
214
215 list_for_each_entry(evsel, &evlist->entries, node) {
216 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
217 (int)evsel->attr.config == id)
218 return evsel;
219 }
220
221 return NULL;
222}
223
224int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
225 const struct perf_evsel_str_handler *assocs,
226 size_t nr_assocs)
227{
228 struct perf_evsel *evsel;
229 int err;
230 size_t i;
231
232 for (i = 0; i < nr_assocs; i++) {
233 err = trace_event__id(assocs[i].name);
234 if (err < 0)
235 goto out;
236
237 evsel = perf_evlist__find_tracepoint_by_id(evlist, err);
238 if (evsel == NULL)
239 continue;
240
241 err = -EEXIST;
242 if (evsel->handler.func != NULL)
243 goto out;
244 evsel->handler.func = assocs[i].handler;
245 }
246
247 err = 0;
248out:
249 return err;
250}
251
103void perf_evlist__disable(struct perf_evlist *evlist) 252void perf_evlist__disable(struct perf_evlist *evlist)
104{ 253{
105 int cpu, thread; 254 int cpu, thread;
@@ -126,7 +275,7 @@ void perf_evlist__enable(struct perf_evlist *evlist)
126 } 275 }
127} 276}
128 277
129int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 278static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
130{ 279{
131 int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; 280 int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
132 evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); 281 evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
@@ -282,7 +431,7 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
282 evlist->mmap = NULL; 431 evlist->mmap = NULL;
283} 432}
284 433
285int perf_evlist__alloc_mmap(struct perf_evlist *evlist) 434static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
286{ 435{
287 evlist->nr_mmaps = evlist->cpus->nr; 436 evlist->nr_mmaps = evlist->cpus->nr;
288 if (evlist->cpus->map[0] == -1) 437 if (evlist->cpus->map[0] == -1)
@@ -298,8 +447,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
298 evlist->mmap[idx].mask = mask; 447 evlist->mmap[idx].mask = mask;
299 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot, 448 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
300 MAP_SHARED, fd, 0); 449 MAP_SHARED, fd, 0);
301 if (evlist->mmap[idx].base == MAP_FAILED) 450 if (evlist->mmap[idx].base == MAP_FAILED) {
451 evlist->mmap[idx].base = NULL;
302 return -1; 452 return -1;
453 }
303 454
304 perf_evlist__add_pollfd(evlist, fd); 455 perf_evlist__add_pollfd(evlist, fd);
305 return 0; 456 return 0;
@@ -400,14 +551,22 @@ out_unmap:
400 * 551 *
401 * Using perf_evlist__read_on_cpu does this automatically. 552 * Using perf_evlist__read_on_cpu does this automatically.
402 */ 553 */
403int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) 554int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
555 bool overwrite)
404{ 556{
405 unsigned int page_size = sysconf(_SC_PAGE_SIZE); 557 unsigned int page_size = sysconf(_SC_PAGE_SIZE);
406 int mask = pages * page_size - 1;
407 struct perf_evsel *evsel; 558 struct perf_evsel *evsel;
408 const struct cpu_map *cpus = evlist->cpus; 559 const struct cpu_map *cpus = evlist->cpus;
409 const struct thread_map *threads = evlist->threads; 560 const struct thread_map *threads = evlist->threads;
410 int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); 561 int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask;
562
563 /* 512 kiB: default amount of unprivileged mlocked memory */
564 if (pages == UINT_MAX)
565 pages = (512 * 1024) / page_size;
566 else if (!is_power_of_2(pages))
567 return -EINVAL;
568
569 mask = pages * page_size - 1;
411 570
412 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) 571 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
413 return -ENOMEM; 572 return -ENOMEM;
@@ -512,6 +671,38 @@ u64 perf_evlist__sample_type(const struct perf_evlist *evlist)
512 return first->attr.sample_type; 671 return first->attr.sample_type;
513} 672}
514 673
674u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist)
675{
676 struct perf_evsel *first;
677 struct perf_sample *data;
678 u64 sample_type;
679 u16 size = 0;
680
681 first = list_entry(evlist->entries.next, struct perf_evsel, node);
682
683 if (!first->attr.sample_id_all)
684 goto out;
685
686 sample_type = first->attr.sample_type;
687
688 if (sample_type & PERF_SAMPLE_TID)
689 size += sizeof(data->tid) * 2;
690
691 if (sample_type & PERF_SAMPLE_TIME)
692 size += sizeof(data->time);
693
694 if (sample_type & PERF_SAMPLE_ID)
695 size += sizeof(data->id);
696
697 if (sample_type & PERF_SAMPLE_STREAM_ID)
698 size += sizeof(data->stream_id);
699
700 if (sample_type & PERF_SAMPLE_CPU)
701 size += sizeof(data->cpu) * 2;
702out:
703 return size;
704}
705
515bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) 706bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist)
516{ 707{
517 struct perf_evsel *pos, *first; 708 struct perf_evsel *pos, *first;
@@ -569,3 +760,97 @@ out_err:
569 760
570 return err; 761 return err;
571} 762}
763
764int perf_evlist__prepare_workload(struct perf_evlist *evlist,
765 struct perf_record_opts *opts,
766 const char *argv[])
767{
768 int child_ready_pipe[2], go_pipe[2];
769 char bf;
770
771 if (pipe(child_ready_pipe) < 0) {
772 perror("failed to create 'ready' pipe");
773 return -1;
774 }
775
776 if (pipe(go_pipe) < 0) {
777 perror("failed to create 'go' pipe");
778 goto out_close_ready_pipe;
779 }
780
781 evlist->workload.pid = fork();
782 if (evlist->workload.pid < 0) {
783 perror("failed to fork");
784 goto out_close_pipes;
785 }
786
787 if (!evlist->workload.pid) {
788 if (opts->pipe_output)
789 dup2(2, 1);
790
791 close(child_ready_pipe[0]);
792 close(go_pipe[1]);
793 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
794
795 /*
796 * Do a dummy execvp to get the PLT entry resolved,
797 * so we avoid the resolver overhead on the real
798 * execvp call.
799 */
800 execvp("", (char **)argv);
801
802 /*
803 * Tell the parent we're ready to go
804 */
805 close(child_ready_pipe[1]);
806
807 /*
808 * Wait until the parent tells us to go.
809 */
810 if (read(go_pipe[0], &bf, 1) == -1)
811 perror("unable to read pipe");
812
813 execvp(argv[0], (char **)argv);
814
815 perror(argv[0]);
816 kill(getppid(), SIGUSR1);
817 exit(-1);
818 }
819
820 if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1)
821 evlist->threads->map[0] = evlist->workload.pid;
822
823 close(child_ready_pipe[1]);
824 close(go_pipe[0]);
825	 /*
826	 * Wait for the child to settle.
827	 */
828 if (read(child_ready_pipe[0], &bf, 1) == -1) {
829 perror("unable to read pipe");
830 goto out_close_pipes;
831 }
832
833 evlist->workload.cork_fd = go_pipe[1];
834 close(child_ready_pipe[0]);
835 return 0;
836
837out_close_pipes:
838 close(go_pipe[0]);
839 close(go_pipe[1]);
840out_close_ready_pipe:
841 close(child_ready_pipe[0]);
842 close(child_ready_pipe[1]);
843 return -1;
844}
845
846int perf_evlist__start_workload(struct perf_evlist *evlist)
847{
848 if (evlist->workload.cork_fd > 0) {
849 /*
850 * Remove the cork, let it rip!
851 */
852 return close(evlist->workload.cork_fd);
853 }
854
855 return 0;
856}
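Taken together, perf_evlist__prepare_workload() forks the child and parks it on the "go" pipe, and perf_evlist__start_workload() closes the cork so the blocked read() returns and the real execvp() runs. A sketch of the intended calling sequence, assuming an already populated evlist and opts (error handling trimmed):

        static int run_workload(struct perf_evlist *evlist,
                                struct perf_record_opts *opts,
                                const char *argv[])
        {
                /* Fork the child; it blocks on the "go" pipe. */
                if (perf_evlist__prepare_workload(evlist, opts, argv))
                        return -1;

                /* ... open counters, perf_evlist__mmap(), enable ... */

                /* Uncork: the child's read() returns and it execs argv. */
                return perf_evlist__start_workload(evlist);
        }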
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 1779ffef7828..8922aeed0467 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -2,12 +2,16 @@
2#define __PERF_EVLIST_H 1 2#define __PERF_EVLIST_H 1
3 3
4#include <linux/list.h> 4#include <linux/list.h>
5#include <stdio.h>
5#include "../perf.h" 6#include "../perf.h"
6#include "event.h" 7#include "event.h"
8#include "util.h"
9#include <unistd.h>
7 10
8struct pollfd; 11struct pollfd;
9struct thread_map; 12struct thread_map;
10struct cpu_map; 13struct cpu_map;
14struct perf_record_opts;
11 15
12#define PERF_EVLIST__HLIST_BITS 8 16#define PERF_EVLIST__HLIST_BITS 8
13#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) 17#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
@@ -19,6 +23,10 @@ struct perf_evlist {
19 int nr_fds; 23 int nr_fds;
20 int nr_mmaps; 24 int nr_mmaps;
21 int mmap_len; 25 int mmap_len;
26 struct {
27 int cork_fd;
28 pid_t pid;
29 } workload;
22 bool overwrite; 30 bool overwrite;
23 union perf_event event_copy; 31 union perf_event event_copy;
24 struct perf_mmap *mmap; 32 struct perf_mmap *mmap;
@@ -28,6 +36,11 @@ struct perf_evlist {
28 struct perf_evsel *selected; 36 struct perf_evsel *selected;
29}; 37};
30 38
39struct perf_evsel_str_handler {
40 const char *name;
41 void *handler;
42};
43
31struct perf_evsel; 44struct perf_evsel;
32 45
33struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, 46struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
@@ -39,11 +52,26 @@ void perf_evlist__delete(struct perf_evlist *evlist);
39 52
40void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); 53void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
41int perf_evlist__add_default(struct perf_evlist *evlist); 54int perf_evlist__add_default(struct perf_evlist *evlist);
55int perf_evlist__add_attrs(struct perf_evlist *evlist,
56 struct perf_event_attr *attrs, size_t nr_attrs);
57int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
58 const char *tracepoints[], size_t nr_tracepoints);
59int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
60 const struct perf_evsel_str_handler *assocs,
61 size_t nr_assocs);
62
63#define perf_evlist__add_attrs_array(evlist, array) \
64 perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array))
65
66#define perf_evlist__add_tracepoints_array(evlist, array) \
67 perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array))
68
69#define perf_evlist__set_tracepoints_handlers_array(evlist, array) \
70 perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
42 71
43void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 72void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
44 int cpu, int thread, u64 id); 73 int cpu, int thread, u64 id);
45 74
46int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
47void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); 75void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
48 76
49struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); 77struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
@@ -52,8 +80,16 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
52 80
53int perf_evlist__open(struct perf_evlist *evlist, bool group); 81int perf_evlist__open(struct perf_evlist *evlist, bool group);
54 82
55int perf_evlist__alloc_mmap(struct perf_evlist *evlist); 83void perf_evlist__config_attrs(struct perf_evlist *evlist,
56int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); 84 struct perf_record_opts *opts);
85
86int perf_evlist__prepare_workload(struct perf_evlist *evlist,
87 struct perf_record_opts *opts,
88 const char *argv[]);
89int perf_evlist__start_workload(struct perf_evlist *evlist);
90
91int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
92 bool overwrite);
57void perf_evlist__munmap(struct perf_evlist *evlist); 93void perf_evlist__munmap(struct perf_evlist *evlist);
58 94
59void perf_evlist__disable(struct perf_evlist *evlist); 95void perf_evlist__disable(struct perf_evlist *evlist);
@@ -77,6 +113,7 @@ int perf_evlist__set_filters(struct perf_evlist *evlist);
77 113
78u64 perf_evlist__sample_type(const struct perf_evlist *evlist); 114u64 perf_evlist__sample_type(const struct perf_evlist *evlist);
79bool perf_evlist__sample_id_all(const struct perf_evlist *evlist); 115
116u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist);
80 117
81bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); 118bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist);
82bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); 119bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
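The *_array macros above just pair a static array with its ARRAY_SIZE(); a hypothetical use (event names and handler are placeholders, not from this patch):

        static int process_sched_switch(struct perf_tool *tool __used,
                                        union perf_event *event __used,
                                        struct perf_sample *sample __used,
                                        struct machine *machine __used)
        {
                return 0;       /* placeholder handler */
        }

        static const char *sched_tracepoints[] = {
                "sched:sched_switch",
        };

        static const struct perf_evsel_str_handler sched_handlers[] = {
                { "sched:sched_switch", process_sched_switch },
        };

        /* ... after the evlist exists: */
        if (perf_evlist__add_tracepoints_array(evlist, sched_tracepoints) ||
            perf_evlist__set_tracepoints_handlers_array(evlist, sched_handlers))
                return -1;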
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d7915d4e77cb..667f3b78bb2c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -63,6 +63,79 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
63 return evsel; 63 return evsel;
64} 64}
65 65
66void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
67{
68 struct perf_event_attr *attr = &evsel->attr;
69 int track = !evsel->idx; /* only the first counter needs these */
70
71 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
72 attr->inherit = !opts->no_inherit;
73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
74 PERF_FORMAT_TOTAL_TIME_RUNNING |
75 PERF_FORMAT_ID;
76
77 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
78
79 /*
80	 * We default some events to a sample interval of 1, but keep
81	 * it a weak default that the user can override.
82 */
83 if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
84 opts->user_interval != ULLONG_MAX)) {
85 if (opts->freq) {
86 attr->sample_type |= PERF_SAMPLE_PERIOD;
87 attr->freq = 1;
88 attr->sample_freq = opts->freq;
89 } else {
90 attr->sample_period = opts->default_interval;
91 }
92 }
93
94 if (opts->no_samples)
95 attr->sample_freq = 0;
96
97 if (opts->inherit_stat)
98 attr->inherit_stat = 1;
99
100 if (opts->sample_address) {
101 attr->sample_type |= PERF_SAMPLE_ADDR;
102 attr->mmap_data = track;
103 }
104
105 if (opts->call_graph)
106 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
107
108 if (opts->system_wide)
109 attr->sample_type |= PERF_SAMPLE_CPU;
110
111 if (opts->period)
112 attr->sample_type |= PERF_SAMPLE_PERIOD;
113
114 if (opts->sample_id_all_avail &&
115 (opts->sample_time || opts->system_wide ||
116 !opts->no_inherit || opts->cpu_list))
117 attr->sample_type |= PERF_SAMPLE_TIME;
118
119 if (opts->raw_samples) {
120 attr->sample_type |= PERF_SAMPLE_TIME;
121 attr->sample_type |= PERF_SAMPLE_RAW;
122 attr->sample_type |= PERF_SAMPLE_CPU;
123 }
124
125 if (opts->no_delay) {
126 attr->watermark = 0;
127 attr->wakeup_events = 1;
128 }
129
130 attr->mmap = track;
131 attr->comm = track;
132
133 if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
134 attr->disabled = 1;
135 attr->enable_on_exec = 1;
136 }
137}
138
66int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 139int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
67{ 140{
68 int cpu, thread; 141 int cpu, thread;
@@ -387,7 +460,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
387 u32 val32[2]; 460 u32 val32[2];
388 } u; 461 } u;
389 462
390 463 memset(data, 0, sizeof(*data));
391 data->cpu = data->pid = data->tid = -1; 464 data->cpu = data->pid = data->tid = -1;
392 data->stream_id = data->id = data->time = -1ULL; 465 data->stream_id = data->id = data->time = -1ULL;
393 466
@@ -504,3 +577,82 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
504 577
505 return 0; 578 return 0;
506} 579}
580
581int perf_event__synthesize_sample(union perf_event *event, u64 type,
582 const struct perf_sample *sample,
583 bool swapped)
584{
585 u64 *array;
586
587 /*
588 * used for cross-endian analysis. See git commit 65014ab3
589 * for why this goofiness is needed.
590 */
591 union {
592 u64 val64;
593 u32 val32[2];
594 } u;
595
596 array = event->sample.array;
597
598 if (type & PERF_SAMPLE_IP) {
599 event->ip.ip = sample->ip;
600 array++;
601 }
602
603 if (type & PERF_SAMPLE_TID) {
604 u.val32[0] = sample->pid;
605 u.val32[1] = sample->tid;
606 if (swapped) {
607 /*
608 * Inverse of what is done in perf_event__parse_sample
609 */
610 u.val32[0] = bswap_32(u.val32[0]);
611 u.val32[1] = bswap_32(u.val32[1]);
612 u.val64 = bswap_64(u.val64);
613 }
614
615 *array = u.val64;
616 array++;
617 }
618
619 if (type & PERF_SAMPLE_TIME) {
620 *array = sample->time;
621 array++;
622 }
623
624 if (type & PERF_SAMPLE_ADDR) {
625 *array = sample->addr;
626 array++;
627 }
628
629 if (type & PERF_SAMPLE_ID) {
630 *array = sample->id;
631 array++;
632 }
633
634 if (type & PERF_SAMPLE_STREAM_ID) {
635 *array = sample->stream_id;
636 array++;
637 }
638
639 if (type & PERF_SAMPLE_CPU) {
640 u.val32[0] = sample->cpu;
641 if (swapped) {
642 /*
643 * Inverse of what is done in perf_event__parse_sample
644 */
645 u.val32[0] = bswap_32(u.val32[0]);
646 u.val64 = bswap_64(u.val64);
647 }
648 *array = u.val64;
649 array++;
650 }
651
652 if (type & PERF_SAMPLE_PERIOD) {
653 *array = sample->period;
654 array++;
655 }
656
657 return 0;
658}
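perf_event__synthesize_sample() is the write-side mirror of perf_event__parse_sample(): it emits the selected PERF_SAMPLE_* fields in the same order, with the same u32-pair packing for TID and CPU. So a parse/modify/synthesize round trip can rewrite a mmapped sample in place; a sketch, assuming type matches the attr that produced the event:

        static int retime_sample(union perf_event *event, u64 type,
                                 int sample_size, u64 now)
        {
                struct perf_sample sample;

                if (perf_event__parse_sample(event, type, sample_size,
                                             false, &sample, false) < 0)
                        return -1;

                sample.time = now; /* only lands if PERF_SAMPLE_TIME is set */

                return perf_event__synthesize_sample(event, type, &sample,
                                                     false);
        }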
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b1d15e6f7ae3..326b8e4d5035 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -61,12 +61,17 @@ struct perf_evsel {
61 off_t id_offset; 61 off_t id_offset;
62 }; 62 };
63 struct cgroup_sel *cgrp; 63 struct cgroup_sel *cgrp;
64 struct {
65 void *func;
66 void *data;
67 } handler;
64 bool supported; 68 bool supported;
65}; 69};
66 70
67struct cpu_map; 71struct cpu_map;
68struct thread_map; 72struct thread_map;
69struct perf_evlist; 73struct perf_evlist;
74struct perf_record_opts;
70 75
71struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); 76struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
72void perf_evsel__init(struct perf_evsel *evsel, 77void perf_evsel__init(struct perf_evsel *evsel,
@@ -74,6 +79,9 @@ void perf_evsel__init(struct perf_evsel *evsel,
74void perf_evsel__exit(struct perf_evsel *evsel); 79void perf_evsel__exit(struct perf_evsel *evsel);
75void perf_evsel__delete(struct perf_evsel *evsel); 80void perf_evsel__delete(struct perf_evsel *evsel);
76 81
82void perf_evsel__config(struct perf_evsel *evsel,
83 struct perf_record_opts *opts);
84
77int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 85int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
78int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); 86int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
79int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); 87int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
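For reference, the fields that perf_evsel__config() consults suggest a perf_record_opts initialized roughly as below; the struct itself is not defined in this diff, so treat the field list as inferred from the option tests above, not authoritative:

        struct perf_record_opts opts = {
                .target_pid     = -1,           /* no pid/tid target ... */
                .target_tid     = -1,           /* ... so disabled + enable_on_exec */
                .freq           = 4000,         /* sampling frequency */
                .user_freq      = UINT_MAX,     /* "unset" sentinels, as tested above */
                .user_interval  = ULLONG_MAX,
                .sample_id_all_avail = true,
        };

        perf_evlist__config_attrs(evlist, &opts);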
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 33c17a2b2a81..3e7e0b09c12c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
8#include <stdlib.h> 8#include <stdlib.h>
9#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/bitops.h>
11#include <sys/utsname.h> 12#include <sys/utsname.h>
12 13
13#include "evlist.h" 14#include "evlist.h"
@@ -28,9 +29,6 @@ static struct perf_trace_event_type *events;
28static u32 header_argc; 29static u32 header_argc;
29static const char **header_argv; 30static const char **header_argv;
30 31
31static int dsos__write_buildid_table(struct perf_header *header, int fd);
32static int perf_session__cache_build_ids(struct perf_session *session);
33
34int perf_header__push_event(u64 id, const char *name) 32int perf_header__push_event(u64 id, const char *name)
35{ 33{
36 if (strlen(name) > MAX_EVENT_NAME) 34 if (strlen(name) > MAX_EVENT_NAME)
@@ -187,6 +185,252 @@ perf_header__set_cmdline(int argc, const char **argv)
187 return 0; 185 return 0;
188} 186}
189 187
188#define dsos__for_each_with_build_id(pos, head) \
189 list_for_each_entry(pos, head, node) \
190 if (!pos->has_build_id) \
191 continue; \
192 else
193
194static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
195 u16 misc, int fd)
196{
197 struct dso *pos;
198
199 dsos__for_each_with_build_id(pos, head) {
200 int err;
201 struct build_id_event b;
202 size_t len;
203
204 if (!pos->hit)
205 continue;
206 len = pos->long_name_len + 1;
207 len = ALIGN(len, NAME_ALIGN);
208 memset(&b, 0, sizeof(b));
209 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
210 b.pid = pid;
211 b.header.misc = misc;
212 b.header.size = sizeof(b) + len;
213 err = do_write(fd, &b, sizeof(b));
214 if (err < 0)
215 return err;
216 err = write_padded(fd, pos->long_name,
217 pos->long_name_len + 1, len);
218 if (err < 0)
219 return err;
220 }
221
222 return 0;
223}
224
225static int machine__write_buildid_table(struct machine *machine, int fd)
226{
227 int err;
228 u16 kmisc = PERF_RECORD_MISC_KERNEL,
229 umisc = PERF_RECORD_MISC_USER;
230
231 if (!machine__is_host(machine)) {
232 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
233 umisc = PERF_RECORD_MISC_GUEST_USER;
234 }
235
236 err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
237 kmisc, fd);
238 if (err == 0)
239 err = __dsos__write_buildid_table(&machine->user_dsos,
240 machine->pid, umisc, fd);
241 return err;
242}
243
244static int dsos__write_buildid_table(struct perf_header *header, int fd)
245{
246 struct perf_session *session = container_of(header,
247 struct perf_session, header);
248 struct rb_node *nd;
249 int err = machine__write_buildid_table(&session->host_machine, fd);
250
251 if (err)
252 return err;
253
254 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
255 struct machine *pos = rb_entry(nd, struct machine, rb_node);
256 err = machine__write_buildid_table(pos, fd);
257 if (err)
258 break;
259 }
260 return err;
261}
262
263int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
264 const char *name, bool is_kallsyms)
265{
266 const size_t size = PATH_MAX;
267 char *realname, *filename = zalloc(size),
268 *linkname = zalloc(size), *targetname;
269 int len, err = -1;
270
271 if (is_kallsyms) {
272 if (symbol_conf.kptr_restrict) {
273 pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
274 return 0;
275 }
276 realname = (char *)name;
277 } else
278 realname = realpath(name, NULL);
279
280 if (realname == NULL || filename == NULL || linkname == NULL)
281 goto out_free;
282
283 len = snprintf(filename, size, "%s%s%s",
284 debugdir, is_kallsyms ? "/" : "", realname);
285 if (mkdir_p(filename, 0755))
286 goto out_free;
287
288	 snprintf(filename + len, size - len, "/%s", sbuild_id);
289
290 if (access(filename, F_OK)) {
291 if (is_kallsyms) {
292 if (copyfile("/proc/kallsyms", filename))
293 goto out_free;
294 } else if (link(realname, filename) && copyfile(name, filename))
295 goto out_free;
296 }
297
298 len = snprintf(linkname, size, "%s/.build-id/%.2s",
299 debugdir, sbuild_id);
300
301 if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
302 goto out_free;
303
304 snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
305 targetname = filename + strlen(debugdir) - 5;
306 memcpy(targetname, "../..", 5);
307
308 if (symlink(targetname, linkname) == 0)
309 err = 0;
310out_free:
311 if (!is_kallsyms)
312 free(realname);
313 free(filename);
314 free(linkname);
315 return err;
316}
317
318static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
319 const char *name, const char *debugdir,
320 bool is_kallsyms)
321{
322 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
323
324 build_id__sprintf(build_id, build_id_size, sbuild_id);
325
326 return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
327}
328
329int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
330{
331 const size_t size = PATH_MAX;
332 char *filename = zalloc(size),
333 *linkname = zalloc(size);
334 int err = -1;
335
336 if (filename == NULL || linkname == NULL)
337 goto out_free;
338
339 snprintf(linkname, size, "%s/.build-id/%.2s/%s",
340 debugdir, sbuild_id, sbuild_id + 2);
341
342 if (access(linkname, F_OK))
343 goto out_free;
344
345 if (readlink(linkname, filename, size - 1) < 0)
346 goto out_free;
347
348 if (unlink(linkname))
349 goto out_free;
350
351 /*
352 * Since the link is relative, we must make it absolute:
353 */
354 snprintf(linkname, size, "%s/.build-id/%.2s/%s",
355 debugdir, sbuild_id, filename);
356
357 if (unlink(linkname))
358 goto out_free;
359
360 err = 0;
361out_free:
362 free(filename);
363 free(linkname);
364 return err;
365}
366
367static int dso__cache_build_id(struct dso *dso, const char *debugdir)
368{
369 bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
370
371 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
372 dso->long_name, debugdir, is_kallsyms);
373}
374
375static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
376{
377 struct dso *pos;
378 int err = 0;
379
380 dsos__for_each_with_build_id(pos, head)
381 if (dso__cache_build_id(pos, debugdir))
382 err = -1;
383
384 return err;
385}
386
387static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
388{
389 int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
390 ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
391 return ret;
392}
393
394static int perf_session__cache_build_ids(struct perf_session *session)
395{
396 struct rb_node *nd;
397 int ret;
398 char debugdir[PATH_MAX];
399
400 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
401
402 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
403 return -1;
404
405 ret = machine__cache_build_ids(&session->host_machine, debugdir);
406
407 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
408 struct machine *pos = rb_entry(nd, struct machine, rb_node);
409 ret |= machine__cache_build_ids(pos, debugdir);
410 }
411 return ret ? -1 : 0;
412}
413
414static bool machine__read_build_ids(struct machine *machine, bool with_hits)
415{
416 bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
417 ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
418 return ret;
419}
420
421static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
422{
423 struct rb_node *nd;
424 bool ret = machine__read_build_ids(&session->host_machine, with_hits);
425
426 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
427 struct machine *pos = rb_entry(nd, struct machine, rb_node);
428 ret |= machine__read_build_ids(pos, with_hits);
429 }
430
431 return ret;
432}
433
190static int write_trace_info(int fd, struct perf_header *h __used, 434static int write_trace_info(int fd, struct perf_header *h __used,
191 struct perf_evlist *evlist) 435 struct perf_evlist *evlist)
192{ 436{
@@ -202,6 +446,9 @@ static int write_build_id(int fd, struct perf_header *h,
202 446
203 session = container_of(h, struct perf_session, header); 447 session = container_of(h, struct perf_session, header);
204 448
449 if (!perf_session__read_build_ids(session, true))
450 return -1;
451
205 err = dsos__write_buildid_table(h, fd); 452 err = dsos__write_buildid_table(h, fd);
206 if (err < 0) { 453 if (err < 0) {
207 pr_debug("failed to write buildid table\n"); 454 pr_debug("failed to write buildid table\n");
@@ -1065,26 +1312,30 @@ struct feature_ops {
1065 bool full_only; 1312 bool full_only;
1066}; 1313};
1067 1314
1068#define FEAT_OPA(n, w, p) \ 1315#define FEAT_OPA(n, func) \
1069 [n] = { .name = #n, .write = w, .print = p } 1316 [n] = { .name = #n, .write = write_##func, .print = print_##func }
1070#define FEAT_OPF(n, w, p) \ 1317#define FEAT_OPF(n, func) \
1071 [n] = { .name = #n, .write = w, .print = p, .full_only = true } 1318 [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true }
1319
1320/* feature_ops not implemented: */
1321#define print_trace_info NULL
1322#define print_build_id NULL
1072 1323
1073static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { 1324static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
1074 FEAT_OPA(HEADER_TRACE_INFO, write_trace_info, NULL), 1325 FEAT_OPA(HEADER_TRACE_INFO, trace_info),
1075 FEAT_OPA(HEADER_BUILD_ID, write_build_id, NULL), 1326 FEAT_OPA(HEADER_BUILD_ID, build_id),
1076 FEAT_OPA(HEADER_HOSTNAME, write_hostname, print_hostname), 1327 FEAT_OPA(HEADER_HOSTNAME, hostname),
1077 FEAT_OPA(HEADER_OSRELEASE, write_osrelease, print_osrelease), 1328 FEAT_OPA(HEADER_OSRELEASE, osrelease),
1078 FEAT_OPA(HEADER_VERSION, write_version, print_version), 1329 FEAT_OPA(HEADER_VERSION, version),
1079 FEAT_OPA(HEADER_ARCH, write_arch, print_arch), 1330 FEAT_OPA(HEADER_ARCH, arch),
1080 FEAT_OPA(HEADER_NRCPUS, write_nrcpus, print_nrcpus), 1331 FEAT_OPA(HEADER_NRCPUS, nrcpus),
1081 FEAT_OPA(HEADER_CPUDESC, write_cpudesc, print_cpudesc), 1332 FEAT_OPA(HEADER_CPUDESC, cpudesc),
1082 FEAT_OPA(HEADER_CPUID, write_cpuid, print_cpuid), 1333 FEAT_OPA(HEADER_CPUID, cpuid),
1083 FEAT_OPA(HEADER_TOTAL_MEM, write_total_mem, print_total_mem), 1334 FEAT_OPA(HEADER_TOTAL_MEM, total_mem),
1084 FEAT_OPA(HEADER_EVENT_DESC, write_event_desc, print_event_desc), 1335 FEAT_OPA(HEADER_EVENT_DESC, event_desc),
1085 FEAT_OPA(HEADER_CMDLINE, write_cmdline, print_cmdline), 1336 FEAT_OPA(HEADER_CMDLINE, cmdline),
1086 FEAT_OPF(HEADER_CPU_TOPOLOGY, write_cpu_topology, print_cpu_topology), 1337 FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
1087 FEAT_OPF(HEADER_NUMA_TOPOLOGY, write_numa_topology, print_numa_topology), 1338 FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
1088}; 1339};
1089 1340
1090struct header_print_data { 1341struct header_print_data {
@@ -1103,9 +1354,9 @@ static int perf_file_section__fprintf_info(struct perf_file_section *section,
1103 "%d, continuing...\n", section->offset, feat); 1354 "%d, continuing...\n", section->offset, feat);
1104 return 0; 1355 return 0;
1105 } 1356 }
1106 if (feat < HEADER_TRACE_INFO || feat >= HEADER_LAST_FEATURE) { 1357 if (feat >= HEADER_LAST_FEATURE) {
1107 pr_warning("unknown feature %d\n", feat); 1358 pr_warning("unknown feature %d\n", feat);
1108 return -1; 1359 return 0;
1109 } 1360 }
1110 if (!feat_ops[feat].print) 1361 if (!feat_ops[feat].print)
1111 return 0; 1362 return 0;
@@ -1132,252 +1383,6 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
1132 return 0; 1383 return 0;
1133} 1384}
1134 1385
1135#define dsos__for_each_with_build_id(pos, head) \
1136 list_for_each_entry(pos, head, node) \
1137 if (!pos->has_build_id) \
1138 continue; \
1139 else
1140
1141static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
1142 u16 misc, int fd)
1143{
1144 struct dso *pos;
1145
1146 dsos__for_each_with_build_id(pos, head) {
1147 int err;
1148 struct build_id_event b;
1149 size_t len;
1150
1151 if (!pos->hit)
1152 continue;
1153 len = pos->long_name_len + 1;
1154 len = ALIGN(len, NAME_ALIGN);
1155 memset(&b, 0, sizeof(b));
1156 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
1157 b.pid = pid;
1158 b.header.misc = misc;
1159 b.header.size = sizeof(b) + len;
1160 err = do_write(fd, &b, sizeof(b));
1161 if (err < 0)
1162 return err;
1163 err = write_padded(fd, pos->long_name,
1164 pos->long_name_len + 1, len);
1165 if (err < 0)
1166 return err;
1167 }
1168
1169 return 0;
1170}
1171
1172static int machine__write_buildid_table(struct machine *machine, int fd)
1173{
1174 int err;
1175 u16 kmisc = PERF_RECORD_MISC_KERNEL,
1176 umisc = PERF_RECORD_MISC_USER;
1177
1178 if (!machine__is_host(machine)) {
1179 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
1180 umisc = PERF_RECORD_MISC_GUEST_USER;
1181 }
1182
1183 err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
1184 kmisc, fd);
1185 if (err == 0)
1186 err = __dsos__write_buildid_table(&machine->user_dsos,
1187 machine->pid, umisc, fd);
1188 return err;
1189}
1190
1191static int dsos__write_buildid_table(struct perf_header *header, int fd)
1192{
1193 struct perf_session *session = container_of(header,
1194 struct perf_session, header);
1195 struct rb_node *nd;
1196 int err = machine__write_buildid_table(&session->host_machine, fd);
1197
1198 if (err)
1199 return err;
1200
1201 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
1202 struct machine *pos = rb_entry(nd, struct machine, rb_node);
1203 err = machine__write_buildid_table(pos, fd);
1204 if (err)
1205 break;
1206 }
1207 return err;
1208}
1209
1210int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
1211 const char *name, bool is_kallsyms)
1212{
1213 const size_t size = PATH_MAX;
1214 char *realname, *filename = zalloc(size),
1215 *linkname = zalloc(size), *targetname;
1216 int len, err = -1;
1217
1218 if (is_kallsyms) {
1219 if (symbol_conf.kptr_restrict) {
1220 pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
1221 return 0;
1222 }
1223 realname = (char *)name;
1224 } else
1225 realname = realpath(name, NULL);
1226
1227 if (realname == NULL || filename == NULL || linkname == NULL)
1228 goto out_free;
1229
1230 len = snprintf(filename, size, "%s%s%s",
1231 debugdir, is_kallsyms ? "/" : "", realname);
1232 if (mkdir_p(filename, 0755))
1233 goto out_free;
1234
1235	 snprintf(filename + len, size - len, "/%s", sbuild_id);
1236
1237 if (access(filename, F_OK)) {
1238 if (is_kallsyms) {
1239 if (copyfile("/proc/kallsyms", filename))
1240 goto out_free;
1241 } else if (link(realname, filename) && copyfile(name, filename))
1242 goto out_free;
1243 }
1244
1245 len = snprintf(linkname, size, "%s/.build-id/%.2s",
1246 debugdir, sbuild_id);
1247
1248 if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
1249 goto out_free;
1250
1251 snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
1252 targetname = filename + strlen(debugdir) - 5;
1253 memcpy(targetname, "../..", 5);
1254
1255 if (symlink(targetname, linkname) == 0)
1256 err = 0;
1257out_free:
1258 if (!is_kallsyms)
1259 free(realname);
1260 free(filename);
1261 free(linkname);
1262 return err;
1263}
1264
1265static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
1266 const char *name, const char *debugdir,
1267 bool is_kallsyms)
1268{
1269 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1270
1271 build_id__sprintf(build_id, build_id_size, sbuild_id);
1272
1273 return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
1274}
1275
1276int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
1277{
1278 const size_t size = PATH_MAX;
1279 char *filename = zalloc(size),
1280 *linkname = zalloc(size);
1281 int err = -1;
1282
1283 if (filename == NULL || linkname == NULL)
1284 goto out_free;
1285
1286 snprintf(linkname, size, "%s/.build-id/%.2s/%s",
1287 debugdir, sbuild_id, sbuild_id + 2);
1288
1289 if (access(linkname, F_OK))
1290 goto out_free;
1291
1292 if (readlink(linkname, filename, size - 1) < 0)
1293 goto out_free;
1294
1295 if (unlink(linkname))
1296 goto out_free;
1297
1298 /*
1299 * Since the link is relative, we must make it absolute:
1300 */
1301 snprintf(linkname, size, "%s/.build-id/%.2s/%s",
1302 debugdir, sbuild_id, filename);
1303
1304 if (unlink(linkname))
1305 goto out_free;
1306
1307 err = 0;
1308out_free:
1309 free(filename);
1310 free(linkname);
1311 return err;
1312}
1313
1314static int dso__cache_build_id(struct dso *dso, const char *debugdir)
1315{
1316 bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
1317
1318 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
1319 dso->long_name, debugdir, is_kallsyms);
1320}
1321
1322static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
1323{
1324 struct dso *pos;
1325 int err = 0;
1326
1327 dsos__for_each_with_build_id(pos, head)
1328 if (dso__cache_build_id(pos, debugdir))
1329 err = -1;
1330
1331 return err;
1332}
1333
1334static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
1335{
1336 int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
1337 ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
1338 return ret;
1339}
1340
1341static int perf_session__cache_build_ids(struct perf_session *session)
1342{
1343 struct rb_node *nd;
1344 int ret;
1345 char debugdir[PATH_MAX];
1346
1347 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
1348
1349 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
1350 return -1;
1351
1352 ret = machine__cache_build_ids(&session->host_machine, debugdir);
1353
1354 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
1355 struct machine *pos = rb_entry(nd, struct machine, rb_node);
1356 ret |= machine__cache_build_ids(pos, debugdir);
1357 }
1358 return ret ? -1 : 0;
1359}
1360
1361static bool machine__read_build_ids(struct machine *machine, bool with_hits)
1362{
1363 bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
1364 ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
1365 return ret;
1366}
1367
1368static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
1369{
1370 struct rb_node *nd;
1371 bool ret = machine__read_build_ids(&session->host_machine, with_hits);
1372
1373 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
1374 struct machine *pos = rb_entry(nd, struct machine, rb_node);
1375 ret |= machine__read_build_ids(pos, with_hits);
1376 }
1377
1378 return ret;
1379}
1380
1381static int do_write_feat(int fd, struct perf_header *h, int type, 1386static int do_write_feat(int fd, struct perf_header *h, int type,
1382 struct perf_file_section **p, 1387 struct perf_file_section **p,
1383 struct perf_evlist *evlist) 1388 struct perf_evlist *evlist)
@@ -1386,6 +1391,8 @@ static int do_write_feat(int fd, struct perf_header *h, int type,
1386 int ret = 0; 1391 int ret = 0;
1387 1392
1388 if (perf_header__has_feat(h, type)) { 1393 if (perf_header__has_feat(h, type)) {
1394 if (!feat_ops[type].write)
1395 return -1;
1389 1396
1390 (*p)->offset = lseek(fd, 0, SEEK_CUR); 1397 (*p)->offset = lseek(fd, 0, SEEK_CUR);
1391 1398
@@ -1408,18 +1415,12 @@ static int perf_header__adds_write(struct perf_header *header,
1408 struct perf_evlist *evlist, int fd) 1415 struct perf_evlist *evlist, int fd)
1409{ 1416{
1410 int nr_sections; 1417 int nr_sections;
1411 struct perf_session *session;
1412 struct perf_file_section *feat_sec, *p; 1418 struct perf_file_section *feat_sec, *p;
1413 int sec_size; 1419 int sec_size;
1414 u64 sec_start; 1420 u64 sec_start;
1421 int feat;
1415 int err; 1422 int err;
1416 1423
1417 session = container_of(header, struct perf_session, header);
1418
1419 if (perf_header__has_feat(header, HEADER_BUILD_ID &&
1420 !perf_session__read_build_ids(session, true)))
1421 perf_header__clear_feat(header, HEADER_BUILD_ID);
1422
1423 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); 1424 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
1424 if (!nr_sections) 1425 if (!nr_sections)
1425 return 0; 1426 return 0;
@@ -1433,64 +1434,11 @@ static int perf_header__adds_write(struct perf_header *header,
1433 sec_start = header->data_offset + header->data_size; 1434 sec_start = header->data_offset + header->data_size;
1434 lseek(fd, sec_start + sec_size, SEEK_SET); 1435 lseek(fd, sec_start + sec_size, SEEK_SET);
1435 1436
1436 err = do_write_feat(fd, header, HEADER_TRACE_INFO, &p, evlist); 1437 for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
1437 if (err) 1438 if (do_write_feat(fd, header, feat, &p, evlist))
1438 goto out_free; 1439 perf_header__clear_feat(header, feat);
1439
1440 err = do_write_feat(fd, header, HEADER_BUILD_ID, &p, evlist);
1441 if (err) {
1442 perf_header__clear_feat(header, HEADER_BUILD_ID);
1443 goto out_free;
1444 } 1440 }
1445 1441
1446 err = do_write_feat(fd, header, HEADER_HOSTNAME, &p, evlist);
1447 if (err)
1448 perf_header__clear_feat(header, HEADER_HOSTNAME);
1449
1450 err = do_write_feat(fd, header, HEADER_OSRELEASE, &p, evlist);
1451 if (err)
1452 perf_header__clear_feat(header, HEADER_OSRELEASE);
1453
1454 err = do_write_feat(fd, header, HEADER_VERSION, &p, evlist);
1455 if (err)
1456 perf_header__clear_feat(header, HEADER_VERSION);
1457
1458 err = do_write_feat(fd, header, HEADER_ARCH, &p, evlist);
1459 if (err)
1460 perf_header__clear_feat(header, HEADER_ARCH);
1461
1462 err = do_write_feat(fd, header, HEADER_NRCPUS, &p, evlist);
1463 if (err)
1464 perf_header__clear_feat(header, HEADER_NRCPUS);
1465
1466 err = do_write_feat(fd, header, HEADER_CPUDESC, &p, evlist);
1467 if (err)
1468 perf_header__clear_feat(header, HEADER_CPUDESC);
1469
1470 err = do_write_feat(fd, header, HEADER_CPUID, &p, evlist);
1471 if (err)
1472 perf_header__clear_feat(header, HEADER_CPUID);
1473
1474 err = do_write_feat(fd, header, HEADER_TOTAL_MEM, &p, evlist);
1475 if (err)
1476 perf_header__clear_feat(header, HEADER_TOTAL_MEM);
1477
1478 err = do_write_feat(fd, header, HEADER_CMDLINE, &p, evlist);
1479 if (err)
1480 perf_header__clear_feat(header, HEADER_CMDLINE);
1481
1482 err = do_write_feat(fd, header, HEADER_EVENT_DESC, &p, evlist);
1483 if (err)
1484 perf_header__clear_feat(header, HEADER_EVENT_DESC);
1485
1486 err = do_write_feat(fd, header, HEADER_CPU_TOPOLOGY, &p, evlist);
1487 if (err)
1488 perf_header__clear_feat(header, HEADER_CPU_TOPOLOGY);
1489
1490 err = do_write_feat(fd, header, HEADER_NUMA_TOPOLOGY, &p, evlist);
1491 if (err)
1492 perf_header__clear_feat(header, HEADER_NUMA_TOPOLOGY);
1493
1494 lseek(fd, sec_start, SEEK_SET); 1442 lseek(fd, sec_start, SEEK_SET);
1495 /* 1443 /*
1496 * may write more than needed due to dropped feature, but 1444 * may write more than needed due to dropped feature, but
@@ -1499,7 +1447,6 @@ static int perf_header__adds_write(struct perf_header *header,
1499 err = do_write(fd, feat_sec, sec_size); 1447 err = do_write(fd, feat_sec, sec_size);
1500 if (err < 0) 1448 if (err < 0)
1501 pr_debug("failed to write feature section\n"); 1449 pr_debug("failed to write feature section\n");
1502out_free:
1503 free(feat_sec); 1450 free(feat_sec);
1504 return err; 1451 return err;
1505} 1452}
@@ -1637,20 +1584,20 @@ static int perf_header__getbuffer64(struct perf_header *header,
1637int perf_header__process_sections(struct perf_header *header, int fd, 1584int perf_header__process_sections(struct perf_header *header, int fd,
1638 void *data, 1585 void *data,
1639 int (*process)(struct perf_file_section *section, 1586 int (*process)(struct perf_file_section *section,
1640 struct perf_header *ph, 1587 struct perf_header *ph,
1641 int feat, int fd, void *data)) 1588 int feat, int fd, void *data))
1642{ 1589{
1643 struct perf_file_section *feat_sec; 1590 struct perf_file_section *feat_sec, *sec;
1644 int nr_sections; 1591 int nr_sections;
1645 int sec_size; 1592 int sec_size;
1646 int idx = 0; 1593 int feat;
1647 int err = -1, feat = 1; 1594 int err;
1648 1595
1649 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); 1596 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
1650 if (!nr_sections) 1597 if (!nr_sections)
1651 return 0; 1598 return 0;
1652 1599
1653 feat_sec = calloc(sizeof(*feat_sec), nr_sections); 1600 feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections);
1654 if (!feat_sec) 1601 if (!feat_sec)
1655 return -1; 1602 return -1;
1656 1603
@@ -1658,20 +1605,16 @@ int perf_header__process_sections(struct perf_header *header, int fd,
1658 1605
1659 lseek(fd, header->data_offset + header->data_size, SEEK_SET); 1606 lseek(fd, header->data_offset + header->data_size, SEEK_SET);
1660 1607
1661 if (perf_header__getbuffer64(header, fd, feat_sec, sec_size)) 1608 err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
1609 if (err < 0)
1662 goto out_free; 1610 goto out_free;
1663 1611
1664 err = 0; 1612 for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) {
1665 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { 1613 err = process(sec++, header, feat, fd, data);
1666 if (perf_header__has_feat(header, feat)) { 1614 if (err < 0)
1667 struct perf_file_section *sec = &feat_sec[idx++]; 1615 goto out_free;
1668
1669 err = process(sec, header, feat, fd, data);
1670 if (err < 0)
1671 break;
1672 }
1673 ++feat;
1674 } 1616 }
1617 err = 0;
1675out_free: 1618out_free:
1676 free(feat_sec); 1619 free(feat_sec);
1677 return err; 1620 return err;
@@ -1906,32 +1849,21 @@ static int perf_file_section__process(struct perf_file_section *section,
1906 return 0; 1849 return 0;
1907 } 1850 }
1908 1851
1852 if (feat >= HEADER_LAST_FEATURE) {
1853 pr_debug("unknown feature %d, continuing...\n", feat);
1854 return 0;
1855 }
1856
1909 switch (feat) { 1857 switch (feat) {
1910 case HEADER_TRACE_INFO: 1858 case HEADER_TRACE_INFO:
1911 trace_report(fd, false); 1859 trace_report(fd, false);
1912 break; 1860 break;
1913
1914 case HEADER_BUILD_ID: 1861 case HEADER_BUILD_ID:
1915 if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) 1862 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
1916 pr_debug("Failed to read buildids, continuing...\n"); 1863 pr_debug("Failed to read buildids, continuing...\n");
1917 break; 1864 break;
1918
1919 case HEADER_HOSTNAME:
1920 case HEADER_OSRELEASE:
1921 case HEADER_VERSION:
1922 case HEADER_ARCH:
1923 case HEADER_NRCPUS:
1924 case HEADER_CPUDESC:
1925 case HEADER_CPUID:
1926 case HEADER_TOTAL_MEM:
1927 case HEADER_CMDLINE:
1928 case HEADER_EVENT_DESC:
1929 case HEADER_CPU_TOPOLOGY:
1930 case HEADER_NUMA_TOPOLOGY:
1931 break;
1932
1933 default: 1865 default:
1934 pr_debug("unknown feature %d, continuing...\n", feat); 1866 break;
1935 } 1867 }
1936 1868
1937 return 0; 1869 return 0;
@@ -2041,6 +1973,8 @@ int perf_session__read_header(struct perf_session *session, int fd)
2041 lseek(fd, tmp, SEEK_SET); 1973 lseek(fd, tmp, SEEK_SET);
2042 } 1974 }
2043 1975
1976 symbol_conf.nr_events = nr_attrs;
1977
2044 if (f_header.event_types.size) { 1978 if (f_header.event_types.size) {
2045 lseek(fd, f_header.event_types.offset, SEEK_SET); 1979 lseek(fd, f_header.event_types.offset, SEEK_SET);
2046 events = malloc(f_header.event_types.size); 1980 events = malloc(f_header.event_types.size);
@@ -2068,9 +2002,9 @@ out_delete_evlist:
2068 return -ENOMEM; 2002 return -ENOMEM;
2069} 2003}
2070 2004
2071int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 2005int perf_event__synthesize_attr(struct perf_tool *tool,
2072 perf_event__handler_t process, 2006 struct perf_event_attr *attr, u16 ids, u64 *id,
2073 struct perf_session *session) 2007 perf_event__handler_t process)
2074{ 2008{
2075 union perf_event *ev; 2009 union perf_event *ev;
2076 size_t size; 2010 size_t size;
@@ -2092,22 +2026,23 @@ int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
2092 ev->attr.header.type = PERF_RECORD_HEADER_ATTR; 2026 ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
2093 ev->attr.header.size = size; 2027 ev->attr.header.size = size;
2094 2028
2095 err = process(ev, NULL, session); 2029 err = process(tool, ev, NULL, NULL);
2096 2030
2097 free(ev); 2031 free(ev);
2098 2032
2099 return err; 2033 return err;
2100} 2034}
2101 2035
2102int perf_session__synthesize_attrs(struct perf_session *session, 2036int perf_event__synthesize_attrs(struct perf_tool *tool,
2037 struct perf_session *session,
2103 perf_event__handler_t process) 2038 perf_event__handler_t process)
2104{ 2039{
2105 struct perf_evsel *attr; 2040 struct perf_evsel *attr;
2106 int err = 0; 2041 int err = 0;
2107 2042
2108 list_for_each_entry(attr, &session->evlist->entries, node) { 2043 list_for_each_entry(attr, &session->evlist->entries, node) {
2109 err = perf_event__synthesize_attr(&attr->attr, attr->ids, 2044 err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids,
2110 attr->id, process, session); 2045 attr->id, process);
2111 if (err) { 2046 if (err) {
2112 pr_debug("failed to create perf header attribute\n"); 2047 pr_debug("failed to create perf header attribute\n");
2113 return err; 2048 return err;
@@ -2118,23 +2053,23 @@ int perf_session__synthesize_attrs(struct perf_session *session,
2118} 2053}
2119 2054
2120int perf_event__process_attr(union perf_event *event, 2055int perf_event__process_attr(union perf_event *event,
2121 struct perf_session *session) 2056 struct perf_evlist **pevlist)
2122{ 2057{
2123 unsigned int i, ids, n_ids; 2058 unsigned int i, ids, n_ids;
2124 struct perf_evsel *evsel; 2059 struct perf_evsel *evsel;
2060 struct perf_evlist *evlist = *pevlist;
2125 2061
2126 if (session->evlist == NULL) { 2062 if (evlist == NULL) {
2127 session->evlist = perf_evlist__new(NULL, NULL); 2063 *pevlist = evlist = perf_evlist__new(NULL, NULL);
2128 if (session->evlist == NULL) 2064 if (evlist == NULL)
2129 return -ENOMEM; 2065 return -ENOMEM;
2130 } 2066 }
2131 2067
2132 evsel = perf_evsel__new(&event->attr.attr, 2068 evsel = perf_evsel__new(&event->attr.attr, evlist->nr_entries);
2133 session->evlist->nr_entries);
2134 if (evsel == NULL) 2069 if (evsel == NULL)
2135 return -ENOMEM; 2070 return -ENOMEM;
2136 2071
2137 perf_evlist__add(session->evlist, evsel); 2072 perf_evlist__add(evlist, evsel);
2138 2073
2139 ids = event->header.size; 2074 ids = event->header.size;
2140 ids -= (void *)&event->attr.id - (void *)event; 2075 ids -= (void *)&event->attr.id - (void *)event;
@@ -2148,18 +2083,16 @@ int perf_event__process_attr(union perf_event *event,
2148 return -ENOMEM; 2083 return -ENOMEM;
2149 2084
2150 for (i = 0; i < n_ids; i++) { 2085 for (i = 0; i < n_ids; i++) {
2151 perf_evlist__id_add(session->evlist, evsel, 0, i, 2086 perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
2152 event->attr.id[i]);
2153 } 2087 }
2154 2088
2155 perf_session__update_sample_type(session);
2156
2157 return 0; 2089 return 0;
2158} 2090}
2159 2091
2160int perf_event__synthesize_event_type(u64 event_id, char *name, 2092int perf_event__synthesize_event_type(struct perf_tool *tool,
2093 u64 event_id, char *name,
2161 perf_event__handler_t process, 2094 perf_event__handler_t process,
2162 struct perf_session *session) 2095 struct machine *machine)
2163{ 2096{
2164 union perf_event ev; 2097 union perf_event ev;
2165 size_t size = 0; 2098 size_t size = 0;
@@ -2177,13 +2110,14 @@ int perf_event__synthesize_event_type(u64 event_id, char *name,
2177 ev.event_type.header.size = sizeof(ev.event_type) - 2110 ev.event_type.header.size = sizeof(ev.event_type) -
2178 (sizeof(ev.event_type.event_type.name) - size); 2111 (sizeof(ev.event_type.event_type.name) - size);
2179 2112
2180 err = process(&ev, NULL, session); 2113 err = process(tool, &ev, NULL, machine);
2181 2114
2182 return err; 2115 return err;
2183} 2116}
2184 2117
2185int perf_event__synthesize_event_types(perf_event__handler_t process, 2118int perf_event__synthesize_event_types(struct perf_tool *tool,
2186 struct perf_session *session) 2119 perf_event__handler_t process,
2120 struct machine *machine)
2187{ 2121{
2188 struct perf_trace_event_type *type; 2122 struct perf_trace_event_type *type;
2189 int i, err = 0; 2123 int i, err = 0;
@@ -2191,9 +2125,9 @@ int perf_event__synthesize_event_types(perf_event__handler_t process,
2191 for (i = 0; i < event_count; i++) { 2125 for (i = 0; i < event_count; i++) {
2192 type = &events[i]; 2126 type = &events[i];
2193 2127
2194 err = perf_event__synthesize_event_type(type->event_id, 2128 err = perf_event__synthesize_event_type(tool, type->event_id,
2195 type->name, process, 2129 type->name, process,
2196 session); 2130 machine);
2197 if (err) { 2131 if (err) {
2198 pr_debug("failed to create perf header event type\n"); 2132 pr_debug("failed to create perf header event type\n");
2199 return err; 2133 return err;
@@ -2203,8 +2137,8 @@ int perf_event__synthesize_event_types(perf_event__handler_t process,
2203 return err; 2137 return err;
2204} 2138}
2205 2139
2206int perf_event__process_event_type(union perf_event *event, 2140int perf_event__process_event_type(struct perf_tool *tool __unused,
2207 struct perf_session *session __unused) 2141 union perf_event *event)
2208{ 2142{
2209 if (perf_header__push_event(event->event_type.event_type.event_id, 2143 if (perf_header__push_event(event->event_type.event_type.event_id,
2210 event->event_type.event_type.name) < 0) 2144 event->event_type.event_type.name) < 0)
@@ -2213,9 +2147,9 @@ int perf_event__process_event_type(union perf_event *event,
2213 return 0; 2147 return 0;
2214} 2148}
2215 2149
2216int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, 2150int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
2217 perf_event__handler_t process, 2151 struct perf_evlist *evlist,
2218 struct perf_session *session __unused) 2152 perf_event__handler_t process)
2219{ 2153{
2220 union perf_event ev; 2154 union perf_event ev;
2221 struct tracing_data *tdata; 2155 struct tracing_data *tdata;
@@ -2246,7 +2180,7 @@ int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
2246 ev.tracing_data.header.size = sizeof(ev.tracing_data); 2180 ev.tracing_data.header.size = sizeof(ev.tracing_data);
2247 ev.tracing_data.size = aligned_size; 2181 ev.tracing_data.size = aligned_size;
2248 2182
2249 process(&ev, NULL, session); 2183 process(tool, &ev, NULL, NULL);
2250 2184
2251 /* 2185 /*
2252 * The put function will copy all the tracing data 2186 * The put function will copy all the tracing data
@@ -2288,10 +2222,10 @@ int perf_event__process_tracing_data(union perf_event *event,
2288 return size_read + padding; 2222 return size_read + padding;
2289} 2223}
2290 2224
2291int perf_event__synthesize_build_id(struct dso *pos, u16 misc, 2225int perf_event__synthesize_build_id(struct perf_tool *tool,
2226 struct dso *pos, u16 misc,
2292 perf_event__handler_t process, 2227 perf_event__handler_t process,
2293 struct machine *machine, 2228 struct machine *machine)
2294 struct perf_session *session)
2295{ 2229{
2296 union perf_event ev; 2230 union perf_event ev;
2297 size_t len; 2231 size_t len;
@@ -2311,12 +2245,13 @@ int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
2311 ev.build_id.header.size = sizeof(ev.build_id) + len; 2245 ev.build_id.header.size = sizeof(ev.build_id) + len;
2312 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); 2246 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
2313 2247
2314 err = process(&ev, NULL, session); 2248 err = process(tool, &ev, NULL, machine);
2315 2249
2316 return err; 2250 return err;
2317} 2251}
2318 2252
2319int perf_event__process_build_id(union perf_event *event, 2253int perf_event__process_build_id(struct perf_tool *tool __used,
2254 union perf_event *event,
2320 struct perf_session *session) 2255 struct perf_session *session)
2321{ 2256{
2322 __event_process_build_id(&event->build_id, 2257 __event_process_build_id(&event->build_id,
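[Note on the attr-event parsing in the hunks above: the number of ids carried by a PERF_RECORD_HEADER_ATTR event is not stored in any field; it is derived from the event size, since everything between the end of the attr payload and header.size is an array of u64 ids. A standalone sketch of that arithmetic, using simplified stand-in structs — the real layouts live in util/event.h and the perf ABI headers:]

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the real perf structs. */
struct perf_event_header { uint32_t type; uint16_t misc; uint16_t size; };

struct attr_event_sketch {
	struct perf_event_header header;
	unsigned char		 attr[64];	/* stands in for struct perf_event_attr */
	uint64_t		 id[];		/* trailing array of sample ids */
};

int main(void)
{
	struct attr_event_sketch ev;

	/* Pretend the record carries three ids after the attr payload. */
	ev.header.size = offsetof(struct attr_event_sketch, id) + 3 * sizeof(uint64_t);

	/* Same computation as 'ids -= (void *)&event->attr.id - (void *)event': */
	size_t ids = ev.header.size - offsetof(struct attr_event_sketch, id);
	printf("n_ids = %zu\n", ids / sizeof(uint64_t));	/* prints: n_ids = 3 */
	return 0;
}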
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3d5a742f4a2a..ac4ec956024e 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -10,7 +10,8 @@
10#include <linux/bitmap.h> 10#include <linux/bitmap.h>
11 11
12enum { 12enum {
13 HEADER_TRACE_INFO = 1, 13 HEADER_RESERVED = 0, /* always cleared */
14 HEADER_TRACE_INFO = 1,
14 HEADER_BUILD_ID, 15 HEADER_BUILD_ID,
15 16
16 HEADER_HOSTNAME, 17 HEADER_HOSTNAME,
@@ -27,10 +28,9 @@ enum {
27 HEADER_NUMA_TOPOLOGY, 28 HEADER_NUMA_TOPOLOGY,
28 29
29 HEADER_LAST_FEATURE, 30 HEADER_LAST_FEATURE,
31 HEADER_FEAT_BITS = 256,
30}; 32};
31 33
32#define HEADER_FEAT_BITS 256
33
34struct perf_file_section { 34struct perf_file_section {
35 u64 offset; 35 u64 offset;
36 u64 size; 36 u64 size;
@@ -68,6 +68,7 @@ struct perf_header {
68}; 68};
69 69
70struct perf_evlist; 70struct perf_evlist;
71struct perf_session;
71 72
72int perf_session__read_header(struct perf_session *session, int fd); 73int perf_session__read_header(struct perf_session *session, int fd);
73int perf_session__write_header(struct perf_session *session, 74int perf_session__write_header(struct perf_session *session,
@@ -96,32 +97,36 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
96 const char *name, bool is_kallsyms); 97 const char *name, bool is_kallsyms);
97int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); 98int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
98 99
99int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 100int perf_event__synthesize_attr(struct perf_tool *tool,
100 perf_event__handler_t process, 101 struct perf_event_attr *attr, u16 ids, u64 *id,
101 struct perf_session *session); 102 perf_event__handler_t process);
102int perf_session__synthesize_attrs(struct perf_session *session, 103int perf_event__synthesize_attrs(struct perf_tool *tool,
103 perf_event__handler_t process); 104 struct perf_session *session,
104int perf_event__process_attr(union perf_event *event, struct perf_session *session); 105 perf_event__handler_t process);
106int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist);
105 107
106int perf_event__synthesize_event_type(u64 event_id, char *name, 108int perf_event__synthesize_event_type(struct perf_tool *tool,
109 u64 event_id, char *name,
107 perf_event__handler_t process, 110 perf_event__handler_t process,
108 struct perf_session *session); 111 struct machine *machine);
109int perf_event__synthesize_event_types(perf_event__handler_t process, 112int perf_event__synthesize_event_types(struct perf_tool *tool,
110 struct perf_session *session); 113 perf_event__handler_t process,
111int perf_event__process_event_type(union perf_event *event, 114 struct machine *machine);
112 struct perf_session *session); 115int perf_event__process_event_type(struct perf_tool *tool,
113 116 union perf_event *event);
114int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, 117
115 perf_event__handler_t process, 118int perf_event__synthesize_tracing_data(struct perf_tool *tool,
116 struct perf_session *session); 119 int fd, struct perf_evlist *evlist,
120 perf_event__handler_t process);
117int perf_event__process_tracing_data(union perf_event *event, 121int perf_event__process_tracing_data(union perf_event *event,
118 struct perf_session *session); 122 struct perf_session *session);
119 123
120int perf_event__synthesize_build_id(struct dso *pos, u16 misc, 124int perf_event__synthesize_build_id(struct perf_tool *tool,
125 struct dso *pos, u16 misc,
121 perf_event__handler_t process, 126 perf_event__handler_t process,
122 struct machine *machine, 127 struct machine *machine);
123 struct perf_session *session); 128int perf_event__process_build_id(struct perf_tool *tool,
124int perf_event__process_build_id(union perf_event *event, 129 union perf_event *event,
125 struct perf_session *session); 130 struct perf_session *session);
126 131
127/* 132/*
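[Context for the HEADER_FEAT_BITS move above: the header's feature mask is a fixed 256-bit bitmap, so folding the constant into the enum keeps it next to the feature ids it bounds. A sketch of the bitmap it sizes — the real struct perf_header holds this via the tools' DECLARE_BITMAP; the helper names below are illustrative:]

#define BITS_PER_LONG	 (8 * (int)sizeof(long))
#define HEADER_FEAT_BITS 256

/* 256 bits => 4 longs on 64-bit, 8 on 32-bit. */
static unsigned long adds_features[HEADER_FEAT_BITS / BITS_PER_LONG];

static void perf_header__set_feat_sketch(int feat)
{
	adds_features[feat / BITS_PER_LONG] |= 1UL << (feat % BITS_PER_LONG);
}

static int perf_header__has_feat_sketch(int feat)
{
	return !!(adds_features[feat / BITS_PER_LONG] & (1UL << (feat % BITS_PER_LONG)));
}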
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 89289c8e935e..ff6f9d56ea41 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -117,7 +117,6 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
117 117
118static inline int hist_entry__tui_annotate(struct hist_entry *self __used, 118static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
119 int evidx __used, 119 int evidx __used,
120 int nr_events __used,
121 void(*timer)(void *arg) __used, 120 void(*timer)(void *arg) __used,
122 void *arg __used, 121 void *arg __used,
123 int delay_secs __used) 122 int delay_secs __used)
@@ -128,7 +127,7 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
128#define K_RIGHT -2 127#define K_RIGHT -2
129#else 128#else
130#include "ui/keysyms.h" 129#include "ui/keysyms.h"
131int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events, 130int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
132 void(*timer)(void *arg), void *arg, int delay_secs); 131 void(*timer)(void *arg), void *arg, int delay_secs);
133 132
134int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, 133int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index 305c8484f200..62cdee78db7b 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -9,6 +9,17 @@
9#define BITS_PER_BYTE 8 9#define BITS_PER_BYTE 8
10#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) 10#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
11 11
12#define for_each_set_bit(bit, addr, size) \
13 for ((bit) = find_first_bit((addr), (size)); \
14 (bit) < (size); \
15 (bit) = find_next_bit((addr), (size), (bit) + 1))
16
 17/* same as for_each_set_bit() but uses 'bit' as the value to start with */
18#define for_each_set_bit_cont(bit, addr, size) \
19 for ((bit) = find_next_bit((addr), (size), (bit)); \
20 (bit) < (size); \
21 (bit) = find_next_bit((addr), (size), (bit) + 1))
22
12static inline void set_bit(int nr, unsigned long *addr) 23static inline void set_bit(int nr, unsigned long *addr)
13{ 24{
14 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); 25 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
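[Usage of the iterator macros added above, as a quick illustration; this assumes the macros and the find_first_bit()/find_next_bit() fallbacks introduced later in this same header are in scope:]

#include <stdio.h>

int main(void)
{
	unsigned long mask = 0x15;	/* bits 0, 2 and 4 set */
	int bit;

	for_each_set_bit(bit, &mask, BITS_PER_LONG)
		printf("bit %d is set\n", bit);	/* prints 0, 2, 4 */
	return 0;
}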
@@ -30,4 +41,111 @@ static inline unsigned long hweight_long(unsigned long w)
30 return sizeof(w) == 4 ? hweight32(w) : hweight64(w); 41 return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
31} 42}
32 43
44#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
45
46/**
47 * __ffs - find first bit in word.
48 * @word: The word to search
49 *
50 * Undefined if no bit exists, so code should check against 0 first.
51 */
52static __always_inline unsigned long __ffs(unsigned long word)
53{
54 int num = 0;
55
56#if BITS_PER_LONG == 64
57 if ((word & 0xffffffff) == 0) {
58 num += 32;
59 word >>= 32;
60 }
61#endif
62 if ((word & 0xffff) == 0) {
63 num += 16;
64 word >>= 16;
65 }
66 if ((word & 0xff) == 0) {
67 num += 8;
68 word >>= 8;
69 }
70 if ((word & 0xf) == 0) {
71 num += 4;
72 word >>= 4;
73 }
74 if ((word & 0x3) == 0) {
75 num += 2;
76 word >>= 2;
77 }
78 if ((word & 0x1) == 0)
79 num += 1;
80 return num;
81}
82
83/*
84 * Find the first set bit in a memory region.
85 */
86static inline unsigned long
87find_first_bit(const unsigned long *addr, unsigned long size)
88{
89 const unsigned long *p = addr;
90 unsigned long result = 0;
91 unsigned long tmp;
92
93 while (size & ~(BITS_PER_LONG-1)) {
94 if ((tmp = *(p++)))
95 goto found;
96 result += BITS_PER_LONG;
97 size -= BITS_PER_LONG;
98 }
99 if (!size)
100 return result;
101
102 tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
103 if (tmp == 0UL) /* Are any bits set? */
104 return result + size; /* Nope. */
105found:
106 return result + __ffs(tmp);
107}
108
109/*
110 * Find the next set bit in a memory region.
111 */
112static inline unsigned long
113find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
114{
115 const unsigned long *p = addr + BITOP_WORD(offset);
116 unsigned long result = offset & ~(BITS_PER_LONG-1);
117 unsigned long tmp;
118
119 if (offset >= size)
120 return size;
121 size -= result;
122 offset %= BITS_PER_LONG;
123 if (offset) {
124 tmp = *(p++);
125 tmp &= (~0UL << offset);
126 if (size < BITS_PER_LONG)
127 goto found_first;
128 if (tmp)
129 goto found_middle;
130 size -= BITS_PER_LONG;
131 result += BITS_PER_LONG;
132 }
133 while (size & ~(BITS_PER_LONG-1)) {
134 if ((tmp = *(p++)))
135 goto found_middle;
136 result += BITS_PER_LONG;
137 size -= BITS_PER_LONG;
138 }
139 if (!size)
140 return result;
141 tmp = *p;
142
143found_first:
144 tmp &= (~0UL >> (BITS_PER_LONG - size));
145 if (tmp == 0UL) /* Are any bits set? */
146 return result + size; /* Nope. */
147found_middle:
148 return result + __ffs(tmp);
149}
150
33#endif 151#endif
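[A worked trace of the __ffs() binary search above: for word = 0x80, the 0xffffffff, 0xffff and 0xff windows are all non-zero; the 0xf window is zero (num = 4, word becomes 0x8); the 0x3 window is zero (num = 6, word becomes 0x2); bit 0 is clear (num = 7). Result: bit 7, as expected. A quick self-check against the GCC builtin — the builtin is used here only for verification, perf itself does not rely on it:]

#include <assert.h>

/* Assumes the __ffs() definition from the hunk above is in scope. */
static void test___ffs(void)
{
	for (unsigned int i = 0; i < 8 * sizeof(unsigned long); i++)
		assert(__ffs(1UL << i) == (unsigned long)__builtin_ctzl(1UL << i));
}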
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 78284b13e808..316aa0ab7122 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -562,6 +562,10 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid)
562 INIT_LIST_HEAD(&self->user_dsos); 562 INIT_LIST_HEAD(&self->user_dsos);
563 INIT_LIST_HEAD(&self->kernel_dsos); 563 INIT_LIST_HEAD(&self->kernel_dsos);
564 564
565 self->threads = RB_ROOT;
566 INIT_LIST_HEAD(&self->dead_threads);
567 self->last_match = NULL;
568
565 self->kmaps.machine = self; 569 self->kmaps.machine = self;
566 self->pid = pid; 570 self->pid = pid;
567 self->root_dir = strdup(root_dir); 571 self->root_dir = strdup(root_dir);
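[The hunk above moves per-machine thread state — the threads rb-tree, the dead_threads list and the last_match cache — into machine__init(), matching the fields added to struct machine in the map.h hunk below. machine__findnew_thread(), declared there, can be expected to follow the usual kernel rb-tree find-or-insert idiom; a minimal sketch under that assumption, with thread__new() as an assumed constructor:]

static struct thread *machine__findnew_thread_sketch(struct machine *machine, pid_t pid)
{
	struct rb_node **p = &machine->threads.rb_node;
	struct rb_node *parent = NULL;
	struct thread *th;

	/* Fast path: repeated lookups for the same pid hit the cache. */
	if (machine->last_match && machine->last_match->pid == pid)
		return machine->last_match;

	while (*p != NULL) {
		parent = *p;
		th = rb_entry(parent, struct thread, rb_node);
		if (th->pid == pid) {
			machine->last_match = th;
			return th;
		}
		if (pid < th->pid)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	th = thread__new(pid);	/* assumed allocator/constructor */
	if (th != NULL) {
		rb_link_node(&th->rb_node, parent, p);
		rb_insert_color(&th->rb_node, &machine->threads);
		machine->last_match = th;
	}
	return th;
}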
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 890d85545d0f..2b8017f8a930 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -18,9 +18,11 @@ enum map_type {
18extern const char *map_type__name[MAP__NR_TYPES]; 18extern const char *map_type__name[MAP__NR_TYPES];
19 19
20struct dso; 20struct dso;
21struct ip_callchain;
21struct ref_reloc_sym; 22struct ref_reloc_sym;
22struct map_groups; 23struct map_groups;
23struct machine; 24struct machine;
25struct perf_evsel;
24 26
25struct map { 27struct map {
26 union { 28 union {
@@ -61,7 +63,11 @@ struct map_groups {
61struct machine { 63struct machine {
62 struct rb_node rb_node; 64 struct rb_node rb_node;
63 pid_t pid; 65 pid_t pid;
66 u16 id_hdr_size;
64 char *root_dir; 67 char *root_dir;
68 struct rb_root threads;
69 struct list_head dead_threads;
70 struct thread *last_match;
65 struct list_head user_dsos; 71 struct list_head user_dsos;
66 struct list_head kernel_dsos; 72 struct list_head kernel_dsos;
67 struct map_groups kmaps; 73 struct map_groups kmaps;
@@ -148,6 +154,13 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid);
148void machine__exit(struct machine *self); 154void machine__exit(struct machine *self);
149void machine__delete(struct machine *self); 155void machine__delete(struct machine *self);
150 156
157int machine__resolve_callchain(struct machine *machine,
158 struct perf_evsel *evsel, struct thread *thread,
159 struct ip_callchain *chain,
160 struct symbol **parent);
161int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
162 u64 addr);
163
151/* 164/*
152 * Default guest kernel is defined by parameter --guestkallsyms 165 * Default guest kernel is defined by parameter --guestkallsyms
153 * and --guestmodules 166 * and --guestmodules
@@ -190,6 +203,12 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
190 struct map **mapp, 203 struct map **mapp,
191 symbol_filter_t filter); 204 symbol_filter_t filter);
192 205
206
207struct thread *machine__findnew_thread(struct machine *machine, pid_t pid);
208void machine__remove_thread(struct machine *machine, struct thread *th);
209
210size_t machine__fprintf(struct machine *machine, FILE *fp);
211
193static inline 212static inline
194struct symbol *machine__find_kernel_symbol(struct machine *self, 213struct symbol *machine__find_kernel_symbol(struct machine *self,
195 enum map_type type, u64 addr, 214 enum map_type type, u64 addr,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 928918b796b2..531c283fc0c5 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -25,8 +25,6 @@ enum event_result {
25 EVT_HANDLED_ALL 25 EVT_HANDLED_ALL
26}; 26};
27 27
28char debugfs_path[MAXPATHLEN];
29
30#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x 28#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
31#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x 29#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
32 30
@@ -40,6 +38,7 @@ static struct event_symbol event_symbols[] = {
40 { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, 38 { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
41 { CHW(BRANCH_MISSES), "branch-misses", "" }, 39 { CHW(BRANCH_MISSES), "branch-misses", "" },
42 { CHW(BUS_CYCLES), "bus-cycles", "" }, 40 { CHW(BUS_CYCLES), "bus-cycles", "" },
41 { CHW(REF_CPU_CYCLES), "ref-cycles", "" },
43 42
44 { CSW(CPU_CLOCK), "cpu-clock", "" }, 43 { CSW(CPU_CLOCK), "cpu-clock", "" },
45 { CSW(TASK_CLOCK), "task-clock", "" }, 44 { CSW(TASK_CLOCK), "task-clock", "" },
@@ -70,6 +69,7 @@ static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
70 "bus-cycles", 69 "bus-cycles",
71 "stalled-cycles-frontend", 70 "stalled-cycles-frontend",
72 "stalled-cycles-backend", 71 "stalled-cycles-backend",
72 "ref-cycles",
73}; 73};
74 74
75static const char *sw_event_names[PERF_COUNT_SW_MAX] = { 75static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
@@ -140,7 +140,7 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
140 char evt_path[MAXPATHLEN]; 140 char evt_path[MAXPATHLEN];
141 int fd; 141 int fd;
142 142
143 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 143 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
144 sys_dir->d_name, evt_dir->d_name); 144 sys_dir->d_name, evt_dir->d_name);
145 fd = open(evt_path, O_RDONLY); 145 fd = open(evt_path, O_RDONLY);
146 if (fd < 0) 146 if (fd < 0)
@@ -171,16 +171,16 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
171 char evt_path[MAXPATHLEN]; 171 char evt_path[MAXPATHLEN];
172 char dir_path[MAXPATHLEN]; 172 char dir_path[MAXPATHLEN];
173 173
174 if (debugfs_valid_mountpoint(debugfs_path)) 174 if (debugfs_valid_mountpoint(tracing_events_path))
175 return NULL; 175 return NULL;
176 176
177 sys_dir = opendir(debugfs_path); 177 sys_dir = opendir(tracing_events_path);
178 if (!sys_dir) 178 if (!sys_dir)
179 return NULL; 179 return NULL;
180 180
181 for_each_subsystem(sys_dir, sys_dirent, sys_next) { 181 for_each_subsystem(sys_dir, sys_dirent, sys_next) {
182 182
183 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 183 snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
184 sys_dirent.d_name); 184 sys_dirent.d_name);
185 evt_dir = opendir(dir_path); 185 evt_dir = opendir(dir_path);
186 if (!evt_dir) 186 if (!evt_dir)
@@ -447,7 +447,7 @@ parse_single_tracepoint_event(char *sys_name,
447 u64 id; 447 u64 id;
448 int fd; 448 int fd;
449 449
450 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 450 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
451 sys_name, evt_name); 451 sys_name, evt_name);
452 452
453 fd = open(evt_path, O_RDONLY); 453 fd = open(evt_path, O_RDONLY);
@@ -485,7 +485,7 @@ parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name,
485 struct dirent *evt_ent; 485 struct dirent *evt_ent;
486 DIR *evt_dir; 486 DIR *evt_dir;
487 487
488 snprintf(evt_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_name); 488 snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
489 evt_dir = opendir(evt_path); 489 evt_dir = opendir(evt_path);
490 490
491 if (!evt_dir) { 491 if (!evt_dir) {
@@ -528,7 +528,7 @@ parse_tracepoint_event(struct perf_evlist *evlist, const char **strp,
528 char sys_name[MAX_EVENT_LENGTH]; 528 char sys_name[MAX_EVENT_LENGTH];
529 unsigned int sys_length, evt_length; 529 unsigned int sys_length, evt_length;
530 530
531 if (debugfs_valid_mountpoint(debugfs_path)) 531 if (debugfs_valid_mountpoint(tracing_events_path))
532 return 0; 532 return 0;
533 533
534 evt_name = strchr(*strp, ':'); 534 evt_name = strchr(*strp, ':');
@@ -920,10 +920,10 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
920 char evt_path[MAXPATHLEN]; 920 char evt_path[MAXPATHLEN];
921 char dir_path[MAXPATHLEN]; 921 char dir_path[MAXPATHLEN];
922 922
923 if (debugfs_valid_mountpoint(debugfs_path)) 923 if (debugfs_valid_mountpoint(tracing_events_path))
924 return; 924 return;
925 925
926 sys_dir = opendir(debugfs_path); 926 sys_dir = opendir(tracing_events_path);
927 if (!sys_dir) 927 if (!sys_dir)
928 return; 928 return;
929 929
@@ -932,7 +932,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
932 !strglobmatch(sys_dirent.d_name, subsys_glob)) 932 !strglobmatch(sys_dirent.d_name, subsys_glob))
933 continue; 933 continue;
934 934
935 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 935 snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
936 sys_dirent.d_name); 936 sys_dirent.d_name);
937 evt_dir = opendir(dir_path); 937 evt_dir = opendir(dir_path);
938 if (!evt_dir) 938 if (!evt_dir)
@@ -964,16 +964,16 @@ int is_valid_tracepoint(const char *event_string)
964 char evt_path[MAXPATHLEN]; 964 char evt_path[MAXPATHLEN];
965 char dir_path[MAXPATHLEN]; 965 char dir_path[MAXPATHLEN];
966 966
967 if (debugfs_valid_mountpoint(debugfs_path)) 967 if (debugfs_valid_mountpoint(tracing_events_path))
968 return 0; 968 return 0;
969 969
970 sys_dir = opendir(debugfs_path); 970 sys_dir = opendir(tracing_events_path);
971 if (!sys_dir) 971 if (!sys_dir)
972 return 0; 972 return 0;
973 973
974 for_each_subsystem(sys_dir, sys_dirent, sys_next) { 974 for_each_subsystem(sys_dir, sys_dirent, sys_next) {
975 975
976 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 976 snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
977 sys_dirent.d_name); 977 sys_dirent.d_name);
978 evt_dir = opendir(dir_path); 978 evt_dir = opendir(dir_path);
979 if (!evt_dir) 979 if (!evt_dir)
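[On the debugfs_path → tracing_events_path switch above: the old global held the debugfs mount root and every caller appended its own subpath; the new one points straight at the events directory, so the snprintf() call sites lose a level of path assembly. A sketch of how such a path is typically composed — the function name here is illustrative, the real helper lives in the tools' debugfs support code:]

#include <limits.h>
#include <stdio.h>

static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";

static void set_tracing_events_path_sketch(const char *debugfs_mountpoint)
{
	/* e.g. "/sys/kernel/debug" -> "/sys/kernel/debug/tracing/events" */
	snprintf(tracing_events_path, sizeof(tracing_events_path),
		 "%s/tracing/events", debugfs_mountpoint);
}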
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 2f8e375e038d..7e0cbe75d5f1 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -39,7 +39,6 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob);
39int print_hwcache_events(const char *event_glob); 39int print_hwcache_events(const char *event_glob);
40extern int is_valid_tracepoint(const char *event_string); 40extern int is_valid_tracepoint(const char *event_string);
41 41
42extern char debugfs_path[];
43extern int valid_debugfs_mount(const char *debugfs); 42extern int valid_debugfs_mount(const char *debugfs);
44 43
45#endif /* __PERF_PARSE_EVENTS_H */ 44#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 1132c8f0ce89..17e94d0c36f9 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -5,7 +5,6 @@
5#include "util.h" 5#include "util.h"
6#include "probe-event.h" 6#include "probe-event.h"
7 7
8#define MAX_PATH_LEN 256
9#define MAX_PROBE_BUFFER 1024 8#define MAX_PROBE_BUFFER 1024
10#define MAX_PROBES 128 9#define MAX_PROBES 128
11 10
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 74350ffb57fe..e30749e38a9b 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -27,7 +27,10 @@
27 27
28#include "../../perf.h" 28#include "../../perf.h"
29#include "../util.h" 29#include "../util.h"
30#include "../thread.h"
31#include "../event.h"
30#include "../trace-event.h" 32#include "../trace-event.h"
33#include "../evsel.h"
31 34
32#include <EXTERN.h> 35#include <EXTERN.h>
33#include <perl.h> 36#include <perl.h>
@@ -245,11 +248,11 @@ static inline struct event *find_cache_event(int type)
245 return event; 248 return event;
246} 249}
247 250
248static void perl_process_event(union perf_event *pevent __unused, 251static void perl_process_tracepoint(union perf_event *pevent __unused,
249 struct perf_sample *sample, 252 struct perf_sample *sample,
250 struct perf_evsel *evsel, 253 struct perf_evsel *evsel,
251 struct perf_session *session __unused, 254 struct machine *machine __unused,
252 struct thread *thread) 255 struct thread *thread)
253{ 256{
254 struct format_field *field; 257 struct format_field *field;
255 static char handler[256]; 258 static char handler[256];
@@ -265,6 +268,9 @@ static void perl_process_event(union perf_event *pevent __unused,
265 268
266 dSP; 269 dSP;
267 270
271 if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
272 return;
273
268 type = trace_parse_common_type(data); 274 type = trace_parse_common_type(data);
269 275
270 event = find_cache_event(type); 276 event = find_cache_event(type);
@@ -332,6 +338,42 @@ static void perl_process_event(union perf_event *pevent __unused,
332 LEAVE; 338 LEAVE;
333} 339}
334 340
341static void perl_process_event_generic(union perf_event *pevent __unused,
342 struct perf_sample *sample,
343 struct perf_evsel *evsel __unused,
344 struct machine *machine __unused,
345 struct thread *thread __unused)
346{
347 dSP;
348
349 if (!get_cv("process_event", 0))
350 return;
351
352 ENTER;
353 SAVETMPS;
354 PUSHMARK(SP);
355 XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size)));
356 XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr))));
357 XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample))));
358 XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size)));
359 PUTBACK;
360 call_pv("process_event", G_SCALAR);
361 SPAGAIN;
362 PUTBACK;
363 FREETMPS;
364 LEAVE;
365}
366
367static void perl_process_event(union perf_event *pevent,
368 struct perf_sample *sample,
369 struct perf_evsel *evsel,
370 struct machine *machine,
371 struct thread *thread)
372{
373 perl_process_tracepoint(pevent, sample, evsel, machine, thread);
374 perl_process_event_generic(pevent, sample, evsel, machine, thread);
375}
376
335static void run_start_sub(void) 377static void run_start_sub(void)
336{ 378{
337 dSP; /* access to Perl stack */ 379 dSP; /* access to Perl stack */
@@ -553,7 +595,28 @@ static int perl_generate_script(const char *outfile)
553 fprintf(ofp, "sub print_header\n{\n" 595 fprintf(ofp, "sub print_header\n{\n"
554 "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" 596 "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
555 "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t " 597 "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t "
556 "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}"); 598 "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n");
599
600 fprintf(ofp,
601 "\n# Packed byte string args of process_event():\n"
602 "#\n"
603 "# $event:\tunion perf_event\tutil/event.h\n"
604 "# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n"
605 "# $sample:\tstruct perf_sample\tutil/event.h\n"
606 "# $raw_data:\tperf_sample->raw_data\tutil/event.h\n"
607 "\n"
608 "sub process_event\n"
609 "{\n"
610 "\tmy ($event, $attr, $sample, $raw_data) = @_;\n"
611 "\n"
612 "\tmy @event\t= unpack(\"LSS\", $event);\n"
613 "\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n"
614 "\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n"
615 "\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n"
616 "\n"
617 "\tuse Data::Dumper;\n"
618 "\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n"
619 "}\n");
557 620
558 fclose(ofp); 621 fclose(ofp);
559 622
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 6ccf70e8d8f2..0b2a48783172 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -29,6 +29,8 @@
29 29
30#include "../../perf.h" 30#include "../../perf.h"
31#include "../util.h" 31#include "../util.h"
32#include "../event.h"
33#include "../thread.h"
32#include "../trace-event.h" 34#include "../trace-event.h"
33 35
34PyMODINIT_FUNC initperf_trace_context(void); 36PyMODINIT_FUNC initperf_trace_context(void);
@@ -207,7 +209,7 @@ static inline struct event *find_cache_event(int type)
207static void python_process_event(union perf_event *pevent __unused, 209static void python_process_event(union perf_event *pevent __unused,
208 struct perf_sample *sample, 210 struct perf_sample *sample,
209 struct perf_evsel *evsel __unused, 211 struct perf_evsel *evsel __unused,
210 struct perf_session *session __unused, 212 struct machine *machine __unused,
211 struct thread *thread) 213 struct thread *thread)
212{ 214{
213 PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; 215 PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0f4555ce9063..b5ca2558c7bb 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -10,6 +10,7 @@
10#include "evlist.h" 10#include "evlist.h"
11#include "evsel.h" 11#include "evsel.h"
12#include "session.h" 12#include "session.h"
13#include "tool.h"
13#include "sort.h" 14#include "sort.h"
14#include "util.h" 15#include "util.h"
15#include "cpumap.h" 16#include "cpumap.h"
@@ -78,39 +79,13 @@ out_close:
78 return -1; 79 return -1;
79} 80}
80 81
81static void perf_session__id_header_size(struct perf_session *session)
82{
83 struct perf_sample *data;
84 u64 sample_type = session->sample_type;
85 u16 size = 0;
86
87 if (!session->sample_id_all)
88 goto out;
89
90 if (sample_type & PERF_SAMPLE_TID)
91 size += sizeof(data->tid) * 2;
92
93 if (sample_type & PERF_SAMPLE_TIME)
94 size += sizeof(data->time);
95
96 if (sample_type & PERF_SAMPLE_ID)
97 size += sizeof(data->id);
98
99 if (sample_type & PERF_SAMPLE_STREAM_ID)
100 size += sizeof(data->stream_id);
101
102 if (sample_type & PERF_SAMPLE_CPU)
103 size += sizeof(data->cpu) * 2;
104out:
105 session->id_hdr_size = size;
106}
107
108void perf_session__update_sample_type(struct perf_session *self) 82void perf_session__update_sample_type(struct perf_session *self)
109{ 83{
110 self->sample_type = perf_evlist__sample_type(self->evlist); 84 self->sample_type = perf_evlist__sample_type(self->evlist);
111 self->sample_size = __perf_evsel__sample_size(self->sample_type); 85 self->sample_size = __perf_evsel__sample_size(self->sample_type);
112 self->sample_id_all = perf_evlist__sample_id_all(self->evlist); 86 self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
113 perf_session__id_header_size(self); 87 self->id_hdr_size = perf_evlist__id_hdr_size(self->evlist);
88 self->host_machine.id_hdr_size = self->id_hdr_size;
114} 89}
115 90
116int perf_session__create_kernel_maps(struct perf_session *self) 91int perf_session__create_kernel_maps(struct perf_session *self)
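[The deleted helper above spells out what id_hdr_size measures: the size of the sample_id block appended to non-sample events when sample_id_all is set, one field per selected PERF_SAMPLE_* bit. The same computation now lives behind perf_evlist__id_hdr_size(). A standalone restatement; the flag values follow the perf ABI of this era:]

#include <stdint.h>

enum {	/* subset of PERF_SAMPLE_* relevant to the id block */
	S_TID		= 1U << 1,
	S_TIME		= 1U << 2,
	S_ID		= 1U << 6,
	S_CPU		= 1U << 7,
	S_STREAM_ID	= 1U << 9,
};

static uint16_t id_hdr_size_sketch(uint64_t sample_type, int sample_id_all)
{
	uint16_t size = 0;

	if (!sample_id_all)
		return 0;
	if (sample_type & S_TID)
		size += sizeof(uint32_t) * 2;	/* pid, tid */
	if (sample_type & S_TIME)
		size += sizeof(uint64_t);
	if (sample_type & S_ID)
		size += sizeof(uint64_t);
	if (sample_type & S_STREAM_ID)
		size += sizeof(uint64_t);
	if (sample_type & S_CPU)
		size += sizeof(uint32_t) * 2;	/* cpu, reserved */
	return size;
}
/* e.g. TID|TIME|ID -> 8 + 8 + 8 = 24 bytes appended after each mmap/comm/fork event */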
@@ -130,18 +105,26 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
130 105
131struct perf_session *perf_session__new(const char *filename, int mode, 106struct perf_session *perf_session__new(const char *filename, int mode,
132 bool force, bool repipe, 107 bool force, bool repipe,
133 struct perf_event_ops *ops) 108 struct perf_tool *tool)
134{ 109{
135 size_t len = filename ? strlen(filename) + 1 : 0; 110 struct perf_session *self;
136 struct perf_session *self = zalloc(sizeof(*self) + len); 111 struct stat st;
112 size_t len;
113
114 if (!filename || !strlen(filename)) {
115 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
116 filename = "-";
117 else
118 filename = "perf.data";
119 }
120
121 len = strlen(filename);
122 self = zalloc(sizeof(*self) + len);
137 123
138 if (self == NULL) 124 if (self == NULL)
139 goto out; 125 goto out;
140 126
141 memcpy(self->filename, filename, len); 127 memcpy(self->filename, filename, len);
142 self->threads = RB_ROOT;
143 INIT_LIST_HEAD(&self->dead_threads);
144 self->last_match = NULL;
145 /* 128 /*
146 * On 64bit we can mmap the data file in one go. No need for tiny mmap 129 * On 64bit we can mmap the data file in one go. No need for tiny mmap
147 * slices. On 32bit we use 32MB. 130 * slices. On 32bit we use 32MB.
@@ -171,10 +154,10 @@ struct perf_session *perf_session__new(const char *filename, int mode,
171 goto out_delete; 154 goto out_delete;
172 } 155 }
173 156
174 if (ops && ops->ordering_requires_timestamps && 157 if (tool && tool->ordering_requires_timestamps &&
175 ops->ordered_samples && !self->sample_id_all) { 158 tool->ordered_samples && !self->sample_id_all) {
176 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); 159 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
177 ops->ordered_samples = false; 160 tool->ordered_samples = false;
178 } 161 }
179 162
180out: 163out:
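[The perf_session__new() change above makes a NULL or empty filename fall back to "-" (stdin) when stdin is a pipe, and to "perf.data" otherwise — the plumbing that lets piped invocations work without naming a file explicitly (exact CLI spelling aside). The detection itself is just an fstat() on fd 0:]

#include <sys/stat.h>
#include <unistd.h>

/* Returns 1 when fd 0 is a FIFO, i.e. we are downstream of a pipe. */
static int stdin_is_pipe(void)
{
	struct stat st;

	return !fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode);
}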
@@ -184,17 +167,22 @@ out_delete:
184 return NULL; 167 return NULL;
185} 168}
186 169
187static void perf_session__delete_dead_threads(struct perf_session *self) 170static void machine__delete_dead_threads(struct machine *machine)
188{ 171{
189 struct thread *n, *t; 172 struct thread *n, *t;
190 173
191 list_for_each_entry_safe(t, n, &self->dead_threads, node) { 174 list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
192 list_del(&t->node); 175 list_del(&t->node);
193 thread__delete(t); 176 thread__delete(t);
194 } 177 }
195} 178}
196 179
197static void perf_session__delete_threads(struct perf_session *self) 180static void perf_session__delete_dead_threads(struct perf_session *session)
181{
182 machine__delete_dead_threads(&session->host_machine);
183}
184
185static void machine__delete_threads(struct machine *self)
198{ 186{
199 struct rb_node *nd = rb_first(&self->threads); 187 struct rb_node *nd = rb_first(&self->threads);
200 188
@@ -207,6 +195,11 @@ static void perf_session__delete_threads(struct perf_session *self)
207 } 195 }
208} 196}
209 197
198static void perf_session__delete_threads(struct perf_session *session)
199{
200 machine__delete_threads(&session->host_machine);
201}
202
210void perf_session__delete(struct perf_session *self) 203void perf_session__delete(struct perf_session *self)
211{ 204{
212 perf_session__destroy_kernel_maps(self); 205 perf_session__destroy_kernel_maps(self);
@@ -217,7 +210,7 @@ void perf_session__delete(struct perf_session *self)
217 free(self); 210 free(self);
218} 211}
219 212
220void perf_session__remove_thread(struct perf_session *self, struct thread *th) 213void machine__remove_thread(struct machine *self, struct thread *th)
221{ 214{
222 self->last_match = NULL; 215 self->last_match = NULL;
223 rb_erase(&th->rb_node, &self->threads); 216 rb_erase(&th->rb_node, &self->threads);
@@ -236,16 +229,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
236 return 0; 229 return 0;
237} 230}
238 231
239int perf_session__resolve_callchain(struct perf_session *self, 232int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
240 struct thread *thread, 233 struct thread *thread,
241 struct ip_callchain *chain, 234 struct ip_callchain *chain,
242 struct symbol **parent) 235 struct symbol **parent)
243{ 236{
244 u8 cpumode = PERF_RECORD_MISC_USER; 237 u8 cpumode = PERF_RECORD_MISC_USER;
245 unsigned int i; 238 unsigned int i;
246 int err; 239 int err;
247 240
248 callchain_cursor_reset(&self->callchain_cursor); 241 callchain_cursor_reset(&evsel->hists.callchain_cursor);
249 242
250 for (i = 0; i < chain->nr; i++) { 243 for (i = 0; i < chain->nr; i++) {
251 u64 ip; 244 u64 ip;
@@ -272,7 +265,7 @@ int perf_session__resolve_callchain(struct perf_session *self,
272 265
273 al.filtered = false; 266 al.filtered = false;
274 thread__find_addr_location(thread, self, cpumode, 267 thread__find_addr_location(thread, self, cpumode,
275 MAP__FUNCTION, thread->pid, ip, &al, NULL); 268 MAP__FUNCTION, ip, &al, NULL);
276 if (al.sym != NULL) { 269 if (al.sym != NULL) {
277 if (sort__has_parent && !*parent && 270 if (sort__has_parent && !*parent &&
278 symbol__match_parent_regex(al.sym)) 271 symbol__match_parent_regex(al.sym))
@@ -281,7 +274,7 @@ int perf_session__resolve_callchain(struct perf_session *self,
281 break; 274 break;
282 } 275 }
283 276
284 err = callchain_cursor_append(&self->callchain_cursor, 277 err = callchain_cursor_append(&evsel->hists.callchain_cursor,
285 ip, al.map, al.sym); 278 ip, al.map, al.sym);
286 if (err) 279 if (err)
287 return err; 280 return err;
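[Two things changed in the resolver above: it is keyed by machine rather than session, and the cursor it fills now lives in evsel->hists, giving each event a private callchain cursor. The loop body elided by the hunk walks chain->ips, where special marker values switch the cpumode before ordinary addresses are resolved; schematically — constants per the perf ABI, the resolution step itself omitted:]

/* Schematic of the elided loop body. */
for (i = 0; i < chain->nr; i++) {
	u64 ip = chain->ips[i];

	if (ip >= PERF_CONTEXT_MAX) {		/* marker, not an address */
		switch (ip) {
		case PERF_CONTEXT_KERNEL:	/* (u64)-128 */
			cpumode = PERF_RECORD_MISC_KERNEL;
			break;
		case PERF_CONTEXT_USER:		/* (u64)-512 */
			cpumode = PERF_RECORD_MISC_USER;
			break;
		default:
			break;
		}
		continue;
	}
	/* ...resolve ip to a map/symbol and append it to the cursor... */
}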
@@ -290,75 +283,91 @@ int perf_session__resolve_callchain(struct perf_session *self,
290 return 0; 283 return 0;
291} 284}
292 285
293static int process_event_synth_stub(union perf_event *event __used, 286static int process_event_synth_tracing_data_stub(union perf_event *event __used,
294 struct perf_session *session __used) 287 struct perf_session *session __used)
288{
289 dump_printf(": unhandled!\n");
290 return 0;
291}
292
293static int process_event_synth_attr_stub(union perf_event *event __used,
294 struct perf_evlist **pevlist __used)
295{ 295{
296 dump_printf(": unhandled!\n"); 296 dump_printf(": unhandled!\n");
297 return 0; 297 return 0;
298} 298}
299 299
300static int process_event_sample_stub(union perf_event *event __used, 300static int process_event_sample_stub(struct perf_tool *tool __used,
301 union perf_event *event __used,
301 struct perf_sample *sample __used, 302 struct perf_sample *sample __used,
302 struct perf_evsel *evsel __used, 303 struct perf_evsel *evsel __used,
303 struct perf_session *session __used) 304 struct machine *machine __used)
304{ 305{
305 dump_printf(": unhandled!\n"); 306 dump_printf(": unhandled!\n");
306 return 0; 307 return 0;
307} 308}
308 309
309static int process_event_stub(union perf_event *event __used, 310static int process_event_stub(struct perf_tool *tool __used,
311 union perf_event *event __used,
310 struct perf_sample *sample __used, 312 struct perf_sample *sample __used,
311 struct perf_session *session __used) 313 struct machine *machine __used)
312{ 314{
313 dump_printf(": unhandled!\n"); 315 dump_printf(": unhandled!\n");
314 return 0; 316 return 0;
315} 317}
316 318
317static int process_finished_round_stub(union perf_event *event __used, 319static int process_finished_round_stub(struct perf_tool *tool __used,
318 struct perf_session *session __used, 320 union perf_event *event __used,
319 struct perf_event_ops *ops __used) 321 struct perf_session *perf_session __used)
320{ 322{
321 dump_printf(": unhandled!\n"); 323 dump_printf(": unhandled!\n");
322 return 0; 324 return 0;
323} 325}
324 326
325static int process_finished_round(union perf_event *event, 327static int process_event_type_stub(struct perf_tool *tool __used,
326 struct perf_session *session, 328 union perf_event *event __used)
327 struct perf_event_ops *ops); 329{
330 dump_printf(": unhandled!\n");
331 return 0;
332}
328 333
329static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) 334static int process_finished_round(struct perf_tool *tool,
335 union perf_event *event,
336 struct perf_session *session);
337
338static void perf_tool__fill_defaults(struct perf_tool *tool)
330{ 339{
331 if (handler->sample == NULL) 340 if (tool->sample == NULL)
332 handler->sample = process_event_sample_stub; 341 tool->sample = process_event_sample_stub;
333 if (handler->mmap == NULL) 342 if (tool->mmap == NULL)
334 handler->mmap = process_event_stub; 343 tool->mmap = process_event_stub;
335 if (handler->comm == NULL) 344 if (tool->comm == NULL)
336 handler->comm = process_event_stub; 345 tool->comm = process_event_stub;
337 if (handler->fork == NULL) 346 if (tool->fork == NULL)
338 handler->fork = process_event_stub; 347 tool->fork = process_event_stub;
339 if (handler->exit == NULL) 348 if (tool->exit == NULL)
340 handler->exit = process_event_stub; 349 tool->exit = process_event_stub;
341 if (handler->lost == NULL) 350 if (tool->lost == NULL)
342 handler->lost = perf_event__process_lost; 351 tool->lost = perf_event__process_lost;
343 if (handler->read == NULL) 352 if (tool->read == NULL)
344 handler->read = process_event_stub; 353 tool->read = process_event_sample_stub;
345 if (handler->throttle == NULL) 354 if (tool->throttle == NULL)
346 handler->throttle = process_event_stub; 355 tool->throttle = process_event_stub;
347 if (handler->unthrottle == NULL) 356 if (tool->unthrottle == NULL)
348 handler->unthrottle = process_event_stub; 357 tool->unthrottle = process_event_stub;
349 if (handler->attr == NULL) 358 if (tool->attr == NULL)
350 handler->attr = process_event_synth_stub; 359 tool->attr = process_event_synth_attr_stub;
351 if (handler->event_type == NULL) 360 if (tool->event_type == NULL)
352 handler->event_type = process_event_synth_stub; 361 tool->event_type = process_event_type_stub;
353 if (handler->tracing_data == NULL) 362 if (tool->tracing_data == NULL)
354 handler->tracing_data = process_event_synth_stub; 363 tool->tracing_data = process_event_synth_tracing_data_stub;
355 if (handler->build_id == NULL) 364 if (tool->build_id == NULL)
356 handler->build_id = process_event_synth_stub; 365 tool->build_id = process_finished_round_stub;
357 if (handler->finished_round == NULL) { 366 if (tool->finished_round == NULL) {
358 if (handler->ordered_samples) 367 if (tool->ordered_samples)
359 handler->finished_round = process_finished_round; 368 tool->finished_round = process_finished_round;
360 else 369 else
361 handler->finished_round = process_finished_round_stub; 370 tool->finished_round = process_finished_round_stub;
362 } 371 }
363} 372}
364 373
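[The pattern above — a struct of function pointers where every NULL slot is backfilled with a matching no-op stub — is what lets each perf subcommand implement only the callbacks it cares about. Note that one stub can serve several slots as long as the signatures agree, which is why build_id can share finished_round's stub: both take (tool, event, session). Reduced to its essentials:]

#include <stddef.h>

struct tool_sketch {
	int (*mmap)(void *tool, const void *event);
	int (*comm)(void *tool, const void *event);
	int (*fork)(void *tool, const void *event);
};

static int stub(void *tool, const void *event)
{
	(void)tool; (void)event;
	return 0;	/* "unhandled" -- deliberately does nothing */
}

static void tool_sketch__fill_defaults(struct tool_sketch *t)
{
	if (t->mmap == NULL)
		t->mmap = stub;
	if (t->comm == NULL)
		t->comm = stub;
	if (t->fork == NULL)
		t->fork = stub;
}

/* A consumer then implements only what it needs: */
static int my_mmap(void *tool, const void *event) { (void)tool; (void)event; return 1; }
static struct tool_sketch my_tool = { .mmap = my_mmap };
/* Run tool_sketch__fill_defaults(&my_tool) before processing; comm/fork become no-ops. */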
@@ -490,11 +499,11 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
490static int perf_session_deliver_event(struct perf_session *session, 499static int perf_session_deliver_event(struct perf_session *session,
491 union perf_event *event, 500 union perf_event *event,
492 struct perf_sample *sample, 501 struct perf_sample *sample,
493 struct perf_event_ops *ops, 502 struct perf_tool *tool,
494 u64 file_offset); 503 u64 file_offset);
495 504
496static void flush_sample_queue(struct perf_session *s, 505static void flush_sample_queue(struct perf_session *s,
497 struct perf_event_ops *ops) 506 struct perf_tool *tool)
498{ 507{
499 struct ordered_samples *os = &s->ordered_samples; 508 struct ordered_samples *os = &s->ordered_samples;
500 struct list_head *head = &os->samples; 509 struct list_head *head = &os->samples;
@@ -505,7 +514,7 @@ static void flush_sample_queue(struct perf_session *s,
505 unsigned idx = 0, progress_next = os->nr_samples / 16; 514 unsigned idx = 0, progress_next = os->nr_samples / 16;
506 int ret; 515 int ret;
507 516
508 if (!ops->ordered_samples || !limit) 517 if (!tool->ordered_samples || !limit)
509 return; 518 return;
510 519
511 list_for_each_entry_safe(iter, tmp, head, list) { 520 list_for_each_entry_safe(iter, tmp, head, list) {
@@ -516,7 +525,7 @@ static void flush_sample_queue(struct perf_session *s,
516 if (ret) 525 if (ret)
517 pr_err("Can't parse sample, err = %d\n", ret); 526 pr_err("Can't parse sample, err = %d\n", ret);
518 else 527 else
519 perf_session_deliver_event(s, iter->event, &sample, ops, 528 perf_session_deliver_event(s, iter->event, &sample, tool,
520 iter->file_offset); 529 iter->file_offset);
521 530
522 os->last_flush = iter->timestamp; 531 os->last_flush = iter->timestamp;
@@ -578,11 +587,11 @@ static void flush_sample_queue(struct perf_session *s,
 578 * Flush every event below timestamp 7 587 * Flush every event below timestamp 7
579 * etc... 588 * etc...
580 */ 589 */
581static int process_finished_round(union perf_event *event __used, 590static int process_finished_round(struct perf_tool *tool,
582 struct perf_session *session, 591 union perf_event *event __used,
583 struct perf_event_ops *ops) 592 struct perf_session *session)
584{ 593{
585 flush_sample_queue(session, ops); 594 flush_sample_queue(session, tool);
586 session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; 595 session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
587 596
588 return 0; 597 return 0;
@@ -737,13 +746,26 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
737 callchain__printf(sample); 746 callchain__printf(sample);
738} 747}
739 748
749static struct machine *
750 perf_session__find_machine_for_cpumode(struct perf_session *session,
751 union perf_event *event)
752{
753 const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
754
755 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest)
756 return perf_session__find_machine(session, event->ip.pid);
757
758 return perf_session__find_host_machine(session);
759}
760
740static int perf_session_deliver_event(struct perf_session *session, 761static int perf_session_deliver_event(struct perf_session *session,
741 union perf_event *event, 762 union perf_event *event,
742 struct perf_sample *sample, 763 struct perf_sample *sample,
743 struct perf_event_ops *ops, 764 struct perf_tool *tool,
744 u64 file_offset) 765 u64 file_offset)
745{ 766{
746 struct perf_evsel *evsel; 767 struct perf_evsel *evsel;
768 struct machine *machine;
747 769
748 dump_event(session, event, file_offset, sample); 770 dump_event(session, event, file_offset, sample);
749 771
@@ -765,6 +787,8 @@ static int perf_session_deliver_event(struct perf_session *session,
765 hists__inc_nr_events(&evsel->hists, event->header.type); 787 hists__inc_nr_events(&evsel->hists, event->header.type);
766 } 788 }
767 789
790 machine = perf_session__find_machine_for_cpumode(session, event);
791
768 switch (event->header.type) { 792 switch (event->header.type) {
769 case PERF_RECORD_SAMPLE: 793 case PERF_RECORD_SAMPLE:
770 dump_sample(session, event, sample); 794 dump_sample(session, event, sample);
@@ -772,23 +796,25 @@ static int perf_session_deliver_event(struct perf_session *session,
772 ++session->hists.stats.nr_unknown_id; 796 ++session->hists.stats.nr_unknown_id;
773 return -1; 797 return -1;
774 } 798 }
775 return ops->sample(event, sample, evsel, session); 799 return tool->sample(tool, event, sample, evsel, machine);
776 case PERF_RECORD_MMAP: 800 case PERF_RECORD_MMAP:
777 return ops->mmap(event, sample, session); 801 return tool->mmap(tool, event, sample, machine);
778 case PERF_RECORD_COMM: 802 case PERF_RECORD_COMM:
779 return ops->comm(event, sample, session); 803 return tool->comm(tool, event, sample, machine);
780 case PERF_RECORD_FORK: 804 case PERF_RECORD_FORK:
781 return ops->fork(event, sample, session); 805 return tool->fork(tool, event, sample, machine);
782 case PERF_RECORD_EXIT: 806 case PERF_RECORD_EXIT:
783 return ops->exit(event, sample, session); 807 return tool->exit(tool, event, sample, machine);
784 case PERF_RECORD_LOST: 808 case PERF_RECORD_LOST:
785 return ops->lost(event, sample, session); 809 if (tool->lost == perf_event__process_lost)
810 session->hists.stats.total_lost += event->lost.lost;
811 return tool->lost(tool, event, sample, machine);
786 case PERF_RECORD_READ: 812 case PERF_RECORD_READ:
787 return ops->read(event, sample, session); 813 return tool->read(tool, event, sample, evsel, machine);
788 case PERF_RECORD_THROTTLE: 814 case PERF_RECORD_THROTTLE:
789 return ops->throttle(event, sample, session); 815 return tool->throttle(tool, event, sample, machine);
790 case PERF_RECORD_UNTHROTTLE: 816 case PERF_RECORD_UNTHROTTLE:
791 return ops->unthrottle(event, sample, session); 817 return tool->unthrottle(tool, event, sample, machine);
792 default: 818 default:
793 ++session->hists.stats.nr_unknown_events; 819 ++session->hists.stats.nr_unknown_events;
794 return -1; 820 return -1;
@@ -812,24 +838,29 @@ static int perf_session__preprocess_sample(struct perf_session *session,
812} 838}
813 839
814static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, 840static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
815 struct perf_event_ops *ops, u64 file_offset) 841 struct perf_tool *tool, u64 file_offset)
816{ 842{
843 int err;
844
817 dump_event(session, event, file_offset, NULL); 845 dump_event(session, event, file_offset, NULL);
818 846
819 /* These events are processed right away */ 847 /* These events are processed right away */
820 switch (event->header.type) { 848 switch (event->header.type) {
821 case PERF_RECORD_HEADER_ATTR: 849 case PERF_RECORD_HEADER_ATTR:
822 return ops->attr(event, session); 850 err = tool->attr(event, &session->evlist);
851 if (err == 0)
852 perf_session__update_sample_type(session);
853 return err;
823 case PERF_RECORD_HEADER_EVENT_TYPE: 854 case PERF_RECORD_HEADER_EVENT_TYPE:
824 return ops->event_type(event, session); 855 return tool->event_type(tool, event);
825 case PERF_RECORD_HEADER_TRACING_DATA: 856 case PERF_RECORD_HEADER_TRACING_DATA:
826 /* setup for reading amidst mmap */ 857 /* setup for reading amidst mmap */
827 lseek(session->fd, file_offset, SEEK_SET); 858 lseek(session->fd, file_offset, SEEK_SET);
828 return ops->tracing_data(event, session); 859 return tool->tracing_data(event, session);
829 case PERF_RECORD_HEADER_BUILD_ID: 860 case PERF_RECORD_HEADER_BUILD_ID:
830 return ops->build_id(event, session); 861 return tool->build_id(tool, event, session);
831 case PERF_RECORD_FINISHED_ROUND: 862 case PERF_RECORD_FINISHED_ROUND:
832 return ops->finished_round(event, session, ops); 863 return tool->finished_round(tool, event, session);
833 default: 864 default:
834 return -EINVAL; 865 return -EINVAL;
835 } 866 }
@@ -837,7 +868,7 @@ static int perf_session__process_user_event(struct perf_session *session, union
837 868
838static int perf_session__process_event(struct perf_session *session, 869static int perf_session__process_event(struct perf_session *session,
839 union perf_event *event, 870 union perf_event *event,
840 struct perf_event_ops *ops, 871 struct perf_tool *tool,
841 u64 file_offset) 872 u64 file_offset)
842{ 873{
843 struct perf_sample sample; 874 struct perf_sample sample;
@@ -853,7 +884,7 @@ static int perf_session__process_event(struct perf_session *session,
853 hists__inc_nr_events(&session->hists, event->header.type); 884 hists__inc_nr_events(&session->hists, event->header.type);
854 885
855 if (event->header.type >= PERF_RECORD_USER_TYPE_START) 886 if (event->header.type >= PERF_RECORD_USER_TYPE_START)
856 return perf_session__process_user_event(session, event, ops, file_offset); 887 return perf_session__process_user_event(session, event, tool, file_offset);
857 888
858 /* 889 /*
859 * For all kernel events we get the sample data 890 * For all kernel events we get the sample data
@@ -866,14 +897,14 @@ static int perf_session__process_event(struct perf_session *session,
866 if (perf_session__preprocess_sample(session, event, &sample)) 897 if (perf_session__preprocess_sample(session, event, &sample))
867 return 0; 898 return 0;
868 899
869 if (ops->ordered_samples) { 900 if (tool->ordered_samples) {
870 ret = perf_session_queue_event(session, event, &sample, 901 ret = perf_session_queue_event(session, event, &sample,
871 file_offset); 902 file_offset);
872 if (ret != -ETIME) 903 if (ret != -ETIME)
873 return ret; 904 return ret;
874 } 905 }
875 906
876 return perf_session_deliver_event(session, event, &sample, ops, 907 return perf_session_deliver_event(session, event, &sample, tool,
877 file_offset); 908 file_offset);
878} 909}
879 910
@@ -884,6 +915,11 @@ void perf_event_header__bswap(struct perf_event_header *self)
884 self->size = bswap_16(self->size); 915 self->size = bswap_16(self->size);
885} 916}
886 917
918struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
919{
920 return machine__findnew_thread(&session->host_machine, pid);
921}
922
887static struct thread *perf_session__register_idle_thread(struct perf_session *self) 923static struct thread *perf_session__register_idle_thread(struct perf_session *self)
888{ 924{
889 struct thread *thread = perf_session__findnew(self, 0); 925 struct thread *thread = perf_session__findnew(self, 0);
@@ -897,9 +933,9 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
897} 933}
898 934
899static void perf_session__warn_about_errors(const struct perf_session *session, 935static void perf_session__warn_about_errors(const struct perf_session *session,
900 const struct perf_event_ops *ops) 936 const struct perf_tool *tool)
901{ 937{
902 if (ops->lost == perf_event__process_lost && 938 if (tool->lost == perf_event__process_lost &&
903 session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) { 939 session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) {
904 ui__warning("Processed %d events and lost %d chunks!\n\n" 940 ui__warning("Processed %d events and lost %d chunks!\n\n"
905 "Check IO/CPU overload!\n\n", 941 "Check IO/CPU overload!\n\n",
@@ -934,7 +970,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
934volatile int session_done; 970volatile int session_done;
935 971
936static int __perf_session__process_pipe_events(struct perf_session *self, 972static int __perf_session__process_pipe_events(struct perf_session *self,
937 struct perf_event_ops *ops) 973 struct perf_tool *tool)
938{ 974{
939 union perf_event event; 975 union perf_event event;
940 uint32_t size; 976 uint32_t size;
@@ -943,7 +979,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
943 int err; 979 int err;
944 void *p; 980 void *p;
945 981
946 perf_event_ops__fill_defaults(ops); 982 perf_tool__fill_defaults(tool);
947 983
948 head = 0; 984 head = 0;
949more: 985more:
@@ -979,8 +1015,7 @@ more:
979 } 1015 }
980 } 1016 }
981 1017
982 if (size == 0 || 1018 if ((skip = perf_session__process_event(self, &event, tool, head)) < 0) {
983 (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
984 dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n", 1019 dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
985 head, event.header.size, event.header.type); 1020 head, event.header.size, event.header.type);
986 /* 1021 /*
@@ -1003,7 +1038,7 @@ more:
1003done: 1038done:
1004 err = 0; 1039 err = 0;
1005out_err: 1040out_err:
1006 perf_session__warn_about_errors(self, ops); 1041 perf_session__warn_about_errors(self, tool);
1007 perf_session_free_sample_buffers(self); 1042 perf_session_free_sample_buffers(self);
1008 return err; 1043 return err;
1009} 1044}
@@ -1034,7 +1069,7 @@ fetch_mmaped_event(struct perf_session *session,
1034 1069
1035int __perf_session__process_events(struct perf_session *session, 1070int __perf_session__process_events(struct perf_session *session,
1036 u64 data_offset, u64 data_size, 1071 u64 data_offset, u64 data_size,
1037 u64 file_size, struct perf_event_ops *ops) 1072 u64 file_size, struct perf_tool *tool)
1038{ 1073{
1039 u64 head, page_offset, file_offset, file_pos, progress_next; 1074 u64 head, page_offset, file_offset, file_pos, progress_next;
1040 int err, mmap_prot, mmap_flags, map_idx = 0; 1075 int err, mmap_prot, mmap_flags, map_idx = 0;
@@ -1043,7 +1078,7 @@ int __perf_session__process_events(struct perf_session *session,
1043 union perf_event *event; 1078 union perf_event *event;
1044 uint32_t size; 1079 uint32_t size;
1045 1080
1046 perf_event_ops__fill_defaults(ops); 1081 perf_tool__fill_defaults(tool);
1047 1082
1048 page_size = sysconf(_SC_PAGESIZE); 1083 page_size = sysconf(_SC_PAGESIZE);
1049 1084
@@ -1098,7 +1133,7 @@ more:
1098 size = event->header.size; 1133 size = event->header.size;
1099 1134
1100 if (size == 0 || 1135 if (size == 0 ||
1101 perf_session__process_event(session, event, ops, file_pos) < 0) { 1136 perf_session__process_event(session, event, tool, file_pos) < 0) {
1102 dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n", 1137 dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
1103 file_offset + head, event->header.size, 1138 file_offset + head, event->header.size,
1104 event->header.type); 1139 event->header.type);
@@ -1127,15 +1162,15 @@ more:
1127 err = 0; 1162 err = 0;
1128 /* do the final flush for ordered samples */ 1163 /* do the final flush for ordered samples */
1129 session->ordered_samples.next_flush = ULLONG_MAX; 1164 session->ordered_samples.next_flush = ULLONG_MAX;
1130 flush_sample_queue(session, ops); 1165 flush_sample_queue(session, tool);
1131out_err: 1166out_err:
1132 perf_session__warn_about_errors(session, ops); 1167 perf_session__warn_about_errors(session, tool);
1133 perf_session_free_sample_buffers(session); 1168 perf_session_free_sample_buffers(session);
1134 return err; 1169 return err;
1135} 1170}
1136 1171
1137int perf_session__process_events(struct perf_session *self, 1172int perf_session__process_events(struct perf_session *self,
1138 struct perf_event_ops *ops) 1173 struct perf_tool *tool)
1139{ 1174{
1140 int err; 1175 int err;
1141 1176
@@ -1146,9 +1181,9 @@ int perf_session__process_events(struct perf_session *self,
1146 err = __perf_session__process_events(self, 1181 err = __perf_session__process_events(self,
1147 self->header.data_offset, 1182 self->header.data_offset,
1148 self->header.data_size, 1183 self->header.data_size,
1149 self->size, ops); 1184 self->size, tool);
1150 else 1185 else
1151 err = __perf_session__process_pipe_events(self, ops); 1186 err = __perf_session__process_pipe_events(self, tool);
1152 1187
1153 return err; 1188 return err;
1154} 1189}
@@ -1163,9 +1198,8 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg)
1163 return true; 1198 return true;
1164} 1199}
1165 1200
1166int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps, 1201int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
1167 const char *symbol_name, 1202 const char *symbol_name, u64 addr)
1168 u64 addr)
1169{ 1203{
1170 char *bracket; 1204 char *bracket;
1171 enum map_type i; 1205 enum map_type i;
@@ -1224,6 +1258,27 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
1224 return ret; 1258 return ret;
1225} 1259}
1226 1260
1261size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
1262{
1263 /*
1264 * FIXME: Here we have to actually print all the machines in this
1265 * session, not just the host...
1266 */
1267 return machine__fprintf(&session->host_machine, fp);
1268}
1269
1270void perf_session__remove_thread(struct perf_session *session,
1271 struct thread *th)
1272{
1273 /*
1274 * FIXME: This one makes no sense, we need to remove the thread from
1275 * the machine it belongs to, perf_session can have many machines, so
1276 * doing it always on ->host_machine is wrong. Fix when auditing all
1277 * the 'perf kvm' code.
1278 */
1279 machine__remove_thread(&session->host_machine, th);
1280}
1281
1227struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, 1282struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
1228 unsigned int type) 1283 unsigned int type)
1229{ 1284{
@@ -1236,17 +1291,16 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
1236 return NULL; 1291 return NULL;
1237} 1292}
1238 1293
1239void perf_session__print_ip(union perf_event *event, 1294void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1240 struct perf_sample *sample, 1295 struct machine *machine, struct perf_evsel *evsel,
1241 struct perf_session *session, 1296 int print_sym, int print_dso)
1242 int print_sym, int print_dso)
1243{ 1297{
1244 struct addr_location al; 1298 struct addr_location al;
1245 const char *symname, *dsoname; 1299 const char *symname, *dsoname;
1246 struct callchain_cursor *cursor = &session->callchain_cursor; 1300 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
1247 struct callchain_cursor_node *node; 1301 struct callchain_cursor_node *node;
1248 1302
1249 if (perf_event__preprocess_sample(event, session, &al, sample, 1303 if (perf_event__preprocess_sample(event, machine, &al, sample,
1250 NULL) < 0) { 1304 NULL) < 0) {
1251 error("problem processing %d event, skipping it.\n", 1305 error("problem processing %d event, skipping it.\n",
1252 event->header.type); 1306 event->header.type);
@@ -1255,7 +1309,7 @@ void perf_session__print_ip(union perf_event *event,
1255 1309
1256 if (symbol_conf.use_callchain && sample->callchain) { 1310 if (symbol_conf.use_callchain && sample->callchain) {
1257 1311
1258 if (perf_session__resolve_callchain(session, al.thread, 1312 if (machine__resolve_callchain(machine, evsel, al.thread,
1259 sample->callchain, NULL) != 0) { 1313 sample->callchain, NULL) != 0) {
1260 if (verbose) 1314 if (verbose)
1261 error("Failed to resolve callchain. Skipping\n"); 1315 error("Failed to resolve callchain. Skipping\n");
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 6e393c98eb34..37bc38381fb6 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -30,9 +30,6 @@ struct perf_session {
30 struct perf_header header; 30 struct perf_header header;
31 unsigned long size; 31 unsigned long size;
32 unsigned long mmap_window; 32 unsigned long mmap_window;
33 struct rb_root threads;
34 struct list_head dead_threads;
35 struct thread *last_match;
36 struct machine host_machine; 33 struct machine host_machine;
37 struct rb_root machines; 34 struct rb_root machines;
38 struct perf_evlist *evlist; 35 struct perf_evlist *evlist;
@@ -53,65 +50,31 @@ struct perf_session {
53 int cwdlen; 50 int cwdlen;
54 char *cwd; 51 char *cwd;
55 struct ordered_samples ordered_samples; 52 struct ordered_samples ordered_samples;
56 struct callchain_cursor callchain_cursor; 53 char filename[1];
57 char filename[0];
58}; 54};
59 55
60struct perf_evsel; 56struct perf_tool;
61struct perf_event_ops;
62
63typedef int (*event_sample)(union perf_event *event, struct perf_sample *sample,
64 struct perf_evsel *evsel, struct perf_session *session);
65typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
66 struct perf_session *session);
67typedef int (*event_synth_op)(union perf_event *self,
68 struct perf_session *session);
69typedef int (*event_op2)(union perf_event *self, struct perf_session *session,
70 struct perf_event_ops *ops);
71
72struct perf_event_ops {
73 event_sample sample;
74 event_op mmap,
75 comm,
76 fork,
77 exit,
78 lost,
79 read,
80 throttle,
81 unthrottle;
82 event_synth_op attr,
83 event_type,
84 tracing_data,
85 build_id;
86 event_op2 finished_round;
87 bool ordered_samples;
88 bool ordering_requires_timestamps;
89};
90 57
91struct perf_session *perf_session__new(const char *filename, int mode, 58struct perf_session *perf_session__new(const char *filename, int mode,
92 bool force, bool repipe, 59 bool force, bool repipe,
93 struct perf_event_ops *ops); 60 struct perf_tool *tool);
94void perf_session__delete(struct perf_session *self); 61void perf_session__delete(struct perf_session *self);
95 62
96void perf_event_header__bswap(struct perf_event_header *self); 63void perf_event_header__bswap(struct perf_event_header *self);
97 64
98int __perf_session__process_events(struct perf_session *self, 65int __perf_session__process_events(struct perf_session *self,
99 u64 data_offset, u64 data_size, u64 size, 66 u64 data_offset, u64 data_size, u64 size,
100 struct perf_event_ops *ops); 67 struct perf_tool *tool);
101int perf_session__process_events(struct perf_session *self, 68int perf_session__process_events(struct perf_session *self,
102 struct perf_event_ops *event_ops); 69 struct perf_tool *tool);
103 70
104int perf_session__resolve_callchain(struct perf_session *self, 71int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel,
105 struct thread *thread, 72 struct thread *thread,
106 struct ip_callchain *chain, 73 struct ip_callchain *chain,
107 struct symbol **parent); 74 struct symbol **parent);
108 75
109bool perf_session__has_traces(struct perf_session *self, const char *msg); 76bool perf_session__has_traces(struct perf_session *self, const char *msg);
110 77
111int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
112 const char *symbol_name,
113 u64 addr);
114
115void mem_bswap_64(void *src, int byte_size); 78void mem_bswap_64(void *src, int byte_size);
116void perf_event__attr_swap(struct perf_event_attr *attr); 79void perf_event__attr_swap(struct perf_event_attr *attr);
117 80
@@ -144,12 +107,16 @@ struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t p
144 107
145static inline 108static inline
146void perf_session__process_machines(struct perf_session *self, 109void perf_session__process_machines(struct perf_session *self,
110 struct perf_tool *tool,
147 machine__process_t process) 111 machine__process_t process)
148{ 112{
149 process(&self->host_machine, self); 113 process(&self->host_machine, tool);
150 return machines__process(&self->machines, process, self); 114 return machines__process(&self->machines, process, tool);
151} 115}
152 116
117struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
118size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
119
153size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); 120size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
154 121
155size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, 122size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
@@ -167,13 +134,20 @@ static inline int perf_session__parse_sample(struct perf_session *session,
167 session->header.needs_swap); 134 session->header.needs_swap);
168} 135}
169 136
137static inline int perf_session__synthesize_sample(struct perf_session *session,
138 union perf_event *event,
139 const struct perf_sample *sample)
140{
141 return perf_event__synthesize_sample(event, session->sample_type,
142 sample, session->header.needs_swap);
143}
144
170struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, 145struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
171 unsigned int type); 146 unsigned int type);
172 147
173void perf_session__print_ip(union perf_event *event, 148void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
174 struct perf_sample *sample, 149 struct machine *machine, struct perf_evsel *evsel,
175 struct perf_session *session, 150 int print_sym, int print_dso);
176 int print_sym, int print_dso);
177 151
178int perf_session__cpu_bitmap(struct perf_session *session, 152int perf_session__cpu_bitmap(struct perf_session *session,
179 const char *cpu_list, unsigned long *cpu_bitmap); 153 const char *cpu_list, unsigned long *cpu_bitmap);
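
The new perf_session__synthesize_sample() helper is the inverse of the perf_session__parse_sample() wrapper shown a few hunks up: one decodes a raw event into struct perf_sample using the session's sample_type and byte-swap setting, the other re-encodes it. A round-trip sketch for 'perf inject'-style event rewriting; error handling is elided and the surrounding function is hypothetical:

/* Hedged sketch: decode, tweak, re-encode an event in place. */
struct perf_sample sample;

if (perf_session__parse_sample(session, event, &sample) < 0)
	return -1;

/* ... adjust sample fields here ... */

return perf_session__synthesize_sample(session, event, &sample);
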
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 95d370074928..36d4c5619575 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -27,7 +27,8 @@ build_tmp = getenv('PYTHON_EXTBUILD_TMP')
27perf = Extension('perf', 27perf = Extension('perf',
28 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', 28 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
29 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', 29 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
30 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'], 30 'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
31 'util/debugfs.c'],
31 include_dirs = ['util/include'], 32 include_dirs = ['util/include'],
32 extra_compile_args = cflags, 33 extra_compile_args = cflags,
33 ) 34 )
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 632b50c7bc26..215d50f2042e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1757,7 +1757,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
1757 struct stat st; 1757 struct stat st;
1758 1758
1759 /*sshfs might return bad dent->d_type, so we have to stat*/ 1759 /*sshfs might return bad dent->d_type, so we have to stat*/
1760 sprintf(path, "%s/%s", dir_name, dent->d_name); 1760 snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
1761 if (stat(path, &st)) 1761 if (stat(path, &st))
1762 continue; 1762 continue;
1763 1763
@@ -1766,8 +1766,6 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
1766 !strcmp(dent->d_name, "..")) 1766 !strcmp(dent->d_name, ".."))
1767 continue; 1767 continue;
1768 1768
1769 snprintf(path, sizeof(path), "%s/%s",
1770 dir_name, dent->d_name);
1771 ret = map_groups__set_modules_path_dir(mg, path); 1769 ret = map_groups__set_modules_path_dir(mg, path);
1772 if (ret < 0) 1770 if (ret < 0)
1773 goto out; 1771 goto out;
@@ -1788,9 +1786,6 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
1788 if (map == NULL) 1786 if (map == NULL)
1789 continue; 1787 continue;
1790 1788
1791 snprintf(path, sizeof(path), "%s/%s",
1792 dir_name, dent->d_name);
1793
1794 long_name = strdup(path); 1789 long_name = strdup(path);
1795 if (long_name == NULL) { 1790 if (long_name == NULL) {
1796 ret = -1; 1791 ret = -1;
@@ -2609,10 +2604,10 @@ int symbol__init(void)
2609 symbol_conf.initialized = true; 2604 symbol_conf.initialized = true;
2610 return 0; 2605 return 0;
2611 2606
2612out_free_dso_list:
2613 strlist__delete(symbol_conf.dso_list);
2614out_free_comm_list: 2607out_free_comm_list:
2615 strlist__delete(symbol_conf.comm_list); 2608 strlist__delete(symbol_conf.comm_list);
2609out_free_dso_list:
2610 strlist__delete(symbol_conf.dso_list);
2616 return -1; 2611 return -1;
2617} 2612}
2618 2613
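
Two independent fixes land in symbol.c above: the sprintf() call is bounded with snprintf() so an oversized directory entry cannot overrun the on-stack path buffer (making the later, now-redundant snprintf() calls removable), and the out_free_dso_list/out_free_comm_list labels in symbol__init() are swapped so the error path unwinds in reverse order of setup. A compact sketch of that unwind ordering, with illustrative malloc()-based helpers standing in for the strlist API:

#include <stdlib.h>

/* Illustrative failing setup step. */
static int later_setup(void)
{
	return -1;
}

/* Hedged sketch: cleanup labels mirror the setup sequence in reverse,
 * so each failure path frees exactly what was already allocated. */
static int init_lists(void)
{
	char *dso_list, *comm_list;

	dso_list = malloc(32);		/* stands in for the dso strlist */
	if (dso_list == NULL)
		return -1;

	comm_list = malloc(32);		/* stands in for the comm strlist */
	if (comm_list == NULL)
		goto out_free_dso_list;

	if (later_setup() < 0)
		goto out_free_comm_list;

	return 0;			/* success: caller owns both lists */

out_free_comm_list:			/* last allocation first... */
	free(comm_list);
out_free_dso_list:			/* ...then fall through to the first */
	free(dso_list);
	return -1;
}
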
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 29f8d742e92f..123c2e14353e 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -68,6 +68,7 @@ struct strlist;
68 68
69struct symbol_conf { 69struct symbol_conf {
70 unsigned short priv_size; 70 unsigned short priv_size;
71 unsigned short nr_events;
71 bool try_vmlinux_path, 72 bool try_vmlinux_path,
72 use_modules, 73 use_modules,
73 sort_by_name, 74 sort_by_name,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index d5d3b22250f3..fb4b7ea6752f 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -61,7 +61,7 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
61 map_groups__fprintf(&self->mg, verbose, fp); 61 map_groups__fprintf(&self->mg, verbose, fp);
62} 62}
63 63
64struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) 64struct thread *machine__findnew_thread(struct machine *self, pid_t pid)
65{ 65{
66 struct rb_node **p = &self->threads.rb_node; 66 struct rb_node **p = &self->threads.rb_node;
67 struct rb_node *parent = NULL; 67 struct rb_node *parent = NULL;
@@ -125,12 +125,12 @@ int thread__fork(struct thread *self, struct thread *parent)
125 return 0; 125 return 0;
126} 126}
127 127
128size_t perf_session__fprintf(struct perf_session *self, FILE *fp) 128size_t machine__fprintf(struct machine *machine, FILE *fp)
129{ 129{
130 size_t ret = 0; 130 size_t ret = 0;
131 struct rb_node *nd; 131 struct rb_node *nd;
132 132
133 for (nd = rb_first(&self->threads); nd; nd = rb_next(nd)) { 133 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
134 struct thread *pos = rb_entry(nd, struct thread, rb_node); 134 struct thread *pos = rb_entry(nd, struct thread, rb_node);
135 135
136 ret += thread__fprintf(pos, fp); 136 ret += thread__fprintf(pos, fp);
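
The rename above moves thread lookup from perf_session to struct machine without changing the underlying rb-tree "findnew" pattern: a single walk either finds the matching pid or remembers the link slot where a new node belongs. A sketch of that pattern, assuming (as in the perf sources) that machine->threads is an rb_root, struct thread embeds rb_node and pid, and thread__new() is the constructor in thread.c:

#include <linux/rbtree.h>
#include <sys/types.h>

/* Hedged sketch of the findnew pattern behind machine__findnew_thread(). */
struct thread *findnew_thread(struct machine *machine, pid_t pid)
{
	struct rb_node **p = &machine->threads.rb_node;
	struct rb_node *parent = NULL;
	struct thread *th;

	while (*p != NULL) {
		parent = *p;
		th = rb_entry(parent, struct thread, rb_node);

		if (th->pid == pid)
			return th;		/* already tracked */

		if (pid < th->pid)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	th = thread__new(pid);			/* constructor in thread.c */
	if (th != NULL) {
		rb_link_node(&th->rb_node, parent, p);
		rb_insert_color(&th->rb_node, &machine->threads);
	}
	return th;
}
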
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e5f2401c1b5e..70c2c13ff679 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,16 +18,14 @@ struct thread {
18 int comm_len; 18 int comm_len;
19}; 19};
20 20
21struct perf_session; 21struct machine;
22 22
23void thread__delete(struct thread *self); 23void thread__delete(struct thread *self);
24 24
25int thread__set_comm(struct thread *self, const char *comm); 25int thread__set_comm(struct thread *self, const char *comm);
26int thread__comm_len(struct thread *self); 26int thread__comm_len(struct thread *self);
27struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
28void thread__insert_map(struct thread *self, struct map *map); 27void thread__insert_map(struct thread *self, struct map *map);
29int thread__fork(struct thread *self, struct thread *parent); 28int thread__fork(struct thread *self, struct thread *parent);
30size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
31 29
32static inline struct map *thread__find_map(struct thread *self, 30static inline struct map *thread__find_map(struct thread *self,
33 enum map_type type, u64 addr) 31 enum map_type type, u64 addr)
@@ -35,14 +33,12 @@ static inline struct map *thread__find_map(struct thread *self,
35 return self ? map_groups__find(&self->mg, type, addr) : NULL; 33 return self ? map_groups__find(&self->mg, type, addr) : NULL;
36} 34}
37 35
38void thread__find_addr_map(struct thread *self, 36void thread__find_addr_map(struct thread *thread, struct machine *machine,
39 struct perf_session *session, u8 cpumode, 37 u8 cpumode, enum map_type type, u64 addr,
40 enum map_type type, pid_t pid, u64 addr,
41 struct addr_location *al); 38 struct addr_location *al);
42 39
43void thread__find_addr_location(struct thread *self, 40void thread__find_addr_location(struct thread *thread, struct machine *machine,
44 struct perf_session *session, u8 cpumode, 41 u8 cpumode, enum map_type type, u64 addr,
45 enum map_type type, pid_t pid, u64 addr,
46 struct addr_location *al, 42 struct addr_location *al,
47 symbol_filter_t filter); 43 symbol_filter_t filter);
48#endif /* __PERF_THREAD_H */ 44#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
new file mode 100644
index 000000000000..b0e1aadba8d5
--- /dev/null
+++ b/tools/perf/util/tool.h
@@ -0,0 +1,50 @@
1#ifndef __PERF_TOOL_H
2#define __PERF_TOOL_H
3
4#include <stdbool.h>
5
6struct perf_session;
7union perf_event;
8struct perf_evlist;
9struct perf_evsel;
10struct perf_sample;
11struct perf_tool;
12struct machine;
13
14typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
15 struct perf_sample *sample,
16 struct perf_evsel *evsel, struct machine *machine);
17
18typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
19 struct perf_sample *sample, struct machine *machine);
20
21typedef int (*event_attr_op)(union perf_event *event,
22 struct perf_evlist **pevlist);
23typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event);
24
25typedef int (*event_synth_op)(union perf_event *event,
26 struct perf_session *session);
27
28typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
29 struct perf_session *session);
30
31struct perf_tool {
32 event_sample sample,
33 read;
34 event_op mmap,
35 comm,
36 fork,
37 exit,
38 lost,
39 throttle,
40 unthrottle;
41 event_attr_op attr;
42 event_synth_op tracing_data;
43 event_simple_op event_type;
44 event_op2 finished_round,
45 build_id;
46 bool ordered_samples;
47 bool ordering_requires_timestamps;
48};
49
50#endif /* __PERF_TOOL_H */
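
The new header replaces struct perf_event_ops (deleted from session.h above) wholesale, and because every callback now receives the tool pointer itself, a command can embed the perf_tool as its first member and recover its private state from it, as struct perf_top does in the next file. A sketch of how a command might describe itself, assuming the perf_event__process_* handlers were converted to the new signatures by this same series; process_sample_event() is illustrative:

#include "tool.h"

/* Hedged sketch: fill only the callbacks this command cares about;
 * perf_tool__fill_defaults() (seen in session.c above) stubs the rest. */
static struct perf_tool report = {
	.sample		 = process_sample_event,
	.mmap		 = perf_event__process_mmap,
	.comm		 = perf_event__process_comm,
	.fork		 = perf_event__process_task,
	.ordered_samples = true,
};

Processing would then start with perf_session__process_events(session, &report), per the prototype in session.h above.
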
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 399650967958..a248f3c2c60d 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -1,15 +1,17 @@
1#ifndef __PERF_TOP_H 1#ifndef __PERF_TOP_H
2#define __PERF_TOP_H 1 2#define __PERF_TOP_H 1
3 3
4#include "tool.h"
4#include "types.h" 5#include "types.h"
5#include "../perf.h"
6#include <stddef.h> 6#include <stddef.h>
7#include <stdbool.h>
7 8
8struct perf_evlist; 9struct perf_evlist;
9struct perf_evsel; 10struct perf_evsel;
10struct perf_session; 11struct perf_session;
11 12
12struct perf_top { 13struct perf_top {
14 struct perf_tool tool;
13 struct perf_evlist *evlist; 15 struct perf_evlist *evlist;
14 /* 16 /*
15 * Symbols will be added here in perf_event__process_sample and will 17 * Symbols will be added here in perf_event__process_sample and will
@@ -23,10 +25,26 @@ struct perf_top {
23 int freq; 25 int freq;
24 pid_t target_pid, target_tid; 26 pid_t target_pid, target_tid;
25 bool hide_kernel_symbols, hide_user_symbols, zero; 27 bool hide_kernel_symbols, hide_user_symbols, zero;
28 bool system_wide;
29 bool use_tui, use_stdio;
30 bool sort_has_symbols;
31 bool dont_use_callchains;
32 bool kptr_restrict_warned;
33 bool vmlinux_warned;
34 bool inherit;
35 bool group;
36 bool sample_id_all_avail;
37 bool dump_symtab;
26 const char *cpu_list; 38 const char *cpu_list;
27 struct hist_entry *sym_filter_entry; 39 struct hist_entry *sym_filter_entry;
28 struct perf_evsel *sym_evsel; 40 struct perf_evsel *sym_evsel;
29 struct perf_session *session; 41 struct perf_session *session;
42 struct winsize winsize;
43 unsigned int mmap_pages;
44 int default_interval;
45 int realtime_prio;
46 int sym_pcnt_filter;
47 const char *sym_filter;
30}; 48};
31 49
32size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); 50size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index d2655f08bcc0..ac6830d8292b 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -18,7 +18,8 @@
18 * 18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 20 */
21#define _GNU_SOURCE 21#include <ctype.h>
22#include "util.h"
22#include <dirent.h> 23#include <dirent.h>
23#include <mntent.h> 24#include <mntent.h>
24#include <stdio.h> 25#include <stdio.h>
@@ -31,7 +32,6 @@
31#include <pthread.h> 32#include <pthread.h>
32#include <fcntl.h> 33#include <fcntl.h>
33#include <unistd.h> 34#include <unistd.h>
34#include <ctype.h>
35#include <errno.h> 35#include <errno.h>
36#include <stdbool.h> 36#include <stdbool.h>
37#include <linux/list.h> 37#include <linux/list.h>
@@ -44,10 +44,6 @@
44 44
45#define VERSION "0.5" 45#define VERSION "0.5"
46 46
47#define _STR(x) #x
48#define STR(x) _STR(x)
49#define MAX_PATH 256
50
51#define TRACE_CTRL "tracing_on" 47#define TRACE_CTRL "tracing_on"
52#define TRACE "trace" 48#define TRACE "trace"
53#define AVAILABLE "available_tracers" 49#define AVAILABLE "available_tracers"
@@ -73,26 +69,6 @@ struct events {
73}; 69};
74 70
75 71
76
77static void die(const char *fmt, ...)
78{
79 va_list ap;
80 int ret = errno;
81
82 if (errno)
83 perror("perf");
84 else
85 ret = -1;
86
87 va_start(ap, fmt);
88 fprintf(stderr, " ");
89 vfprintf(stderr, fmt, ap);
90 va_end(ap);
91
92 fprintf(stderr, "\n");
93 exit(ret);
94}
95
96void *malloc_or_die(unsigned int size) 72void *malloc_or_die(unsigned int size)
97{ 73{
98 void *data; 74 void *data;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index c9dcbec7d800..a3fdf55f317b 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -39,7 +39,7 @@ static int stop_script_unsupported(void)
39static void process_event_unsupported(union perf_event *event __unused, 39static void process_event_unsupported(union perf_event *event __unused,
40 struct perf_sample *sample __unused, 40 struct perf_sample *sample __unused,
41 struct perf_evsel *evsel __unused, 41 struct perf_evsel *evsel __unused,
42 struct perf_session *session __unused, 42 struct machine *machine __unused,
43 struct thread *thread __unused) 43 struct thread *thread __unused)
44{ 44{
45} 45}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index a84100817649..58ae14c5baac 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -3,7 +3,11 @@
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include "parse-events.h" 5#include "parse-events.h"
6#include "session.h" 6
7struct machine;
8struct perf_sample;
9union perf_event;
10struct thread;
7 11
8#define __unused __attribute__((unused)) 12#define __unused __attribute__((unused))
9 13
@@ -292,7 +296,7 @@ struct scripting_ops {
292 void (*process_event) (union perf_event *event, 296 void (*process_event) (union perf_event *event,
293 struct perf_sample *sample, 297 struct perf_sample *sample,
294 struct perf_evsel *evsel, 298 struct perf_evsel *evsel,
295 struct perf_session *session, 299 struct machine *machine,
296 struct thread *thread); 300 struct thread *thread);
297 int (*generate_script) (const char *outfile); 301 int (*generate_script) (const char *outfile);
298}; 302};
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 0575905d1205..295a9c93f945 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -224,7 +224,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser)
224} 224}
225 225
226static int annotate_browser__run(struct annotate_browser *self, int evidx, 226static int annotate_browser__run(struct annotate_browser *self, int evidx,
227 int nr_events, void(*timer)(void *arg), 227 void(*timer)(void *arg),
228 void *arg, int delay_secs) 228 void *arg, int delay_secs)
229{ 229{
230 struct rb_node *nd = NULL; 230 struct rb_node *nd = NULL;
@@ -328,8 +328,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
328 notes = symbol__annotation(target); 328 notes = symbol__annotation(target);
329 pthread_mutex_lock(&notes->lock); 329 pthread_mutex_lock(&notes->lock);
330 330
331 if (notes->src == NULL && 331 if (notes->src == NULL && symbol__alloc_hist(target) < 0) {
332 symbol__alloc_hist(target, nr_events) < 0) {
333 pthread_mutex_unlock(&notes->lock); 332 pthread_mutex_unlock(&notes->lock);
334 ui__warning("Not enough memory for annotating '%s' symbol!\n", 333 ui__warning("Not enough memory for annotating '%s' symbol!\n",
335 target->name); 334 target->name);
@@ -337,7 +336,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
337 } 336 }
338 337
339 pthread_mutex_unlock(&notes->lock); 338 pthread_mutex_unlock(&notes->lock);
340 symbol__tui_annotate(target, ms->map, evidx, nr_events, 339 symbol__tui_annotate(target, ms->map, evidx,
341 timer, arg, delay_secs); 340 timer, arg, delay_secs);
342 } 341 }
343 continue; 342 continue;
@@ -358,15 +357,15 @@ out:
358 return key; 357 return key;
359} 358}
360 359
361int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events, 360int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
362 void(*timer)(void *arg), void *arg, int delay_secs) 361 void(*timer)(void *arg), void *arg, int delay_secs)
363{ 362{
364 return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, nr_events, 363 return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx,
365 timer, arg, delay_secs); 364 timer, arg, delay_secs);
366} 365}
367 366
368int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, 367int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
369 int nr_events, void(*timer)(void *arg), void *arg, 368 void(*timer)(void *arg), void *arg,
370 int delay_secs) 369 int delay_secs)
371{ 370{
372 struct objdump_line *pos, *n; 371 struct objdump_line *pos, *n;
@@ -419,8 +418,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
419 browser.b.nr_entries = browser.nr_entries; 418 browser.b.nr_entries = browser.nr_entries;
420 browser.b.entries = &notes->src->source, 419 browser.b.entries = &notes->src->source,
421 browser.b.width += 18; /* Percentage */ 420 browser.b.width += 18; /* Percentage */
422 ret = annotate_browser__run(&browser, evidx, nr_events, 421 ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs);
423 timer, arg, delay_secs);
424 list_for_each_entry_safe(pos, n, &notes->src->source, node) { 422 list_for_each_entry_safe(pos, n, &notes->src->source, node) {
425 list_del(&pos->node); 423 list_del(&pos->node);
426 objdump_line__free(pos); 424 objdump_line__free(pos);
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index d0c94b459685..1212a386a033 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -1020,7 +1020,7 @@ do_annotate:
1020 * Don't let this be freed, say, by hists__decay_entry. 1020 * Don't let this be freed, say, by hists__decay_entry.
1021 */ 1021 */
1022 he->used = true; 1022 he->used = true;
1023 err = hist_entry__tui_annotate(he, evsel->idx, nr_events, 1023 err = hist_entry__tui_annotate(he, evsel->idx,
1024 timer, arg, delay_secs); 1024 timer, arg, delay_secs);
1025 he->used = false; 1025 he->used = false;
1026 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); 1026 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
diff --git a/tools/perf/util/ui/progress.c b/tools/perf/util/ui/progress.c
index 295e366b6311..13aa64e50e11 100644
--- a/tools/perf/util/ui/progress.c
+++ b/tools/perf/util/ui/progress.c
@@ -14,6 +14,9 @@ void ui_progress__update(u64 curr, u64 total, const char *title)
14 if (use_browser <= 0) 14 if (use_browser <= 0)
15 return; 15 return;
16 16
17 if (total == 0)
18 return;
19
17 ui__refresh_dimensions(true); 20 ui__refresh_dimensions(true);
18 pthread_mutex_lock(&ui__lock); 21 pthread_mutex_lock(&ui__lock);
19 y = SLtt_Screen_Rows / 2 - 2; 22 y = SLtt_Screen_Rows / 2 - 2;
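
The early return above guards the bar-geometry arithmetic later in this function, which scales the available screen width by curr/total; an empty input file would otherwise make that an integer division by zero. A minimal sketch of the guarded computation, with an illustrative width parameter rather than the actual slang screen query:

/* Hedged sketch: derive the filled bar length only when total is
 * meaningful; the new guard makes the division safe. */
static int bar_cols(unsigned long long curr, unsigned long long total,
		    int width)
{
	if (total == 0)
		return 0;
	return (int)((width * curr) / total);
}
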
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index e16bf9a707e8..d76d1c0ff98f 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -1,5 +1,8 @@
1/* 1/*
2 * GIT - The information manager from hell 2 * usage.c
3 *
4 * Various reporting routines.
5 * Originally copied from GIT source.
3 * 6 *
4 * Copyright (C) Linus Torvalds, 2005 7 * Copyright (C) Linus Torvalds, 2005
5 */ 8 */
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 0128906bac88..37be34dff798 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -245,4 +245,15 @@ int readn(int fd, void *buf, size_t size);
245#define _STR(x) #x 245#define _STR(x) #x
246#define STR(x) _STR(x) 246#define STR(x) _STR(x)
247 247
248/*
249 * Determine whether some value is a power of two, where zero is
250 * *not* considered a power of two.
251 */
252
253static inline __attribute__((const))
254bool is_power_of_2(unsigned long n)
255{
256 return (n != 0 && ((n & (n - 1)) == 0));
257}
258
248#endif 259#endif
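
The helper added above relies on the classic bit trick: n & (n - 1) clears the lowest set bit, so the expression is zero exactly when at most one bit is set, and the n != 0 term rules out zero itself. A usage sketch, assuming a caller validating a power-of-two ring-buffer size (the option name and wrapper are illustrative):

#include <errno.h>
#include <stdio.h>

/* Hedged usage sketch: mmap ring buffers must span a power-of-two
 * number of pages, so option parsing can reject bad values early. */
static int check_mmap_pages(unsigned long mmap_pages)
{
	if (!is_power_of_2(mmap_pages)) {
		fprintf(stderr, "mmap pages must be a power of two\n");
		return -EINVAL;
	}
	return 0;
}
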
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index bdd33470b235..697c8b4e59cc 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -32,6 +32,7 @@ void perf_read_values_destroy(struct perf_read_values *values)
32 32
33 for (i = 0; i < values->threads; i++) 33 for (i = 0; i < values->threads; i++)
34 free(values->value[i]); 34 free(values->value[i]);
35 free(values->value);
35 free(values->pid); 36 free(values->pid);
36 free(values->tid); 37 free(values->tid);
37 free(values->counterrawid); 38 free(values->counterrawid);
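
The added free(values->value) plugs a leak: the loop above it releases each per-thread row, but the array of row pointers itself was never freed. Teardown has to mirror the two-step allocation; a compact sketch with illustrative dimensions (allocation error handling elided):

#include <stdlib.h>

/* Hedged sketch: a 2-D table built as an array of row pointers is
 * freed in two steps -- each row, then the pointer array, which is
 * the step the hunk above restores. */
static void table_demo(int threads, int counters)
{
	unsigned long long **value = malloc(threads * sizeof(*value));
	int i;

	for (i = 0; i < threads; i++)
		value[i] = calloc(counters, sizeof(**value));

	for (i = 0; i < threads; i++)
		free(value[i]);
	free(value);		/* previously leaked */
}
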
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 3ad0925d23a9..758e3b36d4cf 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -17,6 +17,8 @@
17#include <linux/pci.h> 17#include <linux/pci.h>
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/namei.h>
21#include <linux/fs.h>
20#include "irq.h" 22#include "irq.h"
21 23
22static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, 24static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
@@ -480,12 +482,76 @@ out:
480 return r; 482 return r;
481} 483}
482 484
485/*
486 * We want to test whether the caller has been granted permissions to
487 * use this device. To be able to configure and control the device,
488 * the user needs access to PCI configuration space and BAR resources.
489 * These are accessed through PCI sysfs. PCI config space is often
490 * passed to the process calling this ioctl via file descriptor, so we
491 * can't rely on access to that file. We can check for permissions
492 * on each of the BAR resource files, which is a pretty clear
493 * indicator that the user has been granted access to the device.
494 */
495static int probe_sysfs_permissions(struct pci_dev *dev)
496{
497#ifdef CONFIG_SYSFS
498 int i;
499 bool bar_found = false;
500
501 for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
502 char *kpath, *syspath;
503 struct path path;
504 struct inode *inode;
505 int r;
506
507 if (!pci_resource_len(dev, i))
508 continue;
509
510 kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
511 if (!kpath)
512 return -ENOMEM;
513
514 /* Per sysfs-rules, sysfs is always at /sys */
515 syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
516 kfree(kpath);
517 if (!syspath)
518 return -ENOMEM;
519
520 r = kern_path(syspath, LOOKUP_FOLLOW, &path);
521 kfree(syspath);
522 if (r)
523 return r;
524
525 inode = path.dentry->d_inode;
526
527 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
528 path_put(&path);
529 if (r)
530 return r;
531
532 bar_found = true;
533 }
534
535 /* If no resources, probably something special */
536 if (!bar_found)
537 return -EPERM;
538
539 return 0;
540#else
541 return -EINVAL; /* No way to control the device without sysfs */
542#endif
543}
544
483static int kvm_vm_ioctl_assign_device(struct kvm *kvm, 545static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
484 struct kvm_assigned_pci_dev *assigned_dev) 546 struct kvm_assigned_pci_dev *assigned_dev)
485{ 547{
486 int r = 0, idx; 548 int r = 0, idx;
487 struct kvm_assigned_dev_kernel *match; 549 struct kvm_assigned_dev_kernel *match;
488 struct pci_dev *dev; 550 struct pci_dev *dev;
551 u8 header_type;
552
553 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
554 return -EINVAL;
489 555
490 mutex_lock(&kvm->lock); 556 mutex_lock(&kvm->lock);
491 idx = srcu_read_lock(&kvm->srcu); 557 idx = srcu_read_lock(&kvm->srcu);
@@ -513,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
513 r = -EINVAL; 579 r = -EINVAL;
514 goto out_free; 580 goto out_free;
515 } 581 }
582
583 /* Don't allow bridges to be assigned */
584 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
585 if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
586 r = -EPERM;
587 goto out_put;
588 }
589
590 r = probe_sysfs_permissions(dev);
591 if (r)
592 goto out_put;
593
516 if (pci_enable_device(dev)) { 594 if (pci_enable_device(dev)) {
517 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); 595 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
518 r = -EBUSY; 596 r = -EBUSY;
@@ -544,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
544 622
545 list_add(&match->list, &kvm->arch.assigned_dev_head); 623 list_add(&match->list, &kvm->arch.assigned_dev_head);
546 624
547 if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { 625 if (!kvm->arch.iommu_domain) {
548 if (!kvm->arch.iommu_domain) { 626 r = kvm_iommu_map_guest(kvm);
549 r = kvm_iommu_map_guest(kvm);
550 if (r)
551 goto out_list_del;
552 }
553 r = kvm_assign_device(kvm, match);
554 if (r) 627 if (r)
555 goto out_list_del; 628 goto out_list_del;
556 } 629 }
630 r = kvm_assign_device(kvm, match);
631 if (r)
632 goto out_list_del;
557 633
558out: 634out:
559 srcu_read_unlock(&kvm->srcu, idx); 635 srcu_read_unlock(&kvm->srcu, idx);
@@ -593,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
593 goto out; 669 goto out;
594 } 670 }
595 671
596 if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) 672 kvm_deassign_device(kvm, match);
597 kvm_deassign_device(kvm, match);
598 673
599 kvm_free_assigned_device(kvm, match); 674 kvm_free_assigned_device(kvm, match);
600 675
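
With these hunks, device assignment is only permitted with IOMMU protection: the ioctl now fails with EINVAL unless KVM_DEV_ASSIGN_ENABLE_IOMMU is set, refuses PCI bridges by header type, and uses probe_sysfs_permissions() to verify the caller can already read and write the device's BAR resource files in sysfs. A userspace sketch of a conforming call; the bus<<8|devfn packing of assigned_dev_id follows common convention but should be treated as an assumption here:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hedged sketch: assignment must now opt in to the IOMMU explicitly. */
static int assign_device(int vm_fd, __u32 busnr, __u32 devfn)
{
	struct kvm_assigned_pci_dev dev;

	memset(&dev, 0, sizeof(dev));
	dev.assigned_dev_id = (busnr << 8) | devfn;	/* packing assumed */
	dev.busnr = busnr;
	dev.devfn = devfn;
	dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;	/* now mandatory */

	return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
}
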